From 7d800ab10c2010743f70dbab5631995a99bb9b82 Mon Sep 17 00:00:00 2001 From: Markus Ehrnsperger <> Date: Fri, 24 Nov 2023 17:29:08 +0100 Subject: [PATCH] performance, 3.3.2 --- epgsearch.cpp | 26 ++++---- epgsearch.h | 3 +- recman.cpp | 102 ++++++++++++++++------------ recman.h | 2 +- setup.h | 4 +- tools.cpp | 30 +++++++-- tools.h | 2 + xxhash32.h | 180 -------------------------------------------------- 8 files changed, 102 insertions(+), 247 deletions(-) delete mode 100644 xxhash32.h diff --git a/epgsearch.cpp b/epgsearch.cpp index 06d50bef..426f6d4b 100644 --- a/epgsearch.cpp +++ b/epgsearch.cpp @@ -620,22 +620,20 @@ std::string SearchResults::AddQuery(std::string const& query) return xxHash128(query); } -std::string SearchResults::PopQuery(std::string const& md5) +std::string SearchResults::PopQuery(cSv md5) { + if (md5.empty()) return std::string(); std::string query; - if (!md5.empty()) - { - std::set::iterator it; - for (it = querySet.begin(); it != querySet.end(); it++) - { - if (md5 == xxHash128(*it)) - { - query = *it; - querySet.erase(it); - break; - } - } - } + std::set::iterator it; + for (it = querySet.begin(); it != querySet.end(); it++) + { + if (compare_xxHash128(md5, *it)) + { + query = *it; + querySet.erase(it); + break; + } + } return query; } diff --git a/epgsearch.h b/epgsearch.h index a6929e3c..5c4a63fd 100644 --- a/epgsearch.h +++ b/epgsearch.h @@ -14,6 +14,7 @@ #include #include +#include "stringhelpers.h" namespace vdrlive { @@ -394,7 +395,7 @@ class SearchResults void merge(SearchResults& r) {m_list.merge(r.m_list); m_list.sort();} static std::string AddQuery(std::string const& query); - static std::string PopQuery(std::string const& md5); + static std::string PopQuery(cSv md5); private: searchresults m_list; }; diff --git a/recman.cpp b/recman.cpp index 046f07f4..abbb9ad5 100644 --- a/recman.cpp +++ b/recman.cpp @@ -66,20 +66,23 @@ namespace vdrlive { std::string RecordingsManager::Md5Hash(cRecording const * recording) const { m_timeMd5Hash.start(); - std::string result = "recording_" + xxHash128(recording->FileName()); + std::string result; + result.reserve(42); + result.append("recording_"); + stringAppend_xxHash128(result, recording->FileName()); m_timeMd5Hash.stop(); return result; } cRecording const * RecordingsManager::GetByMd5Hash(cSv hash) const { - if (!hash.empty()) { - LOCK_RECORDINGS_READ; - for (cRecording* rec = (cRecording *)Recordings->First(); rec; rec = (cRecording *)Recordings->Next(rec)) { - if (hash == Md5Hash(rec)) - return rec; - } - } + if (hash.length() != 42) return 0; + if (hash.compare(0, 10, "recording_") != 0) return 0; + cSv xxh = hash.substr_csv(10); + LOCK_RECORDINGS_READ; + for (cRecording* rec = (cRecording *)Recordings->First(); rec; rec = (cRecording *)Recordings->Next(rec)) { + if (compare_xxHash128(xxh, rec->FileName())) return rec; + } return 0; } @@ -156,16 +159,21 @@ namespace vdrlive { int RecordingsManager::GetArchiveType(cRecording const * recording) { - std::string filename = recording->FileName(); - - std::string dvdFile = filename + "/dvd.vdr"; - if (0 == access(dvdFile.c_str(), R_OK)) { - return 1; - } - std::string hddFile = filename + "/hdd.vdr"; - if (0 == access(hddFile.c_str(), R_OK)) { - return 2; - } +// 1: on dvd +// 2: on hdd +// 0: "normal" VDR recording + if (!recording || !recording->FileName() ) return 0; + size_t folder_length = strlen(recording->FileName()); + char file[folder_length + 9]; // "/dvd.vdr" + 0 terminator -> 9 + memcpy(file, recording->FileName(), folder_length); + memcpy(file + folder_length, "/dvd.vdr", 8); + file[folder_length + 8] = 0; + struct stat buffer; + if (stat (file, &buffer) == 0) return 1; + memcpy(file + folder_length, "/hdd.vdr", 8); + if (stat (file, &buffer) == 0) return 2; +// stat is 10% faster than access on my system. On others, there is a larger difference +// see https://stackoverflow.com/questions/12774207/fastest-way-to-check-if-a-file-exists-using-standard-c-c11-14-17-c return 0; } @@ -175,7 +183,7 @@ namespace vdrlive { if (archiveType==1) { std::string dvdFile = filename + "/dvd.vdr"; - std::ifstream dvd(dvdFile.c_str()); + std::ifstream dvd(dvdFile); if (dvd) { std::string archiveDisc; @@ -189,7 +197,7 @@ namespace vdrlive { } } else if(archiveType==2) { std::string hddFile = filename + "/hdd.vdr"; - std::ifstream hdd(hddFile.c_str()); + std::ifstream hdd(hddFile); if (hdd) { std::string archiveDisc; @@ -582,6 +590,7 @@ bool searchNameDesc(RecordingsItemPtr &RecItem, const std::vectorstart(); cGetScraperVideo getScraperVideo(NULL, recording); if (m_timeIdentify) m_timeIdentify->start(); bool scraper_available = getScraperVideo.call(LiveSetup().GetPluginScraper()); @@ -595,9 +604,7 @@ bool searchNameDesc(RecordingsItemPtr &RecItem, const std::vectorstop(); m_s_episode_number = getScraperVideo.m_scraperVideo->getEpisodeNumber(); m_s_season_number = getScraperVideo.m_scraperVideo->getSeasonNumber(); - if (m_timeDurationDeviation) m_timeDurationDeviation->start(); m_duration_deviation = getScraperVideo.m_scraperVideo->getDurationDeviation(); - if (m_timeDurationDeviation) m_timeDurationDeviation->stop(); m_language = getScraperVideo.m_scraperVideo->getLanguage(); m_video_SD_HD = getScraperVideo.m_scraperVideo->getHD(); } @@ -609,6 +616,7 @@ bool searchNameDesc(RecordingsItemPtr &RecItem, const std::vectorstop(); } int RecordingsItem::CompareTexts(const RecordingsItemPtr &second, int *numEqualChars) const @@ -716,30 +724,31 @@ bool searchNameDesc(RecordingsItemPtr &RecItem, const std::vectorFileName() ) return -1; - DIR *dir = opendir(recording->FileName()); - if (dir == nullptr) return -1; - struct dirent *ent; - int number_ts_files = 0; - while ((ent = readdir (dir)) != NULL) - if (ent->d_name && strlen(ent->d_name) == 8 && strcmp(ent->d_name + 5, ".ts") == 0) { - bool only_digits = true; - for (int i = 0; i < 5; i++) if (ent->d_name[i] < '0' || ent->d_name[i] > '9') only_digits = false; - if (only_digits) ++number_ts_files; + size_t folder_length = strlen(recording->FileName()); + char file[folder_length + 10]; // 00001.ts , 5 digits, + .ts + / -> 9, +1 for 0 terminator + memcpy(file, recording->FileName(), folder_length); + memcpy(file + folder_length, "/00001.ts", 9); + file[folder_length + 9] = 0; + struct stat buffer; + int num_ts_files; + for (num_ts_files = 1; num_ts_files < 100000; ++num_ts_files) { + concat::addCharsUg0be(file + folder_length + 6, num_ts_files); + if (stat (file, &buffer) != 0) break; } - closedir (dir); - return number_ts_files; + return num_ts_files - 1; } /** * Implementation of class RecordingsItemRec: */ - RecordingsItemRec::RecordingsItemRec(int idI, cSv id, cSv name, const cRecording* recording, cMeasureTime *timeIdentify, cMeasureTime *timeOverview, cMeasureTime *timeImage, cMeasureTime *timeDurationDeviation): + RecordingsItemRec::RecordingsItemRec(int idI, cSv id, cSv name, const cRecording* recording, cMeasureTime *timeIdentify, cMeasureTime *timeOverview, cMeasureTime *timeImage, cMeasureTime *timeDurationDeviation, cMeasureTime *timeNumTsFiles, cMeasureTime *timeItemRec): RecordingsItem(name), m_recording(recording), m_id(id), m_isArchived(RecordingsManager::GetArchiveType(m_recording) ) { // dsyslog("live: REC: C: rec %s -> %s", name.c_str(), parent->Name().c_str()); + timeItemRec->start(); m_idI = idI; m_timeIdentify = timeIdentify; m_timeOverview = timeOverview; @@ -749,7 +758,10 @@ bool searchNameDesc(RecordingsItemPtr &RecItem, const std::vectorstart(); m_number_ts_files = GetNumberOfTsFiles(recording); + timeNumTsFiles->stop(); + timeItemRec->stop(); } RecordingsItemRec::~RecordingsItemRec() @@ -1006,12 +1018,14 @@ void RecordingsItemRec::AppendAsJSArray(cLargeString &target, std::vectorm_timeMd5Hash.reset(); - cMeasureTime timeRecs, timeIdentify, timeOverview, timeImage, timeDurationDeviation; + cMeasureTime timeRecs, timeIdentify, timeOverview, timeImage, timeDurationDeviation, timeNumTsFiles, timeItemRec; timeRecs.reset(); timeIdentify.reset(); timeOverview.reset(); timeImage.reset(); timeDurationDeviation.reset(); + timeNumTsFiles.reset(); + timeItemRec.reset(); std::chrono::time_point begin = std::chrono::high_resolution_clock::now(); // check availability of scraper data @@ -1057,7 +1071,7 @@ void RecordingsItemRec::AppendAsJSArray(cLargeString &target, std::vectorId(), recMan->Md5Hash(recording), recName, recording, &timeIdentify, &timeOverview, &timeImage, &timeDurationDeviation)); + RecordingsItemPtr recPtr (new RecordingsItemRec(recording->Id(), recMan->Md5Hash(recording), recName, recording, &timeIdentify, &timeOverview, &timeImage, &timeDurationDeviation, &timeNumTsFiles, &timeItemRec)); timeRecs.stop(); dir->m_entries.push_back(recPtr); m_allRecordings.push_back(recPtr); @@ -1097,12 +1111,16 @@ m_allRecordings.push_back(recPtr); m_root->finishRecordingsTree(); std::chrono::duration timeNeeded = std::chrono::high_resolution_clock::now() - begin; esyslog("live: DH: ------ RecordingsTree::RecordingsTree() --------, required time: %9.5f", timeNeeded.count() ); - recMan->m_timeMd5Hash.print("live: timeMd5Hash"); - timeRecs.print("live: timeRecs "); - timeIdentify.print("live: Identify "); - timeOverview.print("live: Overview "); - timeImage.print("live: Image "); - timeDurationDeviation.print("live: DurDev "); +/* + recMan->m_timeMd5Hash.print("live: time Hash "); + timeRecs.print("live: timeRecs "); + timeItemRec.print("live: ItemRec "); + timeIdentify.print("live: Identify "); + timeOverview.print("live: Overview "); + timeImage.print("live: Image "); + timeDurationDeviation.print("live: Scraper "); + timeNumTsFiles.print("live: NumTsFiles"); +*/ } RecordingsTree::~RecordingsTree() diff --git a/recman.h b/recman.h index 5363b5c6..2bcb4adf 100644 --- a/recman.h +++ b/recman.h @@ -339,7 +339,7 @@ template class RecordingsItemRec : public RecordingsItem { public: - RecordingsItemRec(int idI, cSv id, cSv name, const cRecording* recording, cMeasureTime *timeIdentify, cMeasureTime *timeOverview, cMeasureTime *timeImage, cMeasureTime *timeDurationDeviation); + RecordingsItemRec(int idI, cSv id, cSv name, const cRecording* recording, cMeasureTime *timeIdentify, cMeasureTime *timeOverview, cMeasureTime *timeImage, cMeasureTime *timeDurationDeviation, cMeasureTime *timeNumTsFiles, cMeasureTime *timeItemRec); virtual ~RecordingsItemRec(); diff --git a/setup.h b/setup.h index 71195270..ebfd69ef 100644 --- a/setup.h +++ b/setup.h @@ -11,8 +11,8 @@ #include -#define LIVEVERSION "3.3.1" -#define LIVEVERSNUM 30301 +#define LIVEVERSION "3.3.2" +#define LIVEVERSNUM 30302 #define LIVESUMMARY trNOOP("Live Interactive VDR Environment") namespace vdrlive { diff --git a/tools.cpp b/tools.cpp index 25be8ef7..a9ce6b4b 100644 --- a/tools.cpp +++ b/tools.cpp @@ -297,15 +297,31 @@ template void toHex(char *buf, int chars, T value) { toHex(res, 16, result); return res; } - std::string xxHash128(cSv str) - { + inline void xxHash128_buf(char *buf, cSv str) { +// sizeof buf must be >= 32. This is not checked! XXH128_hash_t result = XXH3_128bits(str.data(), str.length()); - char res[33]; - res[32] = 0; - toHex(res+16, 16, result.low64); - toHex(res , 16, result.high64); - return res; + toHex(buf+16, 16, result.low64); + toHex(buf , 16, result.high64); } + void stringAppend_xxHash128(std::string &target, cSv str) { + char buf[32]; + xxHash128_buf(buf, str); + target.append(buf, 32); + } + std::string xxHash128(cSv str) + { + char buf[32]; + xxHash128_buf(buf, str); + return std::string(buf, 32); + } + + bool compare_xxHash128(cSv str1, cSv str2) { +// return str1 == xxHash128(str2) + if (str1.length() != 32) return false; + char buf[32]; + xxHash128_buf(buf, str2); + return str1.compare(0, 32, buf, 32) == 0; + } #define HOURS(x) ((x)/100) #define MINUTES(x) ((x)%100) diff --git a/tools.h b/tools.h index a9a969ed..a6f1d3dd 100644 --- a/tools.h +++ b/tools.h @@ -101,6 +101,8 @@ template void toHex(char *buf, int chars, T value); std::string xxHash32(cSv str); std::string xxHash64(cSv str); std::string xxHash128(cSv str); + void stringAppend_xxHash128(std::string &target, cSv str); + bool compare_xxHash128(cSv str1, cSv str2); // return str1 == xxHash128(str2) time_t GetTimeT(std::string timestring); // timestring in HH:MM std::string ExpandTimeString(std::string timestring); diff --git a/xxhash32.h b/xxhash32.h deleted file mode 100644 index ad15a66e..00000000 --- a/xxhash32.h +++ /dev/null @@ -1,180 +0,0 @@ -// ////////////////////////////////////////////////////////// -// xxhash32.h -// Copyright (c) 2016 Stephan Brumme. All rights reserved. -// see http://create.stephan-brumme.com/disclaimer.html -// - -#pragma once -#include // for uint32_t and uint64_t - -/// XXHash (32 bit), based on Yann Collet's descriptions, see http://cyan4973.github.io/xxHash/ -/** How to use: - uint32_t myseed = 0; - XXHash32 myhash(myseed); - myhash.add(pointerToSomeBytes, numberOfBytes); - myhash.add(pointerToSomeMoreBytes, numberOfMoreBytes); // call add() as often as you like to ... - // and compute hash: - uint32_t result = myhash.hash(); - - // or all of the above in one single line: - uint32_t result2 = XXHash32::hash(mypointer, numBytes, myseed); - - Note: my code is NOT endian-aware ! -**/ -class XXHash32 -{ -public: - /// create new XXHash (32 bit) - /** @param seed your seed value, even zero is a valid seed and e.g. used by LZ4 **/ - explicit XXHash32(uint32_t seed) - { - state[0] = seed + Prime1 + Prime2; - state[1] = seed + Prime2; - state[2] = seed; - state[3] = seed - Prime1; - bufferSize = 0; - totalLength = 0; - } - - /// add a chunk of bytes - /** @param input pointer to a continuous block of data - @param length number of bytes - @return false if parameters are invalid / zero **/ - bool add(const void* input, uint64_t length) - { - // no data ? - if (!input || length == 0) - return false; - - totalLength += length; - // byte-wise access - const unsigned char* data = (const unsigned char*)input; - - // unprocessed old data plus new data still fit in temporary buffer ? - if (bufferSize + length < MaxBufferSize) - { - // just add new data - while (length-- > 0) - buffer[bufferSize++] = *data++; - return true; - } - - // point beyond last byte - const unsigned char* stop = data + length; - const unsigned char* stopBlock = stop - MaxBufferSize; - - // some data left from previous update ? - if (bufferSize > 0) - { - // make sure temporary buffer is full (16 bytes) - while (bufferSize < MaxBufferSize) - buffer[bufferSize++] = *data++; - - // process these 16 bytes (4x4) - process(buffer, state[0], state[1], state[2], state[3]); - } - - // copying state to local variables helps optimizer A LOT - uint32_t s0 = state[0], s1 = state[1], s2 = state[2], s3 = state[3]; - // 16 bytes at once - while (data <= stopBlock) - { - // local variables s0..s3 instead of state[0]..state[3] are much faster - process(data, s0, s1, s2, s3); - data += 16; - } - // copy back - state[0] = s0; state[1] = s1; state[2] = s2; state[3] = s3; - - // copy remainder to temporary buffer - bufferSize = stop - data; - for (unsigned int i = 0; i < bufferSize; i++) - buffer[i] = data[i]; - - // done - return true; - } - - /// get current hash - /** @return 32 bit XXHash **/ - uint32_t hash() const - { - uint32_t result = (uint32_t)totalLength; - - // fold 128 bit state into one single 32 bit value - if (totalLength >= MaxBufferSize) - result += rotateLeft(state[0], 1) + - rotateLeft(state[1], 7) + - rotateLeft(state[2], 12) + - rotateLeft(state[3], 18); - else - // internal state wasn't set in add(), therefore original seed is still stored in state2 - result += state[2] + Prime5; - - // process remaining bytes in temporary buffer - const unsigned char* data = buffer; - // point beyond last byte - const unsigned char* stop = data + bufferSize; - - // at least 4 bytes left ? => eat 4 bytes per step - for (; data + 4 <= stop; data += 4) - result = rotateLeft(result + *(uint32_t*)data * Prime3, 17) * Prime4; - - // take care of remaining 0..3 bytes, eat 1 byte per step - while (data != stop) - result = rotateLeft(result + (*data++) * Prime5, 11) * Prime1; - - // mix bits - result ^= result >> 15; - result *= Prime2; - result ^= result >> 13; - result *= Prime3; - result ^= result >> 16; - return result; - } - - /// combine constructor, add() and hash() in one static function (C style) - /** @param input pointer to a continuous block of data - @param length number of bytes - @param seed your seed value, e.g. zero is a valid seed and used by LZ4 - @return 32 bit XXHash **/ - static uint32_t hash(const void* input, uint64_t length, uint32_t seed) - { - XXHash32 hasher(seed); - hasher.add(input, length); - return hasher.hash(); - } - -private: - /// magic constants :-) - static const uint32_t Prime1 = 2654435761U; - static const uint32_t Prime2 = 2246822519U; - static const uint32_t Prime3 = 3266489917U; - static const uint32_t Prime4 = 668265263U; - static const uint32_t Prime5 = 374761393U; - - /// temporarily store up to 15 bytes between multiple add() calls - static const uint32_t MaxBufferSize = 15+1; - - // internal state and temporary buffer - uint32_t state[4]; // state[2] == seed if totalLength < MaxBufferSize - unsigned char buffer[MaxBufferSize]; - unsigned int bufferSize; - uint64_t totalLength; - - /// rotate bits, should compile to a single CPU instruction (ROL) - static inline uint32_t rotateLeft(uint32_t x, unsigned char bits) - { - return (x << bits) | (x >> (32 - bits)); - } - - /// process a block of 4x4 bytes, this is the main part of the XXHash32 algorithm - static inline void process(const void* data, uint32_t& state0, uint32_t& state1, uint32_t& state2, uint32_t& state3) - { - const uint32_t* block = (const uint32_t*) data; - state0 = rotateLeft(state0 + block[0] * Prime2, 13) * Prime1; - state1 = rotateLeft(state1 + block[1] * Prime2, 13) * Prime1; - state2 = rotateLeft(state2 + block[2] * Prime2, 13) * Prime1; - state3 = rotateLeft(state3 + block[3] * Prime2, 13) * Prime1; - } -};