From a9e5fbcce8465e4a531d9f3cf43b9c1b38b18eb5 Mon Sep 17 00:00:00 2001 From: Azi Hassan Date: Wed, 27 Dec 2023 22:24:13 +0100 Subject: [PATCH] [feature/ISSUE-28] Cache base.js based on base.js URL and not video ID --- source/cache.d | 135 ++++++++++++++++++++++++++++++++++++++++------- source/helpers.d | 21 ++++++++ 2 files changed, 137 insertions(+), 19 deletions(-) diff --git a/source/cache.d b/source/cache.d index a369c6c..88ba78e 100644 --- a/source/cache.d +++ b/source/cache.d @@ -3,13 +3,13 @@ import std.array : replace; import std.base64 : Base64URL; import std.conv : to; import std.datetime : SysTime, Clock, days; -import std.file : exists, getcwd, readText, tempDir, write; +import std.file : exists, getcwd, readText, remove, tempDir, write; import std.net.curl : get; import std.path : buildPath; import std.typecons : Flag, Yes, No; import std.string : indexOf; -import helpers : StdoutLogger, parseID, parseQueryString; +import helpers : StdoutLogger, parseID, parseQueryString, parseBaseJSKey; import parsers : parseBaseJSURL, YoutubeVideoURLExtractor, SimpleYoutubeVideoURLExtractor, AdvancedYoutubeVideoURLExtractor; struct Cache @@ -31,24 +31,28 @@ struct Cache { this(logger); this.downloadAsString = downloadAsString; + this.forceRefresh = forceRefresh; } YoutubeVideoURLExtractor makeParser(string url, int itag) { - string htmlCachePath = getCachePath(url) ~ ".html"; - string baseJSCachePath = getCachePath(url) ~ ".js"; - updateCache(url, htmlCachePath, baseJSCachePath, itag); - + string htmlCachePath = getHTMLCachePath(url) ~ ".html"; + updateHTMLCache(url, htmlCachePath, itag); string html = htmlCachePath.readText(); + + string baseJSURL = html.parseBaseJSURL(); + string baseJSCachePath = getBaseJSCachePath(baseJSURL) ~ ".js"; + updateBaseJSCache(baseJSURL, baseJSCachePath, itag); string baseJS = baseJSCachePath.readText(); - if(html.indexOf("signatureCipher:") == -1) + + if(html.indexOf("signatureCipher") == -1) { return new SimpleYoutubeVideoURLExtractor(html, baseJS, logger); } return new AdvancedYoutubeVideoURLExtractor(html, baseJS, logger); } - private void updateCache(string url, string htmlCachePath, string baseJSCachePath, int itag) + private void updateHTMLCache(string url, string htmlCachePath, int itag) { bool shouldRedownload = forceRefresh || !htmlCachePath.exists() || isStale(htmlCachePath.readText(), itag); if(shouldRedownload) @@ -56,8 +60,6 @@ struct Cache logger.display("Cache miss, downloading HTML..."); string html = this.downloadAsString(url); htmlCachePath.write(html); - string baseJS = this.downloadAsString(html.parseBaseJSURL()); - baseJSCachePath.write(baseJS); } else { @@ -65,16 +67,31 @@ struct Cache } } + private void updateBaseJSCache(string url, string baseJSCachePath, int itag) + { + bool shouldRedownload = forceRefresh || !baseJSCachePath.exists(); + if(shouldRedownload) + { + logger.display("base.js cache miss, downloading from " ~ url); + string baseJS = this.downloadAsString(url); + baseJSCachePath.write(baseJS); + } + else + { + logger.display("base.js cache hit, skipping download..."); + } + } + private bool isStale(string html, int itag) { - YoutubeVideoURLExtractor shallowParser = html.indexOf("signatureCipher:") == -1 + YoutubeVideoURLExtractor shallowParser = html.indexOf("signatureCipher") == -1 ? new SimpleYoutubeVideoURLExtractor(html, "", logger) : new AdvancedYoutubeVideoURLExtractor(html, "", logger); ulong expire = shallowParser.findExpirationTimestamp(itag); return SysTime.fromUnixTime(expire) < Clock.currTime(); } - private string getCachePath(string url) + private string getHTMLCachePath(string url) { string cacheKey = url.parseID(); if(cacheKey == "") @@ -84,6 +101,17 @@ struct Cache return buildPath(cacheDirectory, cacheKey); } + + private string getBaseJSCachePath(string url) + { + string cacheKey = url.parseBaseJSKey(); + if(cacheKey == "") + { + cacheKey = Base64URL.encode(cast(ubyte[]) url); + } + + return buildPath(cacheDirectory, cacheKey); + } } unittest @@ -91,7 +119,10 @@ unittest writeln("Given SimpleYoutubeVideoURLExtractor, when cache is stale, should redownload HTML"); bool downloadAttempted; auto downloadAsString = delegate string(string url) { - downloadAttempted = true; + if(url == "https://youtu.be/zoz") + { + downloadAttempted = true; + } return "zoz.html".readText(); }; auto cache = Cache(new StdoutLogger(), downloadAsString); @@ -106,7 +137,10 @@ unittest writeln("Given SimpleYoutubeVideoURLExtractor, when cache is fresh, should not download HTML"); bool downloadAttempted; auto downloadAsString = delegate string(string url) { - downloadAttempted = true; + if(url == "https://youtu.be/zoz-fresh") + { + downloadAttempted = true; + } return "zoz.html".readText(); }; SysTime tomorrow = Clock.currTime() + 1.days; @@ -114,7 +148,6 @@ unittest cache.cacheDirectory = getcwd(); "zoz-fresh.html".write("zoz.html".readText().dup.replace("expire=1638935038", "expire=" ~ tomorrow.toUnixTime().to!string)); - "zoz-fresh.js".write("base.min.js".readText()); auto parser = cache.makeParser("https://youtu.be/zoz-fresh", 18); assert(!downloadAttempted); @@ -125,7 +158,10 @@ unittest writeln("Given AdvancedYoutubeVideoURLExtractor, when cache is stale, should redownload HTML"); bool downloadAttempted; auto downloadAsString = delegate string(string url) { - downloadAttempted = true; + if(url == "https://youtu.be/dQw4w9WgXcQ") + { + downloadAttempted = true; + } return "dQw4w9WgXcQ.html".readText(); }; auto cache = Cache(new StdoutLogger(), downloadAsString); @@ -140,7 +176,10 @@ unittest writeln("Given AdvancedYoutubeVideoURLExtractor, when cache is fresh, should not download HTML"); bool downloadAttempted; auto downloadAsString = delegate string(string url) { - downloadAttempted = true; + if(url == "https://youtu.be/dQw4w9WgXcQ-fresh") + { + downloadAttempted = true; + } return "dQw4w9WgXcQ-fresh.html".readText(); }; SysTime tomorrow = Clock.currTime() + 1.days; @@ -148,7 +187,6 @@ unittest cache.cacheDirectory = getcwd(); //mock previously cached and fresh files - "dQw4w9WgXcQ-fresh.js".write("base.min.js".readText()); "dQw4w9WgXcQ-fresh.html".write( "dQw4w9WgXcQ.html".readText().dup.replace("expire%3D1677997809", "expire%3D" ~ tomorrow.toUnixTime().to!string) ); @@ -162,8 +200,17 @@ unittest { writeln("When forcing refresh, should download HTML"); bool downloadAttempted; + bool baseJSDownloadAttempted; auto downloadAsString = delegate string(string url) { - downloadAttempted = true; + writeln("downloadAsString : ", url); + if(url == "https://youtu.be/zoz") + { + downloadAttempted = true; + } + if(url == "https://www.youtube.com/s/player/0c96dfd3/player_ias.vflset/ar_EG/base.js") + { + baseJSDownloadAttempted = true; + } return "zoz.html".readText(); }; auto cache = Cache(new StdoutLogger(), downloadAsString, Yes.forceRefresh); @@ -171,4 +218,54 @@ unittest auto parser = cache.makeParser("https://youtu.be/zoz", 18); assert(downloadAttempted); + assert(baseJSDownloadAttempted); +} + +unittest +{ + writeln("When base.js is cached, should read from cache"); + "0c96dfd3.js".write("base.min.js".readText()); + + bool baseJSDownloadAttempted; + auto downloadAsString = delegate string(string url) { + if(url == "https://www.youtube.com/s/player/0c96dfd3/player_ias.vflset/ar_EG/base.js") + { + baseJSDownloadAttempted = true; + return "0c96dfd3.js".readText(); + } + return "zoz.html".readText(); + }; + auto cache = Cache(new StdoutLogger(), downloadAsString); + cache.cacheDirectory = getcwd(); + + auto parser = cache.makeParser("https://youtu.be/zoz", 18); + assert(!baseJSDownloadAttempted); +} + +unittest +{ + writeln("When base.js is not cached, should download it"); + if("0c96dfd3.js".exists()) + { + "0c96dfd3.js".remove(); + } + scope(exit) + { + "0c96dfd3.js".remove(); + } + + bool baseJSDownloadAttempted; + auto downloadAsString = delegate string(string url) { + if(url == "https://www.youtube.com/s/player/0c96dfd3/player_ias.vflset/ar_EG/base.js") + { + baseJSDownloadAttempted = true; + return "base.min.js".readText(); + } + return "zoz.html".readText(); + }; + auto cache = Cache(new StdoutLogger(), downloadAsString); + cache.cacheDirectory = getcwd(); + + auto parser = cache.makeParser("https://youtu.be/zoz", 18); + assert(baseJSDownloadAttempted); } diff --git a/source/helpers.d b/source/helpers.d index 864a75c..37a5232 100644 --- a/source/helpers.d +++ b/source/helpers.d @@ -220,3 +220,24 @@ unittest assert("https://www.youtube.com/shorts/_tT2ldpZHek".parseID() == "_tT2ldpZHek"); assert("qlsdkqsldkj".parseID() == ""); } + +string parseBaseJSKey(string url) +{ + string id; + if(url.startsWith("https://")) + { + url = url["https://".length .. $]; + } + if(url.startsWith("www.youtube.com")) + { + url = url["www.youtube.com".length .. $]; + } + return url.split("/")[3]; +} + +unittest +{ + assert("/s/player/0c96dfd3/player_ias.vflset/ar_EG/base.js".parseBaseJSKey() == "0c96dfd3"); + assert("https://www.youtube.com/s/player/0c96dfd3/player_ias.vflset/ar_EG/base.js".parseBaseJSKey() == "0c96dfd3"); + assert("www.youtube.com/s/player/0c96dfd3/player_ias.vflset/ar_EG/base.js".parseBaseJSKey() == "0c96dfd3"); +}