From 882605a826dd9c20578ec56037424bf9db22c183 Mon Sep 17 00:00:00 2001 From: AshDyson Date: Thu, 1 Apr 2021 17:21:07 +0100 Subject: [PATCH] (New) IMdB caching --- .gitignore | 1 + api/.DS_Store | Bin 8196 -> 10244 bytes api/meta/imdb.js | 132 ++++++++++++++++++++++++++++-------------- api/models/imdb.js | 8 +++ api/package-lock.json | 40 +++++++++++++ api/package.json | 4 +- api/tmdb/movie.js | 6 +- api/tmdb/show.js | 7 +-- api/worker.js | 4 ++ 9 files changed, 147 insertions(+), 55 deletions(-) create mode 100644 api/models/imdb.js diff --git a/.gitignore b/.gitignore index d6748e6d5..a8a884b8d 100644 --- a/.gitignore +++ b/.gitignore @@ -18,3 +18,4 @@ frontend/.eslintcache admin/.eslintcache /logs api/.DS_Store +api/imdb_dump.txt diff --git a/api/.DS_Store b/api/.DS_Store index 3fcc44b076950d4b0891314d542933f9aec42109..ff0e78e63120f64e560acb430b912151ec28d358 100644 GIT binary patch delta 241 zcmZp1XbF&DU|?W$DortDU{C-uIe-{M3-C-V6q~50D9Q!oFar4u4EYRY45 { + logger.info("IMDB: Cache Download complete"); + try { + await parseData(tempFile); + logger.info("IMDB: Cache Finished"); + } catch (e) { + console.log(e); + logger.error("IMDB: Cache failed - db write issue"); + } + }); + } catch (e) { + logger.log({ level: "error", message: e }); } - return data; } -async function crawl(id) { +async function parseData(file) { + logger.info("IMDB: Cache Emptying old cache"); + await Imdb.deleteMany({}); + logger.info("IMDB: Cache cleared"); + logger.info("IMDB: Cache parsing download, updating local cache"); + return new Promise((resolve, reject) => { + let buffer = []; + lineReader.eachLine(file, async (line, last, cb) => { + let data = line.split("\t"); + if (data[0] === "tconst" || (parseInt(data[2]) < 1000 && !last)) { + cb(); + return; + } + if (buffer.length < 50000) { + buffer.push({ + insertOne: { + document: { + id: data[0], + rating: data[1], + }, + }, + }); + if (!last) { + cb(); + return; + } + } + try { + await processBuffer(buffer); + buffer = []; + if (last) { + resolve(); + } + cb(); + } catch { + cb(false); + reject(); + } + }); + }); +} + +async function processBuffer(data) { try { - let res = await axios.get(`https://www.imdb.com/title/${id}`); - let raw = cheerio.load(res.data); - let meta = JSON.parse(raw(`script[type='application/ld+json']`).html()); - let rating = meta.aggregateRating; - delete rating["@type"]; - let description = meta.description; - return { - rating: rating, - description: description, - }; - } catch (err) { - return { - rating: false, - description: false, - }; + await Imdb.bulkWrite(data); + } catch { + throw "IMDB: Error cannot write to Db"; } } -module.exports = lookup; +module.exports = { lookup, storeCache }; diff --git a/api/models/imdb.js b/api/models/imdb.js new file mode 100644 index 000000000..a0fa2521f --- /dev/null +++ b/api/models/imdb.js @@ -0,0 +1,8 @@ +const mongoose = require("mongoose"); + +const ImdbSchema = mongoose.Schema({ + id: String, + rating: String, +}); + +module.exports = mongoose.model("Imdb", ImdbSchema); diff --git a/api/package-lock.json b/api/package-lock.json index 570d5c9b9..a157ed7b2 100644 --- a/api/package-lock.json +++ b/api/package-lock.json @@ -76,6 +76,11 @@ "negotiator": "0.6.2" } }, + "adm-zip": { + "version": "0.5.5", + "resolved": "https://registry.npmjs.org/adm-zip/-/adm-zip-0.5.5.tgz", + "integrity": "sha512-IWwXKnCbirdbyXSfUDvCCrmYrOHANRZcc8NcRrvTlIApdl7PwE9oGcsYvNeJPAVY1M+70b4PxXGKIf8AEuiQ6w==" + }, "append-field": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/append-field/-/append-field-1.0.0.tgz", @@ -270,6 +275,11 @@ "text-hex": "1.0.x" } }, + "commander": { + "version": "2.20.3", + "resolved": "https://registry.npmjs.org/commander/-/commander-2.20.3.tgz", + "integrity": "sha512-GpVkmM8vF2vQUkj2LvZmD35JxeJOLCwJ9cUkugyk2nuhbv3+mJvpLYYt+0+USMxE+oj+ey/lJEnhZw75x/OMcQ==" + }, "concat-stream": { "version": "1.6.2", "resolved": "https://registry.npmjs.org/concat-stream/-/concat-stream-1.6.2.tgz", @@ -361,6 +371,16 @@ "type": "^1.0.1" } }, + "d3-dsv": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/d3-dsv/-/d3-dsv-2.0.0.tgz", + "integrity": "sha512-E+Pn8UJYx9mViuIUkoc93gJGGYut6mSDKy2+XaPwccwkRGlR+LO97L2VCCRjQivTwLHkSnAJG7yo00BWY6QM+w==", + "requires": { + "commander": "2", + "iconv-lite": "0.4", + "rw": "1" + } + }, "debug": { "version": "3.1.0", "resolved": "https://registry.npmjs.org/debug/-/debug-3.1.0.tgz", @@ -826,6 +846,11 @@ "resolved": "https://registry.npmjs.org/kuler/-/kuler-2.0.0.tgz", "integrity": "sha512-Xq9nH7KlWZmXAtodXDDRE7vs6DU1gTU8zYDHDiWLSip45Egwq3plLHzPn27NgvzL2r1LMPC1vdqh98sQxtqj4A==" }, + "line-reader": { + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/line-reader/-/line-reader-0.4.0.tgz", + "integrity": "sha1-F+RIGNoKwzVnW6MAlU+U72cOZv0=" + }, "lodash": { "version": "4.17.21", "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz", @@ -1269,6 +1294,11 @@ "util-deprecate": "~1.0.1" } }, + "readline": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/readline/-/readline-1.3.0.tgz", + "integrity": "sha1-xYDXfvLPyHUrEySYBg3JeTp6wBw=" + }, "regexp-clone": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/regexp-clone/-/regexp-clone-1.0.0.tgz", @@ -1288,6 +1318,11 @@ "resolved": "https://registry.npmjs.org/resolve-from/-/resolve-from-2.0.0.tgz", "integrity": "sha1-lICrIOlP+h2egKgEx+oUdhGWa1c=" }, + "rw": { + "version": "1.3.3", + "resolved": "https://registry.npmjs.org/rw/-/rw-1.3.3.tgz", + "integrity": "sha1-P4Yt+pGrdmsUiF700BEkv9oHT7Q=" + }, "safe-buffer": { "version": "5.1.2", "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz", @@ -1620,6 +1655,11 @@ "version": "4.0.0", "resolved": "https://registry.npmjs.org/yallist/-/yallist-4.0.0.tgz", "integrity": "sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A==" + }, + "zlib": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/zlib/-/zlib-1.0.5.tgz", + "integrity": "sha1-bnyXL8NxxkWmr7A6sUdp3vEU/MA=" } } } diff --git a/api/package.json b/api/package.json index 2ada5a906..97e5c1e81 100644 --- a/api/package.json +++ b/api/package.json @@ -27,6 +27,7 @@ "iso-639-1": "^2.1.8", "joi": "^17.3.0", "jsonwebtoken": "^8.5.1", + "line-reader": "^0.4.0", "mongoose": "^5.12.1", "multer": "^1.4.2", "nodejs-nodemailer-outlook": "^1.2.3", @@ -35,7 +36,8 @@ "saslprep": "^1.0.3", "winston": "^3.3.3", "xhr-request": "^1.1.0", - "xml-js": "^1.6.11" + "xml-js": "^1.6.11", + "zlib": "^1.0.5" }, "devDependencies": { "dotenv": "^8.2.0" diff --git a/api/tmdb/movie.js b/api/tmdb/movie.js index d96984fac..578f055c5 100644 --- a/api/tmdb/movie.js +++ b/api/tmdb/movie.js @@ -7,7 +7,7 @@ const getConfig = require("../util/config"); const fanartLookup = require("../fanart"); const onServer = require("../plex/onServer"); -const imdb = require("../meta/imdb"); +const { lookup: imdb } = require("../meta/imdb"); const getLanguage = require("./languages"); const logger = require("../util/logger"); @@ -44,10 +44,6 @@ async function movieLookup(id, minified = false) { movie.tile = findEnLogo(fanart.moviethumb); } } - if (minified) { - // Pre-fetch IMDB on minfied lookup but don't wait or return - imdb(movie.imdb_id); - } try { let collectionData = false; let [ diff --git a/api/tmdb/show.js b/api/tmdb/show.js index d8edd4535..c8f91eb07 100644 --- a/api/tmdb/show.js +++ b/api/tmdb/show.js @@ -6,7 +6,7 @@ const axios = require("axios"); const getConfig = require("../util/config"); const fanartLookup = require("../fanart"); const onServer = require("../plex/onServer"); -const imdb = require("../meta/imdb"); +const { lookup: imdb } = require("../meta/imdb"); const getLanguage = require("./languages"); const logger = require("../util/logger"); @@ -39,11 +39,6 @@ async function showLookup(id, minified = false) { return { error: "no id returned" }; } - if (minified) { - // Pre-fetch IMDB on minfied lookup but don't wait or return - imdb(external.imdb_id); - } - try { let [ imdb_data, diff --git a/api/worker.js b/api/worker.js index 56f2adcec..49feef19f 100644 --- a/api/worker.js +++ b/api/worker.js @@ -5,6 +5,7 @@ const QuotaSystem = require("./requests/quotas"); const getConfig = require("./util/config"); const mongoose = require("mongoose"); const buildDiscovery = require("./discovery/build"); +const { storeCache: imdbCache } = require("./meta/imdb"); class Worker { async connnectDb() { @@ -26,6 +27,7 @@ class Worker { // return; // for debug local try { await this.connnectDb(); + await imdbCache(); const libUpdate = new LibraryUpdate(); await libUpdate.scan(); buildDiscovery(); @@ -57,6 +59,7 @@ class Worker { logger.log("verbose", `API: Registering Quota reset job`); this.resetQuotas.start(); } catch (err) { + console.log(err); logger.error("CRONW: Failed to start crons!"); } } @@ -72,6 +75,7 @@ class Worker { break; case 3: new QuotaSystem().reset(); + imdbCache(); default: logger.log("warn", "CRONW: Invalid cron"); }