Skip to content

Commit

Permalink
(New) IMdB caching
Browse files Browse the repository at this point in the history
  • Loading branch information
AshDyson committed Apr 1, 2021
1 parent 03a8723 commit 882605a
Show file tree
Hide file tree
Showing 9 changed files with 147 additions and 55 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,4 @@ frontend/.eslintcache
admin/.eslintcache
/logs
api/.DS_Store
api/imdb_dump.txt
Binary file modified api/.DS_Store
Binary file not shown.
132 changes: 89 additions & 43 deletions api/meta/imdb.js
Original file line number Diff line number Diff line change
@@ -1,63 +1,109 @@
// https://www.imdb.com/title/tt6475714/
const axios = require("axios");
const cheerio = require("cheerio");
const logger = require("../util/logger");

const cacheManager = require("cache-manager");
const memoryCache = cacheManager.caching({
store: "memory",
max: 1000,
ttl: 604800 /*seconds*/,
});
const zlib = require("zlib");
const fs = require("fs");
const path = require("path");
const lineReader = require("line-reader");
const Imdb = require("../models/imdb");

async function lookup(imdb_id) {
return false;
if (!imdb_id) {
return false;
}

try {
let data = await getRaw(imdb_id);
return data;
} catch (err) {
return {
rating: false,
description: false,
};
}
let data = await Imdb.findOne({ id: imdb_id });
if (!data) return false;
return {
rating: { ratingValue: data.rating },
description: false,
};
}

async function getRaw(id) {
let data = false;
async function storeCache() {
const unzip = zlib.createGunzip();
let project_folder, tempFile;
if (process.pkg) {
project_folder = path.dirname(process.execPath);
tempFile = path.join(project_folder, "./imdb_dump.txt");
} else {
project_folder = __dirname;
tempFile = path.join(project_folder, "../imdb_dump.txt");
}
logger.info("IMDB: Rebuilding Cache");
try {
data = await memoryCache.wrap(`imdb_${id}`, function () {
return crawl(id);
logger.info("IMDB: Cache Downloading latest cache");
const res = await axios({
url: "https://datasets.imdbws.com/title.ratings.tsv.gz",
method: "GET",
responseType: "stream",
});
} catch (err) {
logger.log("warn", `Error crawling imdb - ${id}`);
logger.log({ level: "error", message: err });
logger.info("IMDB: Cache Storing to temp");
const fileStream = fs.createWriteStream(tempFile);
res.data.pipe(unzip).pipe(fileStream);
fileStream.on("close", async () => {
logger.info("IMDB: Cache Download complete");
try {
await parseData(tempFile);
logger.info("IMDB: Cache Finished");
} catch (e) {
console.log(e);
logger.error("IMDB: Cache failed - db write issue");
}
});
} catch (e) {
logger.log({ level: "error", message: e });
}
return data;
}

async function crawl(id) {
/**
 * Rebuilds the local IMDb ratings collection from a tab-separated dump file.
 *
 * The existing collection is emptied first, then the file is read line by
 * line. Column 0 is stored as `id`, column 1 as `rating`, and column 2 is the
 * count compared against `minVotes`. Rows are queued as `insertOne`
 * operations and written through `processBuffer` in batches.
 *
 * Fixes over the previous implementation:
 *  - the row that arrived while the buffer was full was silently dropped;
 *  - the final line was always inserted, even when below the vote threshold
 *    or blank;
 *  - a file containing only the header (or nothing) never settled the promise;
 *  - file open/read errors were never surfaced, and write failures rejected
 *    with `undefined`.
 *
 * @param {string} file - Path to the uncompressed TSV dump.
 * @param {number} [batchSize=50000] - Operations to queue before flushing.
 * @param {number} [minVotes=1000] - Rows whose third column is below this
 *   value (or is not a number) are skipped.
 * @returns {Promise<void>} Resolves once every qualifying row has been
 *   written; rejects with the underlying error on a read or write failure.
 */
async function parseData(file, batchSize = 50000, minVotes = 1000) {
  logger.info("IMDB: Cache Emptying old cache");
  await Imdb.deleteMany({});
  logger.info("IMDB: Cache cleared");
  logger.info("IMDB: Cache parsing download, updating local cache");
  return new Promise((resolve, reject) => {
    let buffer = [];
    lineReader.eachLine(
      file,
      // Keep exactly three declared parameters: line-reader switches to its
      // asynchronous (callback-driven) mode based on the iteratee's arity.
      async (line, last, cb) => {
        try {
          const [id, rating, votes] = line.split("\t");
          const isHeader = id === "tconst";
          // NaN (blank or malformed rows) fails this comparison and is skipped.
          const hasEnoughVotes = Number.parseInt(votes, 10) >= minVotes;
          if (!isHeader && hasEnoughVotes) {
            buffer.push({
              insertOne: {
                document: { id, rating },
              },
            });
          }
          // Flush after pushing so the row that fills the batch is kept, and
          // never send an empty batch to the database.
          if (buffer.length > 0 && (last || buffer.length >= batchSize)) {
            await processBuffer(buffer);
            buffer = [];
          }
          if (last) {
            resolve();
          }
          cb();
        } catch (err) {
          // Reject before stopping the reader so the failure wins over the
          // completion callback below.
          reject(err);
          cb(false);
        }
      },
      // Completion callback: covers open/read errors and files with no lines,
      // neither of which ever reaches the iteratee with `last === true`.
      (err) => {
        if (err) {
          reject(err);
          return;
        }
        resolve();
      }
    );
  });
}

async function processBuffer(data) {
try {
let res = await axios.get(`https://www.imdb.com/title/${id}`);
let raw = cheerio.load(res.data);
let meta = JSON.parse(raw(`script[type='application/ld+json']`).html());
let rating = meta.aggregateRating;
delete rating["@type"];
let description = meta.description;
return {
rating: rating,
description: description,
};
} catch (err) {
return {
rating: false,
description: false,
};
await Imdb.bulkWrite(data);
} catch {
throw "IMDB: Error cannot write to Db";
}
}

module.exports = lookup;
module.exports = { lookup, storeCache };
8 changes: 8 additions & 0 deletions api/models/imdb.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
const mongoose = require("mongoose");

// Cached IMDb rating for a single title.
const ImdbSchema = new mongoose.Schema({
  // IMDb title identifier. Indexed because lookups query by this field
  // (`findOne({ id })`); without it every lookup scans the collection.
  // Not declared unique, so a duplicate row cannot fail a bulk insert.
  id: { type: String, index: true },
  // Rating kept as the raw string read from the dump.
  rating: String,
});

module.exports = mongoose.model("Imdb", ImdbSchema);
40 changes: 40 additions & 0 deletions api/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion api/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
"iso-639-1": "^2.1.8",
"joi": "^17.3.0",
"jsonwebtoken": "^8.5.1",
"line-reader": "^0.4.0",
"mongoose": "^5.12.1",
"multer": "^1.4.2",
"nodejs-nodemailer-outlook": "^1.2.3",
Expand All @@ -35,7 +36,8 @@
"saslprep": "^1.0.3",
"winston": "^3.3.3",
"xhr-request": "^1.1.0",
"xml-js": "^1.6.11"
"xml-js": "^1.6.11",
"zlib": "^1.0.5"
},
"devDependencies": {
"dotenv": "^8.2.0"
Expand Down
6 changes: 1 addition & 5 deletions api/tmdb/movie.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ const getConfig = require("../util/config");

const fanartLookup = require("../fanart");
const onServer = require("../plex/onServer");
const imdb = require("../meta/imdb");
const { lookup: imdb } = require("../meta/imdb");
const getLanguage = require("./languages");

const logger = require("../util/logger");
Expand Down Expand Up @@ -44,10 +44,6 @@ async function movieLookup(id, minified = false) {
movie.tile = findEnLogo(fanart.moviethumb);
}
}
if (minified) {
// Pre-fetch IMDB on minfied lookup but don't wait or return
imdb(movie.imdb_id);
}
try {
let collectionData = false;
let [
Expand Down
7 changes: 1 addition & 6 deletions api/tmdb/show.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ const axios = require("axios");
const getConfig = require("../util/config");
const fanartLookup = require("../fanart");
const onServer = require("../plex/onServer");
const imdb = require("../meta/imdb");
const { lookup: imdb } = require("../meta/imdb");
const getLanguage = require("./languages");

const logger = require("../util/logger");
Expand Down Expand Up @@ -39,11 +39,6 @@ async function showLookup(id, minified = false) {
return { error: "no id returned" };
}

if (minified) {
// Pre-fetch IMDB on minfied lookup but don't wait or return
imdb(external.imdb_id);
}

try {
let [
imdb_data,
Expand Down
4 changes: 4 additions & 0 deletions api/worker.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ const QuotaSystem = require("./requests/quotas");
const getConfig = require("./util/config");
const mongoose = require("mongoose");
const buildDiscovery = require("./discovery/build");
const { storeCache: imdbCache } = require("./meta/imdb");

class Worker {
async connnectDb() {
Expand All @@ -26,6 +27,7 @@ class Worker {
// return; // for debug local
try {
await this.connnectDb();
await imdbCache();
const libUpdate = new LibraryUpdate();
await libUpdate.scan();
buildDiscovery();
Expand Down Expand Up @@ -57,6 +59,7 @@ class Worker {
logger.log("verbose", `API: Registering Quota reset job`);
this.resetQuotas.start();
} catch (err) {
console.log(err);
logger.error("CRONW: Failed to start crons!");
}
}
Expand All @@ -72,6 +75,7 @@ class Worker {
break;
case 3:
new QuotaSystem().reset();
imdbCache();
default:
logger.log("warn", "CRONW: Invalid cron");
}
Expand Down

0 comments on commit 882605a

Please sign in to comment.