-
Notifications
You must be signed in to change notification settings - Fork 0
/
scraper.js
131 lines (109 loc) · 4.46 KB
/
scraper.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
const cheerio = require('cheerio');
const fetch = require('node-fetch');
const _ = require('lodash');
const { map, reduce, curry, compose } = require('lodash/fp');
/**
 * Look up the display text for a category guid inside a container.
 *
 * Selects the direct children of `container` whose `value` attribute equals
 * `guid` and returns the result of calling `.text()` on that selection.
 *
 * @param {Cheerio} container Cheerio selection whose children are searched.
 * @param {String} guid The category guid to match against the `value` attribute.
 * @returns {String} Text of the matching children.
 */
function guidToText(container, guid) {
  const valueSelector = `[value="${guid}"]`;
  const matchingChildren = container.children(valueSelector);
  return matchingChildren.text();
}
/**
 * Build a guid-to-name lookup for one dropdown.
 *
 * Every argument is forwarded to `cheerio(...)`; the resulting selection is
 * supplied as the first argument of a curried `guidToText`, so the value
 * returned here only needs the category guid to produce the text.
 */
const getStatNameFromDropdown = (...cheerioArgs) => {
  const dropdown = cheerio(...cheerioArgs);
  return curry(guidToText)(dropdown);
};
/**
 * Collect the "top heroes" comparison data found under a play-type container.
 *
 * The result is keyed by the category name looked up from the
 * `select[data-group-id=comparisons]` dropdown; each value is an array of
 * `{ hero, value }` objects, one per `.progress-category-item` element.
 *
 * @param {Cheerio} playtypeContainer Can either be the quickplay or competitive container.
 * @returns {Object} Category name mapped to an array of `{ hero, value }` entries.
 */
function getTopHeroStats(playtypeContainer) {
  const categoryNodes = playtypeContainer
    .find("section.career-section .progress-category[data-group-id=comparisons]")
    .toArray();
  const categoryNameFor = getStatNameFromDropdown(
    'select[data-group-id=comparisons]',
    playtypeContainer
  );

  // Each item node is passed through cheerio.load before its title and
  // description are read.
  const describeHero = itemNode => {
    const $item = cheerio.load(itemNode);
    return {
      hero: $item('.title').text(),
      value: $item('.description').text()
    };
  };

  const statsByCategory = {};
  for (const categoryNode of categoryNodes) {
    const $category = cheerio.load(categoryNode);
    const heroItems = $category('.progress-category-item').toArray();
    const categoryName = categoryNameFor(categoryNode.attribs['data-category-id']);
    statsByCategory[categoryName] = heroItems.map(itemNode => describeHero(itemNode));
  }
  return statsByCategory;
}
/**
 * Collect the career stats tables found under a play-type container.
 *
 * The result is keyed by the category name looked up from the
 * `select[data-group-id=stats]` dropdown; each value is an object built from
 * the table rows, using the first cell's text as the key and the second
 * cell's text as the value.
 *
 * @param {Cheerio} playtypeContainer Container element whose stats tables are read.
 * @returns {Object} Category name mapped to an object of stat name/value text pairs.
 */
function getCareerStats(playtypeContainer) {
  const categoryTables = playtypeContainer
    .find("section.career-section div[data-group-id=stats]")
    .toArray();
  const categoryNameFor = getStatNameFromDropdown(
    'select[data-group-id=stats]',
    playtypeContainer
  );

  const statsByCategory = {};
  categoryTables.forEach(tableNode => {
    const $table = cheerio(tableNode);

    // Later rows overwrite earlier ones that share the same first-cell text.
    const statsForCategory = {};
    $table.find('tbody > tr').toArray().forEach(rowNode => {
      const $row = cheerio(rowNode);
      const statName = $row.find('td:nth-child(1)').text();
      const statValue = $row.find('td:nth-child(2)').text();
      statsForCategory[statName] = statValue;
    });

    const categoryName = categoryNameFor($table.attr('data-category-id'));
    statsByCategory[categoryName] = statsForCategory;
  });
  return statsByCategory;
}
/**
 * Extract Overwatch player stats and profile information from a career page.
 *
 * The input is wrapped with `Promise.resolve`, so either a plain HTML string
 * or a Promise that resolves to one is accepted.
 *
 * @param {Promise|String} document HTML string, or a Promise that resolves to one.
 * @returns {Promise<Object>} Resolves to an object with `hero`, `image`,
 *   `games_won`, `quickplay` and `competitive` keys; the last two each hold
 *   `top_heroes` and `career_stats`.
 */
function getStatsFromDocument(document) {
  // Both play types are scraped the same way, only the container differs.
  const collectPlaytypeStats = container => ({
    top_heroes: getTopHeroStats(container),
    career_stats: getCareerStats(container)
  });

  return Promise.resolve(document).then(html => {
    const $ = cheerio.load(html);
    const masthead = $(".masthead-player");

    // First run of digits in the masthead detail text; NaN when there is none.
    const gamesWonMatch = $(".masthead-detail span").text().match(/\d+/);
    const gamesWon = gamesWonMatch === null ? NaN : parseInt(gamesWonMatch[0], 10);

    return {
      hero: masthead.children(".header-masthead").text(),
      image: masthead.children(".player-portrait").prop('src'),
      games_won: gamesWon,
      quickplay: collectPlaytypeStats($("#quickplay")),
      competitive: collectPlaytypeStats($("#competitive"))
    };
  });
}
/**
 * Fetch a URL and resolve with the response body as text.
 *
 * The response status is not inspected; whatever body comes back is returned.
 *
 * @param {String} url Address to request.
 * @returns {Promise<String>} Resolves to the response body text.
 */
function getPage(url) {
  const readBodyAsText = response => response.text();
  return fetch(url).then(readBodyAsText);
}
/**
 * Build the playoverwatch.com career-page URL for a player.
 *
 * The first "#" in the battletag is replaced with "-". Non-ASCII characters
 * in the battletag are percent-encoded as UTF-8 so the result is a valid URL
 * path; ASCII characters (including any existing "%XX" escapes) are left
 * exactly as given, so ASCII-only battletags produce the same URL as before.
 *
 * @param {String} battletag Player battletag, e.g. "Name#1234".
 * @param {String} region Region path segment, inserted as-is.
 * @param {String} platform Platform path segment, inserted as-is.
 * @returns {String} Absolute career-page URL.
 */
function buildUrl(battletag, region, platform) {
  const urlTag = battletag
    .replace("#", "-")
    // Only non-ASCII runs are encoded, so already-escaped input is not double-encoded.
    .replace(/[^\x00-\x7F]+/g, run => encodeURIComponent(run));
  return `https://playoverwatch.com/${region}/career/${platform}/${urlTag}`;
}
module.exports = compose(getStatsFromDocument, getPage, buildUrl);