Skip to content

Commit

Permalink
Merge pull request #63 from UTDNebula/rmp-scraper
Browse files Browse the repository at this point in the history
Update RMP Scraper from Trends
  • Loading branch information
TyHil authored Oct 24, 2024
2 parents 7e17ad1 + a5b38e2 commit c6223de
Show file tree
Hide file tree
Showing 5 changed files with 126 additions and 156 deletions.
15 changes: 12 additions & 3 deletions src/data/builder.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { CourseHeader } from '~content';
import { requestProfessorFromRmp } from '~data/fetchFromRmp';

import { SCHOOL_ID } from './config';
import { SCHOOL_ID, SCHOOL_NAME } from './config';
import { fetchNebulaGrades, fetchNebulaProfessor } from './fetch';

export interface ProfessorProfileInterface {
Expand Down Expand Up @@ -43,9 +43,18 @@ export async function buildProfessorProfile(
);

const rmpsPromise = requestProfessorFromRmp({
professorName: professorSplit.profFirst + ' ' + professorSplit.profLast,
profFirst: professorSplit.profFirst,
profLast: professorSplit.profLast,
schoolId: SCHOOL_ID,
}).then((result) => (rmp = result));
schoolName: SCHOOL_NAME,
})
.then((result) => {
console.log(result);
if (result.message === 'success') {
rmp = result.data;
}
})
.catch((error) => console.error(error.message));

await Promise.all([
nebulaProfessorsPromise,
Expand Down
51 changes: 47 additions & 4 deletions src/data/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,58 @@ export const HEADERS = {
'User-Agent':
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36',
'Content-Type': 'application/json',
Referer: 'https://www.ratemyprofessors.com/',
};

export const PROFESSOR_QUERY = {
query:
'query RatingsListQuery($id: ID!) {node(id: $id) {... on Teacher {legacyId school {id} courseCodes {courseName courseCount} firstName lastName numRatings avgDifficulty avgRating department wouldTakeAgainPercent teacherRatingTags { tagCount tagName } ratingsDistribution { total r1 r2 r3 r4 r5 } }}}',
variables: {},
/**
 * GraphQL request body used to search Rate My Professors for teachers.
 *
 * The query asks `newSearch.teachers` for each matching teacher node's ids,
 * first/last name, school (id and name), department, rating aggregates
 * (avgRating, numRatings, avgDifficulty, wouldTakeAgainPercent), rating tags
 * and the r1-r5 ratings distribution.
 */
export const PROFESSOR_SEARCH_QUERY = {
// GraphQL document, sent as-is in the request body.
query: `
query TeacherSearchQuery($query: TeacherSearchQuery!) {
newSearch {
teachers(query: $query) {
edges {
node {
id
legacyId
firstName
lastName
school {
id
name
}
department
avgRating
numRatings
avgDifficulty
wouldTakeAgainPercent
teacherRatingTags {
tagName
tagCount
}
ratingsDistribution {
total
r1
r2
r3
r4
r5
}
}
}
}
}
}
`,
// Placeholder search variables. For each request the RMP fetch code supplies
// the professor's name as `text` and the base64-encoded `School-<id>` string
// as `schoolID`.
variables: {
query: {
text: '',
schoolID: '',
},
},
};

// School identifier passed to the RMP lookup; it is base64-encoded as
// `School-<id>` before being sent as the search's `schoolID` variable.
export const SCHOOL_ID = '1273';
// School name passed to the RMP lookup; search results are kept only when
// their `school.name` equals this string exactly.
export const SCHOOL_NAME = 'The University of Texas at Dallas';

// Endpoint the professor search query is POSTed to.
export const RMP_GRAPHQL_URL = 'https://www.ratemyprofessors.com/graphql';

export const neededOrigins = [
Expand Down
5 changes: 3 additions & 2 deletions src/data/fetch.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import fetchWithCache, {
cacheIndexGrades,
cacheIndexProfessor,
expireTime,
} from '~data/fetchWithCache';

interface FetchProfessorParameters {
Expand All @@ -23,7 +24,7 @@ export async function fetchNebulaProfessor(
},
},
cacheIndexProfessor,
2629800000,
expireTime,
)
.then((data) => {
if (data.message !== 'success') {
Expand Down Expand Up @@ -52,7 +53,7 @@ export async function fetchNebulaGrades(
},
},
cacheIndexGrades,
2629800000,
expireTime,
)
.then((data) => {
if (data.message !== 'success') {
Expand Down
207 changes: 61 additions & 146 deletions src/data/fetchFromRmp.ts
Original file line number Diff line number Diff line change
@@ -1,161 +1,76 @@
import { HEADERS, PROFESSOR_QUERY, RMP_GRAPHQL_URL } from '~data/config';
import fetchWithCache, { cacheIndexRmp } from '~data/fetchWithCache';

/**
 * Writes an error to the console, prefixed with where it happened.
 *
 * @param context Name of the function or step that failed.
 * @param err The error (or any other value) to append to the message.
 */
function reportError(context, err) {
  const prefix = 'Error in ' + context;
  const message = prefix + ': ' + err;
  console.error(message);
}

/**
 * Builds the Rate My Professors search-page URL for a professor at a school.
 *
 * @param professorName Name to search for; added as the URL-encoded `q` parameter.
 * @param schoolId School id placed in the URL path.
 * @returns The full search URL as a string.
 */
function getProfessorUrl(professorName: string, schoolId: string): string {
  const searchPage = new URL(
    `https://www.ratemyprofessors.com/search/professors/${schoolId}?`,
  ); //UTD
  searchPage.searchParams.append('q', professorName);
  return searchPage.toString();
}

/**
 * Extracts the legacy id of the best-matching professor from the text of an
 * RMP search page.
 *
 * A candidate matches when the lower-cased `professorName` contains
 * "<first word of firstName> <lastName>" (also lower-cased). Among matching
 * candidates the one with the most ratings wins; on a tie the later one in the
 * text is kept.
 *
 * @param text Raw page text containing `"legacyId"`, `"numRatings"`,
 *   `"firstName"` and `"lastName"` JSON fragments.
 * @param professorName Full name of the professor being looked up.
 * @returns The matching `legacyId` as a string, or `null` when nothing matches.
 */
function getProfessorId(text: string, professorName: string): string | null {
  const lowerCaseProfessorName = professorName.toLowerCase();
  const regex =
    /"legacyId":(\d+).*?"numRatings":(\d+).*?"firstName":"(.*?)","lastName":"(.*?)"/g;

  let bestId: string | null = null;
  // Must be updated on every accepted candidate, otherwise the comparison
  // below is always true and the last name match wins regardless of ratings.
  let highestNumRatings = 0;

  for (const match of text.matchAll(regex)) {
    const candidateName =
      match[3].split(' ')[0].toLowerCase() + ' ' + match[4].toLowerCase();
    const numRatings = parseInt(match[2], 10);
    if (
      lowerCaseProfessorName.includes(candidateName) &&
      numRatings >= highestNumRatings
    ) {
      bestId = match[1];
      highestNumRatings = numRatings;
    }
  }

  return bestId;
}

function getGraphQlUrlProp(professorId: string) {
HEADERS['Referer'] =
`https://www.ratemyprofessors.com/ShowRatings.jsp?tid=${professorId}`;
PROFESSOR_QUERY['variables']['id'] = btoa(`Teacher-${professorId}`);
import { HEADERS, PROFESSOR_SEARCH_QUERY, RMP_GRAPHQL_URL } from '~data/config';
import fetchWithCache, {
cacheIndexRmp,
expireTime,
} from '~data/fetchWithCache';

/**
 * Builds the fetch options for an RMP GraphQL teacher search.
 *
 * @param name Professor name to use as the search text.
 * @param schoolID School id; sent base64-encoded as `School-<id>`.
 * @returns Options for a POST request whose body is the serialized search
 *   query with its variables filled in.
 */
function getGraphQlUrlProp(name: string, schoolID: string) {
  // Build a per-request copy rather than writing into the shared exported
  // config object, so no search values linger in module state between calls.
  // Key order matches the config object, so the serialized body is unchanged.
  const searchQuery = {
    ...PROFESSOR_SEARCH_QUERY,
    variables: {
      ...PROFESSOR_SEARCH_QUERY.variables,
      query: {
        ...PROFESSOR_SEARCH_QUERY.variables.query,
        text: name,
        schoolID: btoa('School-' + schoolID),
      },
    },
  };
  return {
    method: 'POST',
    headers: HEADERS,
    body: JSON.stringify(searchQuery),
  };
}

/**
 * Returns a promise that resolves (with no value) once `delay` milliseconds
 * have elapsed.
 *
 * @param delay Time to wait, in milliseconds, as passed to `setTimeout`.
 */
function wait(delay) {
  return new Promise((done) => {
    setTimeout(done, delay);
  });
}

/**
 * Fetches `url` through the cache, retrying after a fixed delay on failure.
 *
 * @param url Address to fetch.
 * @param delay Milliseconds to wait between attempts.
 * @param tries Total number of attempts allowed. Values of 1 or less mean a
 *   single attempt.
 * @param fetchOptions Options forwarded to `fetchWithCache` on every attempt.
 * @returns The result of the first successful attempt; rejects with the last
 *   error once the attempts are used up.
 */
function fetchRetry(url: string, delay: number, tries: number, fetchOptions) {
  function onError(err) {
    const triesLeft: number = tries - 1;
    // `!(x > 0)` also stops on zero, negative and NaN counts. The previous
    // `!triesLeft` check let a negative count retry forever.
    if (!(triesLeft > 0)) {
      throw err;
    }
    return wait(delay).then(() =>
      fetchRetry(url, delay, triesLeft, fetchOptions),
    );
  }
  return fetchWithCache(url, fetchOptions, cacheIndexRmp, 2629800000).catch(
    onError,
  );
}

/**
 * Passes a response through when its status is 200; otherwise logs the failure
 * and retries the request.
 *
 * A retry is only attempted when the response exists and carries a `url`;
 * any other value is returned unchanged.
 *
 * @param response Value produced by the initial fetch.
 * @param fetchOptions Options to reuse for the retry; defaults to `{}`.
 * @returns The original response, or the result of the retried fetch.
 */
async function validateResponse(response, fetchOptions) {
  const succeeded = response?.status === 200;
  if (succeeded || !response || !response.url) {
    return response;
  }

  const details = {
    status: response.status,
    statusText: response.statusText,
    redirected: response.redirected,
    url: response.url,
  };
  reportError(
    'validateResponse',
    'Status not OK for fetch request. Details are: ' + JSON.stringify(details),
  );

  // Fall back to an empty options object when none were supplied.
  // Up to 3 attempts, 200 ms apart.
  return fetchRetry(response.url, 200, 3, fetchOptions || {});
}

/**
 * Runs a GraphQL request against RMP and hands the result to `resolve`.
 *
 * When the response has a `data.node` property, that node is passed on;
 * otherwise the validated response itself is passed. On any failure the error
 * is logged and `resolve(null)` is called, so the caller's promise always
 * settles.
 *
 * @param graphQlUrlProp Fetch options for the GraphQL request.
 * @param resolve Callback that receives the professor data, or `null`.
 */
function fetchWithGraphQl(graphQlUrlProp, resolve) {
  const onFailure = (err) => {
    reportError('fetchWithGraphQl', err);
    resolve(null);
  };

  try {
    fetchWithCache(RMP_GRAPHQL_URL, graphQlUrlProp, cacheIndexRmp, 2629800000)
      .then((response) => validateResponse(response, graphQlUrlProp))
      .then((rating) => {
        if (
          rating != null &&
          Object.hasOwn(rating, 'data') &&
          Object.hasOwn(rating['data'], 'node')
        ) {
          rating = rating['data']['node'];
        }
        resolve(rating);
      })
      // A try/catch cannot see asynchronous rejections; without this handler
      // a failed fetch would leave `resolve` uncalled and the caller hanging.
      .catch(onFailure);
  } catch (err) {
    // Still needed for errors thrown synchronously while starting the fetch.
    onFailure(err);
  }
}

/**
 * Parameters for looking up a professor on Rate My Professors.
 */
export interface RmpRequest {
  /**
   * @deprecated Superseded by `profFirst` and `profLast`. Kept optional so
   * callers that still pass it continue to type-check.
   */
  professorName?: string;
  /** Professor's first name; may contain several words. */
  profFirst: string;
  /** Professor's last name. */
  profLast: string;
  /** School id used to scope the search. */
  schoolId: string;
  /** School name that search results are compared against. */
  schoolName: string;
}
/**
 * Looks up a professor on Rate My Professors through the GraphQL teacher
 * search.
 *
 * Only the first word of `profFirst` is used. Search results are kept when
 * their school name equals `schoolName` exactly and their first and last names
 * contain the requested names (compared case-insensitively). Of those, the
 * entry with the most ratings is returned; on a tie the earlier result wins.
 *
 * @param request Names and school to search for.
 * @returns Resolves with `{ message: 'success', data }`, where `data` is the
 *   chosen teacher node. Rejects with a plain `{ message }` object when no
 *   professor matches or the request fails.
 */
export function requestProfessorFromRmp({
  profFirst,
  profLast,
  schoolId,
  schoolName,
}: RmpRequest): Promise<RMPInterface> {
  // NOTE(review): the resolved value is a `{ message, data }` wrapper, not a
  // bare RMPInterface — confirm the declared return type against the callers.
  const notFound = { message: 'Data for professor not found' };

  const firstName = profFirst.split(' ')[0];
  const wantedFirst = firstName.toLowerCase();
  const wantedLast = profLast.toLowerCase();

  // create fetch object for professor
  const graphQlUrlProp = getGraphQlUrlProp(
    firstName + ' ' + profLast,
    schoolId,
  );

  // fetch professor info by name with graphQL
  return fetchWithCache(
    RMP_GRAPHQL_URL,
    graphQlUrlProp,
    cacheIndexRmp,
    expireTime,
  )
    .then((response) => {
      // Optional chaining tolerates a null `data` (or any null level in
      // between), which a chain of Object.hasOwn calls would throw on.
      const edges = response?.data?.newSearch?.teachers?.edges;
      if (!Array.isArray(edges)) {
        return Promise.reject(notFound);
      }

      //Remove profs not at the requested school and with bad name match
      const professors = edges.filter(
        (prof: { node: RMPInterface }) =>
          prof.node.school.name === schoolName &&
          prof.node.firstName.toLowerCase().includes(wantedFirst) &&
          prof.node.lastName.toLowerCase().includes(wantedLast),
      );
      if (professors.length === 0) {
        return Promise.reject(notFound);
      }

      //Pick prof instance with most ratings
      const maxRatingsProfessor = professors.reduce((best, candidate) =>
        candidate.node.numRatings > best.node.numRatings ? candidate : best,
      );

      return {
        message: 'success',
        data: maxRatingsProfessor.node,
      };
    })
    .catch((error) =>
      // Always reject with a plain `{ message }` object, whatever failed.
      Promise.reject({ message: error?.message }),
    );
}
Expand Down
4 changes: 3 additions & 1 deletion src/data/fetchWithCache.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@ const storage = new Storage({
//Increment these to reset cache on next deployment
export const cacheIndexProfessor = 0;
export const cacheIndexGrades = 0;
export const cacheIndexRmp = 0;
export const cacheIndexRmp = 1;

// Cache lifetime: 1 week, expressed in milliseconds.
// The inline literal this constant replaces at the call sites (2629800000) is
// one month in milliseconds, so the unit has to stay ms. The previous value,
// 604800, is one week in *seconds* and would expire entries after ~10 minutes.
// NOTE(review): confirm against how the cache code compares expiry times.
export const expireTime = 7 * 24 * 60 * 60 * 1000; //1 week

function getCache(key: string, cacheIndex: number) {
return new Promise((resolve, reject) => {
Expand Down

0 comments on commit c6223de

Please sign in to comment.