From 439babfda42edc539933a7cd63a7a9628742df72 Mon Sep 17 00:00:00 2001 From: kitswas <90329875+kitswas@users.noreply.github.com> Date: Tue, 25 Apr 2023 11:49:05 +0530 Subject: [PATCH] New top language algorithm implementation (#1732) * Reduced vercel maxDuration * Implemented new algorithm for Top Langs * Revert "Reduced vercel maxDuration" This reverts commit b0bc626efe12c738cf5005e7f11c7d2a07b6387a. * Added documentation * Fixed broken implementation * Update fetchTopLanguages.test.js Changed tests * Now uses the general formula The parameters p and q can be set by the user. * Updated tests and added new test * Added new test New test for order by repo count. * Updated documentation Added explanation and examples for new options. * Updated documentation This was overwritten in the merge commit. * docs: improve docs and fix tests * Renamed parameters Renamed `p` and `q` to `size_weight` and `count_weight`, respectively. * Updated the documentation Changes introduced in f2516d60a442dfdbb9e24ddda8743664bcb8064d --------- Co-authored-by: rickstaa --- api/top-langs.js | 4 +++ readme.md | 40 +++++++++++++++------ src/fetchers/top-languages-fetcher.js | 24 +++++++++++-- tests/fetchTopLanguages.test.js | 52 ++++++++++++++++++++++++--- 4 files changed, 104 insertions(+), 16 deletions(-) diff --git a/api/top-langs.js b/api/top-langs.js index e67d9533234417..cde0a9af08a93b 100644 --- a/api/top-langs.js +++ b/api/top-langs.js @@ -25,6 +25,8 @@ export default async (req, res) => { layout, langs_count, exclude_repo, + size_weight, + count_weight, custom_title, locale, border_radius, @@ -46,6 +48,8 @@ export default async (req, res) => { const topLangs = await fetchTopLanguages( username, parseArray(exclude_repo), + size_weight, + count_weight, ); const cacheSeconds = clampValue( diff --git a/readme.md b/readme.md index fe04a88e81bbab..76b46e0eaefb5c 100644 --- a/readme.md +++ b/readme.md @@ -310,6 +310,8 @@ You can provide multiple comma-separated values in the bg_color option 
to render - `custom_title` - Sets a custom title for the card _(string)_. Default `Most Used Languages`. - `disable_animations` - Disables all animations in the card _(boolean)_. Default: `false`. - `hide_progress` - It uses the compact layout option, hides percentages, and removes the bars. Default: `false`. +- `size_weight` - Configures language stats algorithm _(number)_ (see [Language stats algorithm](#language-stats-algorithm)), defaults to 1. +- `count_weight` - Configures language stats algorithm _(number)_ (see [Language stats algorithm](#language-stats-algorithm)), defaults to 0. > **Warning** > Language names should be URI-escaped, as specified in [Percent Encoding](https://en.wikipedia.org/wiki/Percent-encoding) @@ -359,7 +361,25 @@ Use [show_owner](#customization) variable to include the repo's owner username The top languages card shows a GitHub user's most frequently used top language. > **Note** -> Top Languages does not indicate my skill level or anything like that; it's a GitHub metric to determine which languages have the most code on GitHub. It is a new feature of github-readme-stats. +> Top Languages does not indicate the user's skill level or anything like that; it's a GitHub metric to determine which languages have the most code on GitHub. It is a new feature of github-readme-stats. + +### Language stats algorithm + +We use the following algorithm to calculate the language percentages on the language card: + +```js +ranking_index = (byte_count ** size_weight) * (repo_count ** count_weight) +``` + +By default, only the byte count is used for determining the language percentages shown on the language card (i.e. `size_weight=1` and `count_weight=0`). You can, however, use the `&size_weight=` and `&count_weight=` options to weight the language usage calculation. The values must be non-negative real numbers. [More details about the algorithm can be found here](https://github.com/anuraghazra/github-readme-stats/issues/1600#issuecomment-1046056305). 
+ +- `&size_weight=1&count_weight=0` - _(default)_ Orders by byte count. +- `&size_weight=0.5&count_weight=0.5` - _(recommended)_ Uses both byte and repo count for ranking +- `&size_weight=0&count_weight=1` - Orders by repo count + +```md +[![Top Langs](https://github-readme-stats.vercel.app/api/top-langs/?username=anuraghazra&size_weight=0.5&count_weight=0.5)](https://github.com/anuraghazra/github-readme-stats) +``` ### Usage @@ -419,7 +439,7 @@ You can use the `&hide_progress=true` option to hide the percentages and the pro [![Top Langs](https://github-readme-stats.vercel.app/api/top-langs/?username=anuraghazra&layout=compact)](https://github.com/anuraghazra/github-readme-stats) -- Hidden progress bars +- Hidden progress bars [![Top Langs](https://github-readme-stats.vercel.app/api/top-langs/?username=anuraghazra&hide_progress=true)](https://github.com/anuraghazra/github-readme-stats) @@ -564,14 +584,14 @@ Since the GitHub API only allows 5k requests per hour, my `https://github-readme
:hammer_and_wrench: Step-by-step guide for deploying on other platforms -1. Fork or clone this repo as per your needs -2. Add `express` to the dependencies section of `package.json` -https://github.com/anuraghazra/github-readme-stats/blob/ba7c2f8b55eac8452e479c8bd38b044d204d0424/package.json#L54-L61 -3. Run `npm i` if needed (initial setup) -4. Run `node express.js` to start the server, or set the entry point to `express.js` in `package.json` if you're deploying on a managed service -https://github.com/anuraghazra/github-readme-stats/blob/ba7c2f8b55eac8452e479c8bd38b044d204d0424/package.json#L11 -5. You're done 🎉 -
+1. Fork or clone this repo as per your needs +2. Add `express` to the dependencies section of `package.json` + +3. Run `npm i` if needed (initial setup) +4. Run `node express.js` to start the server, or set the entry point to `express.js` in `package.json` if you're deploying on a managed service + +5. You're done 🎉 + ### Keep your fork up to date diff --git a/src/fetchers/top-languages-fetcher.js b/src/fetchers/top-languages-fetcher.js index 86d794435be088..45b2ba7d851837 100644 --- a/src/fetchers/top-languages-fetcher.js +++ b/src/fetchers/top-languages-fetcher.js @@ -54,7 +54,12 @@ const fetcher = (variables, token) => { * @param {string[]} exclude_repo List of repositories to exclude. * @returns {Promise} Top languages data. */ -const fetchTopLanguages = async (username, exclude_repo = []) => { +const fetchTopLanguages = async ( + username, + exclude_repo = [], + size_weight = 1, + count_weight = 0, +) => { if (!username) throw new MissingParamError(["username"]); const res = await retryer(fetcher, { login: username }); @@ -101,6 +106,8 @@ const fetchTopLanguages = async (username, exclude_repo = []) => { .sort((a, b) => b.size - a.size) .filter((name) => !repoToHide[name.name]); + let repoCount = 0; + repoNodes = repoNodes .filter((node) => node.languages.edges.length > 0) // flatten the list of language nodes @@ -111,9 +118,14 @@ const fetchTopLanguages = async (username, exclude_repo = []) => { // if we already have the language in the accumulator // & the current language name is same as previous name - // add the size to the language size. + // add the size to the language size and increase repoCount. 
if (acc[prev.node.name] && prev.node.name === acc[prev.node.name].name) { langSize = prev.size + acc[prev.node.name].size; + repoCount += 1; + } else { + // reset repoCount to 1 + // language must exist in at least one repo to be detected + repoCount = 1; } return { ...acc, @@ -121,10 +133,18 @@ const fetchTopLanguages = async (username, exclude_repo = []) => { name: prev.node.name, color: prev.node.color, size: langSize, + count: repoCount, }, }; }, {}); + Object.keys(repoNodes).forEach((name) => { + // comparison index calculation + repoNodes[name].size = + Math.pow(repoNodes[name].size, size_weight) * + Math.pow(repoNodes[name].count, count_weight); + }); + const topLangs = Object.keys(repoNodes) .sort((a, b) => repoNodes[b].size - repoNodes[a].size) .reduce((result, key) => { diff --git a/tests/fetchTopLanguages.test.js b/tests/fetchTopLanguages.test.js index 24416cd294525e..c3f558bf4236ff 100644 --- a/tests/fetchTopLanguages.test.js +++ b/tests/fetchTopLanguages.test.js @@ -60,20 +60,22 @@ const error = { }; describe("FetchTopLanguages", () => { - it("should fetch correct language data", async () => { + it("should fetch correct language data while using the new calculation", async () => { mock.onPost("https://api.github.com/graphql").reply(200, data_langs); - let repo = await fetchTopLanguages("anuraghazra"); + let repo = await fetchTopLanguages("anuraghazra", [], 0.5, 0.5); expect(repo).toStrictEqual({ HTML: { color: "#0f0", + count: 2, name: "HTML", - size: 200, + size: 20.000000000000004, }, javascript: { color: "#0ff", + count: 2, name: "javascript", - size: 200, + size: 20.000000000000004, }, }); }); @@ -85,17 +87,59 @@ describe("FetchTopLanguages", () => { expect(repo).toStrictEqual({ HTML: { color: "#0f0", + count: 1, name: "HTML", size: 100, }, javascript: { color: "#0ff", + count: 2, + name: "javascript", + size: 200, + }, + }); + }); + + it("should fetch correct language data while using the old calculation", async () => { + 
mock.onPost("https://api.github.com/graphql").reply(200, data_langs); + + let repo = await fetchTopLanguages("anuraghazra", [], 1, 0); + expect(repo).toStrictEqual({ + HTML: { + color: "#0f0", + count: 2, + name: "HTML", + size: 200, + }, + javascript: { + color: "#0ff", + count: 2, name: "javascript", size: 200, }, }); }); + it("should rank languages by the number of repositories they appear in", async () => { + mock.onPost("https://api.github.com/graphql").reply(200, data_langs); + + let repo = await fetchTopLanguages("anuraghazra", [], 0, 1); + expect(repo).toStrictEqual({ + HTML: { + color: "#0f0", + count: 2, + name: "HTML", + size: 2, + }, + javascript: { + color: "#0ff", + count: 2, + name: "javascript", + size: 2, + }, + }); + }); + it("should throw error", async () => { mock.onPost("https://api.github.com/graphql").reply(200, error);