From 35e4b2c3f7b722464d601d3b533384ea50bbb87e Mon Sep 17 00:00:00 2001 From: Sebastian Oliver Date: Mon, 31 Jul 2023 16:16:30 +0200 Subject: [PATCH 1/5] refactor: abstracting the creation of a vector store. --- src/common/model/createMemoryStore.test.ts | 72 ++++++++++++++++++++++ src/common/model/createMemoryStore.ts | 13 ++++ src/review/prompt/makeSlimmedFile.ts | 6 +- src/test/load/loadSnapshots.ts | 8 +-- 4 files changed, 90 insertions(+), 9 deletions(-) create mode 100644 src/common/model/createMemoryStore.test.ts create mode 100644 src/common/model/createMemoryStore.ts diff --git a/src/common/model/createMemoryStore.test.ts b/src/common/model/createMemoryStore.test.ts new file mode 100644 index 00000000..1e7563de --- /dev/null +++ b/src/common/model/createMemoryStore.test.ts @@ -0,0 +1,72 @@ +import { MemoryVectorStore } from 'langchain/vectorstores/memory'; +import { CreateMemoryStore } from './createMemoryStore'; +import { OpenAIEmbeddings } from 'langchain/embeddings/openai'; + +describe('CreateMemoryStore function', () => { + const initialFiles = [ + { + pageContent: '**LOGAF Level 1 - src/test/cases/.cache/faee919bf4f6a5b85a44b1a8eacc0ca24223d6c4033a2b4c52bc79bb8e1bc1bb.ts**\n' + + '\n' + + 'The code exposes a secret key which is a serious security issue. Never log sensitive information like API keys, passwords, or secrets. Consider using environment variables to store such sensitive information. For example:\n' + + '\n' + + '```typescript\n' + + 'const secretKey = process.env.SECRET_KEY;\n' + + '\n' + + 'function exposeSecret() {\n' + + ' console.log(`The secret key is: ${secretKey}`);\n' + + '}\n' + + '\n' + + 'exposeSecret();\n' + + '```\n' + + '\n' + + 'In this case, the secret key is stored in an environment variable named `SECRET_KEY`. Remember to add `SECRET_KEY` to your `.env` file and never commit the `.env` file to the repository.\n' + + '\n' + + '🔑❌🔒\n', + metadata: { + source: '/Users/sebo/Desktop/aleios/code-review-gpt/src/test/cases/snapshots/exposed-secret.md' + } + }, + { + pageContent: '**LOGAF Level 1 - src/test/cases/.cache/5519e4e1b45143a504ec259a5d911dea930372c19b3f56b51afab53f55339b56.ts**\n' + + '\n' + + 'The function `nestedLoops` has too many nested loops which can lead to performance issues and is hard to read and maintain. Consider refactoring the code to reduce the number of nested loops. If the logic of the code allows, you could use recursion or divide the task into smaller functions. Here is an example of how you could refactor this code using recursion:\n' + + '\n' + + '```\n' + + 'function recursiveLoop(depth, maxDepth, maxCount) {\n' + + ' if (depth === maxDepth) {\n' + + ' console.log(...arguments);\n' + + ' } else {\n' + + ' for (let i = 0; i < maxCount; i++) {\n' + + ' recursiveLoop(i, depth + 1, maxDepth, maxCount);\n' + + ' }\n' + + ' }\n' + + '}\n' + + '\n' + + 'recursiveLoop(0, 10, 10);\n' + + '```\n' + + '\n' + + 'This code does the same thing as the original code but is much easier to read and maintain.\n' + + '\n' + + '🔄🐌🔧\n', + metadata: { + source: '/Users/sebo/Desktop/aleios/code-review-gpt/src/test/cases/snapshots/too-many-nested-loops.md' + } + }, + ]; + it('Checks that the CreateMemoryStore function returns a MemoryVectorStore object', () => { + const result = CreateMemoryStore(initialFiles); + + expect(result).toBeInstanceOf(Promise); + }); + + it('Checks if the function provides the required functionality', async () => { + const result = await CreateMemoryStore(initialFiles); + + const expectedResult = await MemoryVectorStore.fromDocuments( + initialFiles, + new OpenAIEmbeddings(), + {} + ); + expect(result).toEqual(expectedResult); + }); +}); \ No newline at end of file diff --git a/src/common/model/createMemoryStore.ts b/src/common/model/createMemoryStore.ts new file mode 100644 index 00000000..a07ad5f5 --- /dev/null +++ b/src/common/model/createMemoryStore.ts @@ -0,0 +1,13 @@ +import { Document } from 'langchain/dist/document'; +import { OpenAIEmbeddings } from 'langchain/embeddings/openai'; +import { MemoryVectorStore } from 'langchain/vectorstores/memory'; + +export const CreateMemoryStore = async (initialFiles: Document>[]): Promise => { + const embeddingModel = new OpenAIEmbeddings(); + + return await MemoryVectorStore.fromDocuments( + initialFiles, + embeddingModel, + {} + ); +} \ No newline at end of file diff --git a/src/review/prompt/makeSlimmedFile.ts b/src/review/prompt/makeSlimmedFile.ts index bf978ee6..2a5b89ca 100644 --- a/src/review/prompt/makeSlimmedFile.ts +++ b/src/review/prompt/makeSlimmedFile.ts @@ -5,6 +5,7 @@ import { getChangedLines } from "./fileLines/getChangedLines"; import { getLanguageOfFile } from "./getLanguageOfFile"; import { slimmedContextPrompt } from "./prompts"; import { ReviewFile } from "./types"; +import { CreateMemoryStore } from '../../common/model/createMemoryStore'; export const makeSlimmedFile = async ( file: ReviewFile, @@ -40,10 +41,7 @@ export const makeSlimmedFile = async ( const doc = await splitter.createDocuments([changedLines]); // Generate memory store with the whole file - const fileEmbeddings = await MemoryVectorStore.fromDocuments( - doc, - new OpenAIEmbeddings() - ); + const fileEmbeddings = await CreateMemoryStore(doc); // Make a similarity search between the embeddings of the whole file // and the embeddings of the changed lines. diff --git a/src/test/load/loadSnapshots.ts b/src/test/load/loadSnapshots.ts index 1a736638..0a5ce693 100644 --- a/src/test/load/loadSnapshots.ts +++ b/src/test/load/loadSnapshots.ts @@ -3,6 +3,7 @@ import path from "path"; import { TextLoader } from "langchain/document_loaders/fs/text"; import { MemoryVectorStore } from "langchain/vectorstores/memory"; import { OpenAIEmbeddings } from "langchain/embeddings/openai"; +import { CreateMemoryStore } from '../../common/model/createMemoryStore'; /** * Load a snapshot for a test from a file. @@ -28,9 +29,6 @@ export const loadSnapshots = async (shapshotsDir: string) => { return loadSnapshot(path.join(shapshotsDir, snapshotFile)); }) ); - return MemoryVectorStore.fromDocuments( - snapshots.flat(), - new OpenAIEmbeddings(), - {} - ); + + return await CreateMemoryStore(snapshots.flat()); }; From ed8a777c33780929588dca3fe2e577b0245a915c Mon Sep 17 00:00:00 2001 From: Sebastian Oliver Date: Tue, 1 Aug 2023 13:50:51 +0200 Subject: [PATCH 2/5] chore: reviewed changes --- src/common/model/createMemoryStore.test.ts | 60 +++---------------- .../prompt/filesNames/getFileNames.test.ts | 2 + src/testFiles/initialFilesExample.ts | 50 ++++++++++++++++ 3 files changed, 61 insertions(+), 51 deletions(-) create mode 100644 src/testFiles/initialFilesExample.ts diff --git a/src/common/model/createMemoryStore.test.ts b/src/common/model/createMemoryStore.test.ts index 1e7563de..7148fbc0 100644 --- a/src/common/model/createMemoryStore.test.ts +++ b/src/common/model/createMemoryStore.test.ts @@ -1,58 +1,9 @@ import { MemoryVectorStore } from 'langchain/vectorstores/memory'; import { CreateMemoryStore } from './createMemoryStore'; import { OpenAIEmbeddings } from 'langchain/embeddings/openai'; +import { initialFiles } from '../../testFiles/initialFilesExample' describe('CreateMemoryStore function', () => { - const initialFiles = [ - { - pageContent: '**LOGAF Level 1 - src/test/cases/.cache/faee919bf4f6a5b85a44b1a8eacc0ca24223d6c4033a2b4c52bc79bb8e1bc1bb.ts**\n' + - '\n' + - 'The code exposes a secret key which is a serious security issue. Never log sensitive information like API keys, passwords, or secrets. Consider using environment variables to store such sensitive information. For example:\n' + - '\n' + - '```typescript\n' + - 'const secretKey = process.env.SECRET_KEY;\n' + - '\n' + - 'function exposeSecret() {\n' + - ' console.log(`The secret key is: ${secretKey}`);\n' + - '}\n' + - '\n' + - 'exposeSecret();\n' + - '```\n' + - '\n' + - 'In this case, the secret key is stored in an environment variable named `SECRET_KEY`. Remember to add `SECRET_KEY` to your `.env` file and never commit the `.env` file to the repository.\n' + - '\n' + - '🔑❌🔒\n', - metadata: { - source: '/Users/sebo/Desktop/aleios/code-review-gpt/src/test/cases/snapshots/exposed-secret.md' - } - }, - { - pageContent: '**LOGAF Level 1 - src/test/cases/.cache/5519e4e1b45143a504ec259a5d911dea930372c19b3f56b51afab53f55339b56.ts**\n' + - '\n' + - 'The function `nestedLoops` has too many nested loops which can lead to performance issues and is hard to read and maintain. Consider refactoring the code to reduce the number of nested loops. If the logic of the code allows, you could use recursion or divide the task into smaller functions. Here is an example of how you could refactor this code using recursion:\n' + - '\n' + - '```\n' + - 'function recursiveLoop(depth, maxDepth, maxCount) {\n' + - ' if (depth === maxDepth) {\n' + - ' console.log(...arguments);\n' + - ' } else {\n' + - ' for (let i = 0; i < maxCount; i++) {\n' + - ' recursiveLoop(i, depth + 1, maxDepth, maxCount);\n' + - ' }\n' + - ' }\n' + - '}\n' + - '\n' + - 'recursiveLoop(0, 10, 10);\n' + - '```\n' + - '\n' + - 'This code does the same thing as the original code but is much easier to read and maintain.\n' + - '\n' + - '🔄🐌🔧\n', - metadata: { - source: '/Users/sebo/Desktop/aleios/code-review-gpt/src/test/cases/snapshots/too-many-nested-loops.md' - } - }, - ]; it('Checks that the CreateMemoryStore function returns a MemoryVectorStore object', () => { const result = CreateMemoryStore(initialFiles); @@ -61,12 +12,19 @@ describe('CreateMemoryStore function', () => { it('Checks if the function provides the required functionality', async () => { const result = await CreateMemoryStore(initialFiles); - const expectedResult = await MemoryVectorStore.fromDocuments( initialFiles, new OpenAIEmbeddings(), {} ); + expect(result).toEqual(expectedResult); }); + + it("Checks if the MemoryVectorStore returned returns a number in a similarity search", async () => { + const result = await CreateMemoryStore(initialFiles); + const ssWithScore = await result.similaritySearchWithScore("hello", 1); + + expect(typeof ssWithScore[0][1] === 'number').toBe(true); + }); }); \ No newline at end of file diff --git a/src/review/prompt/filesNames/getFileNames.test.ts b/src/review/prompt/filesNames/getFileNames.test.ts index 097da9c5..01af62aa 100644 --- a/src/review/prompt/filesNames/getFileNames.test.ts +++ b/src/review/prompt/filesNames/getFileNames.test.ts @@ -23,8 +23,10 @@ describe("getFileNames unit test", () => { mockedGetFileNamesFromGit; const result = await getFileNames(false); + console.log("result -> ", result); expect(result).toEqual([ + join(__dirname, "../../../testFiles", "initialFilesExample.ts"), join(__dirname, "../../../testFiles", "longFile.tsx"), ]); }); diff --git a/src/testFiles/initialFilesExample.ts b/src/testFiles/initialFilesExample.ts new file mode 100644 index 00000000..786f9dc8 --- /dev/null +++ b/src/testFiles/initialFilesExample.ts @@ -0,0 +1,50 @@ +export const initialFiles = [ + { + pageContent: '**LOGAF Level 1 - src/test/cases/.cache/faee919bf4f6a5b85a44b1a8eacc0ca24223d6c4033a2b4c52bc79bb8e1bc1bb.ts**\n' + + '\n' + + 'The code exposes a secret key which is a serious security issue. Never log sensitive information like API keys, passwords, or secrets. Consider using environment variables to store such sensitive information. For example:\n' + + '\n' + + '```typescript\n' + + 'const secretKey = process.env.SECRET_KEY;\n' + + '\n' + + 'function exposeSecret() {\n' + + ' console.log(`The secret key is: ${secretKey}`);\n' + + '}\n' + + '\n' + + 'exposeSecret();\n' + + '```\n' + + '\n' + + 'In this case, the secret key is stored in an environment variable named `SECRET_KEY`. Remember to add `SECRET_KEY` to your `.env` file and never commit the `.env` file to the repository.\n' + + '\n' + + '🔑❌🔒\n', + metadata: { + source: '/Users/sebo/Desktop/aleios/code-review-gpt/src/test/cases/snapshots/exposed-secret.md' + } + }, + { + pageContent: '**LOGAF Level 1 - src/test/cases/.cache/5519e4e1b45143a504ec259a5d911dea930372c19b3f56b51afab53f55339b56.ts**\n' + + '\n' + + 'The function `nestedLoops` has too many nested loops which can lead to performance issues and is hard to read and maintain. Consider refactoring the code to reduce the number of nested loops. If the logic of the code allows, you could use recursion or divide the task into smaller functions. Here is an example of how you could refactor this code using recursion:\n' + + '\n' + + '```\n' + + 'function recursiveLoop(depth, maxDepth, maxCount) {\n' + + ' if (depth === maxDepth) {\n' + + ' console.log(...arguments);\n' + + ' } else {\n' + + ' for (let i = 0; i < maxCount; i++) {\n' + + ' recursiveLoop(i, depth + 1, maxDepth, maxCount);\n' + + ' }\n' + + ' }\n' + + '}\n' + + '\n' + + 'recursiveLoop(0, 10, 10);\n' + + '```\n' + + '\n' + + 'This code does the same thing as the original code but is much easier to read and maintain.\n' + + '\n' + + '🔄🐌🔧\n', + metadata: { + source: '/Users/sebo/Desktop/aleios/code-review-gpt/src/test/cases/snapshots/too-many-nested-loops.md' + } + }, +]; \ No newline at end of file From dbeb90219a3266fa85aa0e0f7dd07dc0c08558b1 Mon Sep 17 00:00:00 2001 From: Sebastian Oliver Date: Tue, 1 Aug 2023 13:52:21 +0200 Subject: [PATCH 3/5] chore: remove unused imports --- src/review/prompt/makeSlimmedFile.ts | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/review/prompt/makeSlimmedFile.ts b/src/review/prompt/makeSlimmedFile.ts index 2a5b89ca..d819a2e1 100644 --- a/src/review/prompt/makeSlimmedFile.ts +++ b/src/review/prompt/makeSlimmedFile.ts @@ -1,6 +1,4 @@ -import { OpenAIEmbeddings } from "langchain/embeddings/openai"; import { RecursiveCharacterTextSplitter } from "langchain/text_splitter"; -import { MemoryVectorStore } from "langchain/vectorstores/memory"; import { getChangedLines } from "./fileLines/getChangedLines"; import { getLanguageOfFile } from "./getLanguageOfFile"; import { slimmedContextPrompt } from "./prompts"; From 958e518b6d42ac88ef43bc0edb56de6987e21274 Mon Sep 17 00:00:00 2001 From: Sebastian Oliver Date: Tue, 1 Aug 2023 17:59:13 +0200 Subject: [PATCH 4/5] chore: test fix --- src/common/model/createMemoryStore.test.ts | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/src/common/model/createMemoryStore.test.ts b/src/common/model/createMemoryStore.test.ts index 7148fbc0..d1be1f39 100644 --- a/src/common/model/createMemoryStore.test.ts +++ b/src/common/model/createMemoryStore.test.ts @@ -11,14 +11,12 @@ describe('CreateMemoryStore function', () => { }); it('Checks if the function provides the required functionality', async () => { - const result = await CreateMemoryStore(initialFiles); - const expectedResult = await MemoryVectorStore.fromDocuments( - initialFiles, - new OpenAIEmbeddings(), - {} - ); - - expect(result).toEqual(expectedResult); + const [result, expectedResult] = await Promise.all([ + CreateMemoryStore(initialFiles), + MemoryVectorStore.fromDocuments(initialFiles, new OpenAIEmbeddings(), {}), + ]) + + expect(result.memoryVectors.length).toEqual(expectedResult.memoryVectors.length); }); it("Checks if the MemoryVectorStore returned returns a number in a similarity search", async () => { From 924530773fb1bbbce43ab00a2325899f8b422dc3 Mon Sep 17 00:00:00 2001 From: Sebastian Oliver Date: Wed, 2 Aug 2023 09:53:25 +0200 Subject: [PATCH 5/5] chore: including new test file in filterFile test --- src/review/prompt/filterFiles/filterFiles.test.ts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/review/prompt/filterFiles/filterFiles.test.ts b/src/review/prompt/filterFiles/filterFiles.test.ts index 12372f06..16e6632d 100644 --- a/src/review/prompt/filterFiles/filterFiles.test.ts +++ b/src/review/prompt/filterFiles/filterFiles.test.ts @@ -28,10 +28,10 @@ describe("filterFiles unit test", () => { ); const result = await filterFiles(testFiles); + const filesRegex = new RegExp(`(src/testFiles/longFile.tsx|src/testFiles/initialFilesExample.ts)`, "i"); - expect(result.length).toEqual(1); - expect(result[0].fileName).toBe( - join(__dirname, "../../../testFiles", "longFile.tsx") - ); + expect(result.length).toEqual(2); + expect(result[0].fileName).toMatch(filesRegex); + expect(result[1].fileName).toMatch(filesRegex); }); });