Skip to content

Commit

Permalink
Merge pull request #136 from olasunkanmi-SE/RAG
Browse files Browse the repository at this point in the history
Change embedding model from Xenova/all-MiniLM-L6-v2 to text-embedding…
olasunkanmi-SE authored Jan 22, 2025

Verified

This commit was signed with the committer’s verified signature.
LogFlames Elias Lundell
2 parents bc53b3a + 35ea6d5 commit 4084bd9
Showing 14 changed files with 463 additions and 360 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -7,4 +7,5 @@ node_modules
.sentryclirc
config.ts
patterns
webviewUi/dist
webviewUi/dist
samples
2 changes: 1 addition & 1 deletion src/application/constant.ts
Original file line number Diff line number Diff line change
@@ -71,6 +71,6 @@ export const EmbeddingsConfig = {
maxRetries: 3,
retryDelay: 1000,
rateLimit: 1500,
embeddingModel: "Xenova/all-MiniLM-L6-v2",
embeddingModel: "text-embedding-004",
textModel: "gemini-1.5-flash",
};
10 changes: 4 additions & 6 deletions src/application/interfaces/code.repository.interface.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
import { ResultSet, Row } from "@libsql/client/.";
import { ResultSet, Row } from "@libsql/client";

export interface ICodeRepository {
CreateTable(values: string): Promise<ResultSet[] | undefined>;
searchSimilarFunctions(
queryEmbeddings: number[],
limit: number,
): Promise<Row[] | undefined>;
createFunctionsTable(): Promise<ResultSet | undefined>;
insertFunctions(values: string): Promise<ResultSet | undefined>;
searchSimilarFunctions(queryEmbeddings: number[], limit: number): Promise<Row[] | undefined>;
}
15 changes: 4 additions & 11 deletions src/application/utils.ts
Original file line number Diff line number Diff line change
@@ -14,9 +14,7 @@ export const formatText = (text?: string): string => {
return "";
};

export const getConfigValue: GetConfigValueType<any> = <T>(
key: string,
): T | undefined => {
export const getConfigValue: GetConfigValueType<any> = <T>(key: string): T | undefined => {
return vscode.workspace.getConfiguration().get<T>(key);
};

@@ -68,11 +66,7 @@ export const getGenerativeAiModel = (): string | undefined => {
return getConfigValue("generativeAi.option");
};

export function getUri(
webview: vscode.Webview,
extensionUri: vscode.Uri,
pathList: string[],
) {
export function getUri(webview: vscode.Webview, extensionUri: vscode.Uri, pathList: string[]) {
return webview.asWebviewUri(vscode.Uri.joinPath(extensionUri, ...pathList));
}

@@ -81,8 +75,7 @@ export function getUri(
// and ensure script integrity when using Content Security Policy (CSP)
export const getNonce = () => {
let text = "";
const possible =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
const possible = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
for (let i = 0; i < 32; i++) {
text += possible.charAt(Math.floor(Math.random() * possible.length));
}
@@ -95,5 +88,5 @@ export const handleError = (error: unknown, message?: string): void => {
};

export const showInfoMessage = (message?: string): void => {
vscode.window.showErrorMessage(`${message}`);
vscode.window.showInformationMessage(`${message}`);
};
137 changes: 40 additions & 97 deletions src/extension.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,5 @@
import * as vscode from "vscode";
import {
APP_CONFIG,
generativeAiModels,
OLA_ACTIONS,
USER_MESSAGE,
} from "./application/constant";
import { APP_CONFIG, generativeAiModels, OLA_ACTIONS, USER_MESSAGE } from "./application/constant";
import { getConfigValue } from "./application/utils";
import { Comments } from "./events/comment";
import { ExplainCode } from "./events/explain";
@@ -22,37 +17,33 @@ import { AnthropicWebViewProvider } from "./providers/anthropic-web-view-provide
import { CodeActionsProvider } from "./providers/code-actions-provider";
import { GeminiWebViewProvider } from "./providers/gemini-web-view-provider";
import { GroqWebViewProvider } from "./providers/groq-web-view-provider";
import { CodeIndexingService } from "./services/code-indexing-service";
import { FileUploader } from "./services/file-uploader";
import { setUpGenerativeAiModel } from "./services/generative-ai-model-manager";
import { Brain } from "./services/memory";
import { TypeScriptAtsMapper } from "./services/typescript-ats.service";
import { CodeStructureMapper } from "./services/code-structure.mapper.service";
import { dbManager } from "./infrastructure/repository/data-base-manager";

const {
geminiKey,
geminiModel,
groqApiKey,
groqModel,
anthropicApiKey,
anthropicModel,
grokApiKey,
grokModel,
} = APP_CONFIG;
const { geminiKey, geminiModel, groqApiKey, groqModel, anthropicApiKey, anthropicModel, grokApiKey, grokModel } =
APP_CONFIG;

/**
 * Opens the shared database connection through `dbManager.connect`.
 *
 * NOTE(review): the URL below is an absolute path inside one developer's home
 * directory, so the connection presumably cannot succeed on any other machine.
 * It should likely be derived from the extension's storage location or the
 * workspace root instead — TODO confirm the intended location of `dev.db`.
 */
const connectDB = async () => {
await dbManager.connect("file:/Users/olasunkanmi/Documents/Github/codebuddy/patterns/dev.db");
};

export async function activate(context: vscode.ExtensionContext) {
try {
Brain.getInstance();
await connectDB();
const fileUpload = new FileUploader(context);
await fileUpload.createFile("allx.db");

const getKnowledgeBase = async () => {
const codeMapper = new TypeScriptAtsMapper();
const mappedCode = await codeMapper.buildCodebaseMap();
const ats = Object.values(mappedCode).flatMap((repo) =>
Object.values(repo.modules)
);
const mapper = new CodeStructureMapper(ats);
return mapper.normalizeData();
};
getKnowledgeBase();
const files = await fileUpload.getFiles();
const names = await fileUpload.getFileNames();
console.log(files, names);

const index = CodeIndexingService.createInstance();
const result = index.insertFunctionsinDB();
console.log(result);
const {
comment,
review,
@@ -68,52 +59,19 @@ export async function activate(context: vscode.ExtensionContext) {
generateCodeChart,
inlineChat,
} = OLA_ACTIONS;
const getComment = new Comments(
`${USER_MESSAGE} generates the code comments...`,
context
);
const getInLineChat = new InLineChat(
`${USER_MESSAGE} generates a response...`,
context
);
const generateOptimizeCode = new OptimizeCode(
`${USER_MESSAGE} optimizes the code...`,
context
);
const generateRefactoredCode = new RefactorCode(
`${USER_MESSAGE} refactors the code...`,
context
);
const explainCode = new ExplainCode(
`${USER_MESSAGE} explains the code...`,
context
);
const generateReview = new ReviewCode(
`${USER_MESSAGE} reviews the code...`,
context
);
const codeChartGenerator = new CodeChartGenerator(
`${USER_MESSAGE} creates the code chart...`,
context
);
const codePattern = new FileUploader(context);
const knowledgeBase = new ReadFromKnowledgeBase(
`${USER_MESSAGE} generate your code pattern...`,
context
);
const generateCommitMessage = new GenerateCommitMessage(
`${USER_MESSAGE} generates a commit message...`,
context
);
const generateInterviewQuestions = new InterviewMe(
`${USER_MESSAGE} generates interview questions...`,
context
);
const getComment = new Comments(`${USER_MESSAGE} generates the code comments...`, context);
const getInLineChat = new InLineChat(`${USER_MESSAGE} generates a response...`, context);
const generateOptimizeCode = new OptimizeCode(`${USER_MESSAGE} optimizes the code...`, context);
const generateRefactoredCode = new RefactorCode(`${USER_MESSAGE} refactors the code...`, context);
const explainCode = new ExplainCode(`${USER_MESSAGE} explains the code...`, context);
const generateReview = new ReviewCode(`${USER_MESSAGE} reviews the code...`, context);
const codeChartGenerator = new CodeChartGenerator(`${USER_MESSAGE} creates the code chart...`, context);
const codePattern = fileUpload;
const knowledgeBase = new ReadFromKnowledgeBase(`${USER_MESSAGE} generate your code pattern...`, context);
const generateCommitMessage = new GenerateCommitMessage(`${USER_MESSAGE} generates a commit message...`, context);
const generateInterviewQuestions = new InterviewMe(`${USER_MESSAGE} generates interview questions...`, context);

const generateUnitTests = new GenerateUnitTest(
`${USER_MESSAGE} generates unit tests...`,
context
);
const generateUnitTests = new GenerateUnitTest(`${USER_MESSAGE} generates unit tests...`, context);

const actionMap = {
[comment]: () => getComment.execute(),
@@ -123,11 +81,7 @@ export async function activate(context: vscode.ExtensionContext) {
[interviewMe]: () => generateInterviewQuestions.execute(),
[generateUnitTest]: () => generateUnitTests.execute(),
[fix]: (errorMessage: string) =>
new FixError(
`${USER_MESSAGE} finds a solution to the error...`,
context,
errorMessage
).execute(errorMessage),
new FixError(`${USER_MESSAGE} finds a solution to the error...`, context, errorMessage).execute(errorMessage),
[explain]: () => explainCode.execute(),
[pattern]: () => codePattern.uploadFileHandler(),
[knowledge]: () => knowledgeBase.execute(),
@@ -136,18 +90,17 @@ export async function activate(context: vscode.ExtensionContext) {
[inlineChat]: () => getInLineChat.execute(),
};

const subscriptions: vscode.Disposable[] = Object.entries(actionMap).map(
([action, handler]) => vscode.commands.registerCommand(action, handler)
const subscriptions: vscode.Disposable[] = Object.entries(actionMap).map(([action, handler]) =>
vscode.commands.registerCommand(action, handler)
);

const selectedGenerativeAiModel = getConfigValue("generativeAi.option");

const quickFix = new CodeActionsProvider();
const quickFixCodeAction: vscode.Disposable =
vscode.languages.registerCodeActionsProvider(
{ scheme: "file", language: "*" },
quickFix
);
const quickFixCodeAction: vscode.Disposable = vscode.languages.registerCodeActionsProvider(
{ scheme: "file", language: "*" },
quickFix
);

const modelConfigurations: {
[key: string]: {
@@ -180,25 +133,15 @@ export async function activate(context: vscode.ExtensionContext) {
if (selectedGenerativeAiModel in modelConfigurations) {
const modelConfig = modelConfigurations[selectedGenerativeAiModel];
const { key, model, webviewProviderClass } = modelConfig;
setUpGenerativeAiModel(
context,
model,
key,
webviewProviderClass,
subscriptions,
quickFixCodeAction
);
setUpGenerativeAiModel(context, model, key, webviewProviderClass, subscriptions, quickFixCodeAction);
}
} catch (error) {
Brain.clear();
vscode.window.showErrorMessage(
"An Error occured while setting up generative AI model"
);
vscode.window.showErrorMessage("An Error occured while setting up generative AI model");
console.log(error);
}
}

export function deactivate(context: vscode.ExtensionContext) {
//TODO once the application is rewritten in React, delete the pattern file on deactivate
context.subscriptions.forEach((subscription) => subscription.dispose());
}
105 changes: 43 additions & 62 deletions src/infrastructure/repository/code-repository.ts
Original file line number Diff line number Diff line change
@@ -1,77 +1,77 @@
import { Client, createClient, ResultSet, Row } from "@libsql/client";
import { Client, ResultSet, Row } from "@libsql/client";
import { ICodeRepository } from "../../application/interfaces/code.repository.interface";
import { Logger } from "../logger/logger";
import { createTableQuery, insertDataQuery, selectFunctionProps } from "./sql";
import { dbManager } from "./data-base-manager";
import { createIndex, createTableQuery, insertDataQuery, selectFunctionProps } from "./sql";

export class CodeRepository implements ICodeRepository {
private client: Client | undefined;
private readonly client: Client | undefined;
private static instance: CodeRepository;
private readonly logger: Logger;
private constructor() {
this.logger = new Logger("CodeRepository");
this.client = dbManager.getClient();
}

public static async createInstance(): Promise<CodeRepository> {
public static async getInstance(): Promise<CodeRepository> {
if (!CodeRepository.instance) {
CodeRepository.instance = new CodeRepository();
await CodeRepository.instance.init();
}
return CodeRepository.instance;
}

private async connectDB(): Promise<Client> {
async createFunctionsTable(): Promise<ResultSet | undefined> {
let transaction;
try {
return (this.client = createClient({
url: "file:dev.db",
}));
} catch (error) {
this.logger.error("Failed to initialize database", error);
throw error;
}
}

private async init(): Promise<void> {
try {
this.client = await this.connectDB();
} catch (error) {
this.logger.error("Failed to initialize database", error);
throw error;
}
}

public static getInstance(): CodeRepository {
if (!CodeRepository.instance) {
CodeRepository.instance = new CodeRepository();
}
return CodeRepository.instance;
}

async CreateTable(): Promise<ResultSet[] | undefined> {
try {
const query = createTableQuery();
const table = await this.client?.batch(query, "write");
transaction = await this.client?.transaction();
const table = await transaction?.execute(createTableQuery());
if (table) {
this.logger.info("Database initialized successfully");
}
await transaction?.execute(createIndex());
await transaction?.commit();
return table;
} catch (error) {
if (transaction) {
await transaction.rollback();
}
this.logger.error("Failed to initialize database", error);
throw error;
} finally {
if (transaction) {
transaction.close();
}
}
}

async InsertData(values: string) {
async insertFunctions(values: string): Promise<ResultSet | undefined> {
let retries = 0;
const maxRetries = 5;
const retryDelay = 100;
try {
const query = insertDataQuery(values);
const table = await this.client?.batch(query, "write");
if (table) {
this.logger.info("Database initialized successfully");
}
return table;
await this.createFunctionsTable();
} catch (error) {
this.logger.error("Failed to initialize database", error);
this.logger.error("Failed to create table", error);
throw error;
}
while (retries < maxRetries) {
try {
const query = insertDataQuery(values);
const result = await this.client?.execute(query);
if (result) {
this.logger.info("Database initialized successfully");
}
return result;
} catch (error: any) {
if (error.code === "SQLITE_BUSY") {
retries++;
await new Promise((resolve) => setTimeout(resolve, retryDelay));
} else {
this.logger.error("Failed to initialize database", error);
throw new Error(`Failed to insert into table after ${maxRetries} retries`);
}
}
}
}

async searchSimilarFunctions(queryEmbeddings: number[], limit: number): Promise<Row[] | undefined> {
@@ -87,23 +87,4 @@ export class CodeRepository implements ICodeRepository {
throw error;
}
}

async healthCheck(): Promise<boolean> {
try {
return Boolean(await this.client?.execute("SELECT 1"));
} catch (error) {
this.logger.error("Failed to connect to database", error);
return false;
}
}

async close(): Promise<void> {
try {
await this.client?.close();
this.logger.info("Database connection closed");
} catch (error) {
this.logger.error("Failed to close database connection", error);
throw error;
}
}
}
104 changes: 104 additions & 0 deletions src/infrastructure/repository/data-base-manager.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
import { Client, createClient } from "@libsql/client";
import * as vscode from "vscode";
import { Logger } from "../logger/logger";

/**
 * Process-wide owner of the libSQL client.
 *
 * The class is a singleton (private constructor + `getInstance`). Callers are
 * expected to `connect` once and then obtain the client through `getClient`.
 */
class DatabaseManager {
  private client: Client | undefined;
  private static instance: DatabaseManager;
  private readonly logger: Logger;

  private constructor() {
    this.logger = new Logger("DatabaseManager");
  }

  /**
   * Returns the singleton instance of the DatabaseManager, creating it on first use.
   */
  public static getInstance(): DatabaseManager {
    if (!DatabaseManager.instance) {
      DatabaseManager.instance = new DatabaseManager();
    }
    return DatabaseManager.instance;
  }

  /**
   * Retrieves the connected client.
   *
   * @returns The client created by a successful `connect` call
   * @throws Error if no client is currently connected
   */
  public getClient(): Client {
    if (!this.client) {
      throw new Error("Database not connected.");
    }
    return this.client;
  }

  /**
   * Closes the current client (if any) and resets the client reference.
   * An information message is shown after a successful disconnection.
   */
  public async disconnect() {
    if (this.client) {
      await this.client.close();
      this.client = undefined;
      vscode.window.showInformationMessage("Database disconnected");
    }
  }

  /**
   * Establishes a connection to the database, retrying on failure.
   *
   * Makes up to 3 attempts. After a failed attempt it waits `1000 ms * attempt`
   * (linearly increasing) before trying again. An attempt counts as failed when
   * creating the client throws OR when the health check reports `false`; in
   * both cases the half-initialised client is discarded, so `getClient` never
   * hands out a client that did not pass the health check.
   *
   * @param url The database connection URL
   * @throws The last connection error if every attempt fails
   */
  public async connect(url: string): Promise<void> {
    if (this.client) {
      this.logger.info("Client already connected");
      return;
    }

    const maxRetries = 3;
    let lastError: unknown;

    for (let attempt = 1; attempt <= maxRetries; attempt++) {
      try {
        this.client = createClient({ url });

        if (await this.healthCheck()) {
          this.logger.info("Database connected successfully");
          return;
        }
        // healthCheck swallows its own error and returns false; treat that as
        // a failed attempt so the loop always makes progress.
        lastError = new Error("Database health check failed");
      } catch (error) {
        lastError = error;
      }

      this.logger.error("Failed to connect to database", lastError);
      await this.discardClient();

      if (attempt < maxRetries) {
        await new Promise((resolve) => setTimeout(resolve, 1000 * attempt));
      }
    }

    this.logger.error(`Failed to connect to database after ${maxRetries} attempts`, lastError);
    throw lastError;
  }

  /**
   * Performs a health check on the database connection by executing `SELECT 1`.
   * Errors are logged and reported as `false` rather than thrown.
   *
   * @returns A boolean indicating whether the health check passed
   */
  async healthCheck(): Promise<boolean> {
    try {
      return Boolean(await this.client?.execute("SELECT 1"));
    } catch (error) {
      this.logger.error("Failed to connect to database", error);
      return false;
    }
  }

  /**
   * Drops the current client reference and closes it on a best-effort basis.
   * Used after a failed connection attempt; a failure to close is logged, not thrown.
   */
  private async discardClient(): Promise<void> {
    const stale = this.client;
    this.client = undefined;
    try {
      await stale?.close();
    } catch (error) {
      this.logger.error("Failed to close database client", error);
    }
  }
}

/** Shared DatabaseManager instance, obtained through `DatabaseManager.getInstance()`. */
export const dbManager = DatabaseManager.getInstance();
21 changes: 9 additions & 12 deletions src/infrastructure/repository/sql.ts
Original file line number Diff line number Diff line change
@@ -1,23 +1,20 @@
export const createIndex = () =>
` CREATE INDEX IF NOT EXISTS code_functions_idx ON code_functions (libsql_vector_idx(embedding))`;
export const createTableQuery = () => {
return [
"DROP TABLE IF EXISTS code_functions",
`CREATE TABLE IF NOT EXISTS code_functions (
return `
CREATE TABLE IF NOT EXISTS code_functions (
class_name TEXT NOT NULL,
function_name TEXT NOT NULL,
file_path TEXT NOT NULL,
created_at TEXT NOT NULL,
embedding F32_BLOB(768) NOT NULL
)`,
"CREATE INDEX IF NOT EXISTS code_functions_idx ON code_functions (libsql_vector_idx(embedding))",
];
)
`;
};

export const insertDataQuery = (values: string) => {
return [
`INSERT INTO code_functions (class_name, function_name, file_path, created_at, embedding)
VALUES ${values}`,
];
};
export const insertDataQuery = (values: string) =>
`INSERT INTO code_functions (class_name, function_name, file_path, created_at, embedding)
VALUES ${values}`;

export const selectFunctionProps = () => {
return `
Original file line number Diff line number Diff line change
@@ -1,27 +1,51 @@
import { EmbeddingService } from "./embedding-service";
import { ResultSet } from "@libsql/client/.";
import { APP_CONFIG } from "../application/constant";
import { IFunctionData } from "../application/interfaces";
import { getConfigValue } from "../application/utils";
import { Logger } from "../infrastructure/logger/logger";
import { CodeRepository } from "../infrastructure/repository/code-repository";
import { CodeStructureMapper } from "./code-structure.mapper.service";
import { EmbeddingService } from "./embedding-service";
import { TypeScriptAtsMapper } from "./typescript-ats.service";
import { CodeRepository } from "../infrastructure/repository/code-repository";
import { ResultSet } from "@libsql/client/.";

export class ContextService {
/**
* Provides a centralized service for managing code indexing, including building function structure maps,
* generating function descriptions, generating embeddings, and inserting function data into a database.
*/
export class CodeIndexingService {
logger: Logger;
embeddingService: EmbeddingService;
codeRepository: CodeRepository | undefined;
private static instance: CodeIndexingService;
constructor() {
this.logger = new Logger("ContextService");
this.logger = new Logger("CodeIndexingService");
const apiKey = this.getAPIKey();
this.embeddingService = new EmbeddingService(apiKey);
}

/**
* Creates a singleton instance of the CodeIndexingService class.
* @returns {CodeIndexingService} The CodeIndexingService instance.
*/
public static createInstance(): CodeIndexingService {
if (!CodeIndexingService.instance) {
CodeIndexingService.instance = new CodeIndexingService();
}
return CodeIndexingService.instance;
}

/**
* Retrieves an instance of the CodeRepository, which is used to interact with the database.
* @returns {Promise<void>} A promise that resolves when the repository is initialized.
*/
async getCodeRepository() {
this.codeRepository = await CodeRepository.createInstance();
this.codeRepository = await CodeRepository.getInstance();
}

/**
* Retrieves the Gemini API key from the application configuration, which is required for code indexing.
* @returns {string} The API key.
*/
getAPIKey(): string {
const { geminiKey } = APP_CONFIG;
const apiKey = getConfigValue(geminiKey);
@@ -32,9 +56,13 @@ export class ContextService {
return apiKey;
}

/**
* Builds a function structure map using the TypeScript ATS mapper and CodeStructureMapper services.
* @returns {Promise<Partial<IFunctionData>[]>} A promise that resolves with an array of function data.
*/
async buildFunctionStructureMap(): Promise<Partial<IFunctionData>[]> {
try {
const codeATS = TypeScriptAtsMapper.getInstance();
const codeATS = await TypeScriptAtsMapper.getInstance();
if (!codeATS) {
throw new Error("Failed to get TypeScriptAtsMapper instance");
}
@@ -46,11 +74,15 @@ export class ContextService {
const mapper = new CodeStructureMapper(ats);
return mapper.normalizeData();
} catch (error) {
console.error("Error building function structure map:", error);
this.logger.error("Error building function structure map:", error);
throw error;
}
}

/**
* Generates function descriptions using the EmbeddingService.
* @returns {Promise<IFunctionData[]>} A promise that resolves with an array of function data.
*/
async generateFunctionDescription(): Promise<IFunctionData[]> {
try {
const functions = (await this.buildFunctionStructureMap()) as IFunctionData[];
@@ -64,6 +96,10 @@ export class ContextService {
}
}

/**
* Generates embeddings for the given functions using the EmbeddingService.
* @returns {Promise<IFunctionData[]>} A promise that resolves with an array of function data.
*/
async generateEmbeddings(): Promise<IFunctionData[]> {
const functionsWithDescription = await this.generateFunctionDescription();
functionsWithDescription.forEach((item) => {
@@ -75,21 +111,26 @@ export class ContextService {
return functionWithEmbeddings;
}

async InsertFunctionsinDB(): Promise<ResultSet[] | undefined> {
const dataToInsert = await this.generateEmbeddings();
/**
* Inserts function data into the database using the CodeRepository.
* @returns {Promise<ResultSet | undefined>} A promise that resolves with the result set or undefined.
*/
async insertFunctionsinDB(): Promise<ResultSet | undefined> {
await this.getCodeRepository();
if (!this.codeRepository) {
this.logger.info("Unable to connect to the DB");
throw new Error("Unable to connect to DB");
}

const valuesString = dataToInsert
.map(
(value) =>
`('${value.className}', '${value.name}', '${value.path}', '${value.processedAt}', vector32('[${(value.embedding ?? []).join(",")}]'))`
)
.join(",");
const result = await this.codeRepository?.InsertData(valuesString);
return result;
const dataToInsert = await this.generateEmbeddings();
if (dataToInsert?.length) {
const valuesString = dataToInsert
.map(
(value) =>
`('${value.className}', '${value.name}', '${value.path}', '${value.processedAt}', vector32('[${(value.embedding ?? []).join(",")}]'))`
)
.join(",");
const result = await this.codeRepository?.insertFunctions(valuesString);
return result;
}
}
}
120 changes: 101 additions & 19 deletions src/services/embedding-service.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,7 @@
import { GoogleGenerativeAI } from "@google/generative-ai";
import { FeatureExtractionPipeline } from "@xenova/transformers";
import { EmbedContentResponse, GoogleGenerativeAI } from "@google/generative-ai";
import { EmbeddingsConfig } from "../application/constant";
import { IFunctionData } from "../application/interfaces";
import { Logger } from "../infrastructure/logger/logger";
import { CodeStructureMapper } from "./code-structure.mapper.service";
import { TypeScriptAtsMapper } from "./typescript-ats.service";

interface EmbeddingServiceOptions {
batchSize: number;
@@ -20,6 +17,12 @@ interface BatchProcessResult {
generateComments: IFunctionData[];
}

/**
* EmbeddingService is responsible for generating embeddings and text comments for functions.
* It handles rate limiting, retries, and error logging.
* @export
* @class EmbeddingService
*/
export class EmbeddingService {
private static readonly DEFAULT_OPTIONS: Required<EmbeddingServiceOptions> = EmbeddingsConfig;

@@ -34,35 +37,64 @@ export class EmbeddingService {
}

this.options = { ...EmbeddingService.DEFAULT_OPTIONS };
//update this to 120000
this.requestInterval = (60 * 1000) / this.options.rateLimit;
this.genAI = new GoogleGenerativeAI(this.apiKey);
this.logger = new Logger("EmbeddingService");
}

/**
* Introduces a delay in the execution of the code, allowing for asynchronous processing.
* This delay is used to prevent excessive requests to the Google Generative AI model and ensure compliance with the rate limit.
* @private
* @async
* @param {number} ms - The duration of the delay in milliseconds.
* @returns {Promise<void>}
* @memberof EmbeddingService
*/
private async delay(ms: number): Promise<void> {
return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
* Calculates the time to wait before making the next request to the Google Generative AI model.
* This calculation is based on the rate limit and the time elapsed since the last request.
* @private
* @param {number} lastRequestTime - The time of the last request in milliseconds.
* @returns {number} The time to wait before making the next request in milliseconds.
* @memberof EmbeddingService
*/
private calculateWaitTime(lastRequestTime: number): number {
const elapsed = Date.now() - lastRequestTime;
return Math.max(0, this.requestInterval - elapsed);
}

private async generateEmbedding(text: string): Promise<number[]> {
try {
const { pipeline } = await import("@xenova/transformers");
const extractor: FeatureExtractionPipeline = await pipeline("feature-extraction", this.options.embeddingModel);
const result = await extractor(text, {
pooling: "mean",
normalize: true,
});
return Array.from(result.data);
} catch (error) {
this.logger.error("Failed to generate embedding", { error, text });
throw new Error("Embedding generation failed");
}
/**
* Generates an embedding for the given text using the configured AI model.
* The embedding is a numerical representation of the text that can be used for various tasks, such as clustering and classification.
* @async
* @param {string} text - The text to generate an embedding for.
* @returns {Promise<number[]>} The generated embedding.
* @memberof EmbeddingService
*/
async generateEmbedding(text: string) {
const model = this.genAI.getGenerativeModel({
model: this.options.embeddingModel,
});
const result: EmbedContentResponse = await model.embedContent(text);
const embedding = result.embedding.values;
return embedding;
}

/**
* Generates text for the given function data using the configured AI model.
* The generated text is a human-readable description of the function's purpose and behavior.
* @private
* @async
* @param {IFunctionData} item - The function data to generate text for.
* @returns {Promise<IFunctionData>} The function data with the generated text.
* @memberof EmbeddingService
*/
private async generateText(item: IFunctionData): Promise<IFunctionData> {
try {
const model = this.genAI.getGenerativeModel({
@@ -84,6 +116,14 @@ export class EmbeddingService {
}
}

/**
* Builds a prompt for generating text based on the given function content.
* The prompt is a string that provides context and guidance for the AI model to generate high-quality text.
* @private
* @param {string} content - The function content to build a prompt for.
* @returns {string} The built prompt.
* @memberof EmbeddingService
*/
private buildPrompt(content: string): string {
return `
Generate function level comment for this code.
@@ -96,6 +136,15 @@ export class EmbeddingService {
`.trim();
}

/**
* Generates embeddings for the given function data using the configured AI model.
* The generated embeddings are numerical representations of the function data that can be used for various tasks, such as clustering and classification.
* @private
* @async
* @param {IFunctionData} item - The function data to generate embeddings for.
* @returns {Promise<IFunctionData>} The function data with the generated embeddings.
* @memberof EmbeddingService
*/
private async generateFunctionEmbeddings(item: IFunctionData): Promise<IFunctionData> {
try {
const embedding = await this.generateEmbedding(item.compositeText);
@@ -113,6 +162,17 @@ export class EmbeddingService {
}
}

/**
* Processes a batch of function data with retries, ensuring that all data is processed successfully.
* The batch is processed in a loop, with retries attempted if any errors occur during processing.
* @private
* @async
* @param {IFunctionData[]} batch - The batch of function data to process.
* @param {number} lastRequestTime - The time of the last request in milliseconds.
* @param {boolean} forEmbedding - Whether to generate embeddings or text for the function data.
* @returns {Promise<BatchProcessResult>} The result of the batch processing, including any generated embeddings or text.
* @memberof EmbeddingService
*/
private async processBatchWithRetry(
batch: IFunctionData[],
lastRequestTime: number,
@@ -160,10 +220,22 @@ export class EmbeddingService {
await this.delay(this.options.retryDelay * retries);
}
}

return { generateEmbeddings: [], generateComments: [] };
// If retry is eq or greater than retries.
// Save json.stringify({ generateEmbeddings, generateComments }) to storage
// can replay later to conclude the process
return { generateEmbeddings, generateComments };
}

/**
* Processes the given function data, generating embeddings or text as specified.
* The processing is done in batches, with retries attempted if any errors occur during processing.
* @public
* @async
* @param {IFunctionData[]} data - The function data to process.
* @param {boolean} [forEmbedding=false] - Whether to generate embeddings or text for the function data.
* @returns {Promise<IFunctionData[]>} The processed function data, including any generated embeddings or text.
* @memberof EmbeddingService
*/
public async processFunctions(data: IFunctionData[], forEmbedding = false): Promise<IFunctionData[]> {
try {
const result = await this.processWithRateLimit(data, forEmbedding);
@@ -180,6 +252,16 @@ export class EmbeddingService {
}
}

/**
* Processes the given function data with rate limiting, ensuring that the processing is done within the allowed rate limit.
* The processing is done in batches, with retries attempted if any errors occur during processing.
* @private
* @async
* @param {IFunctionData[]} data - The function data to process.
* @param {boolean} forEmbedding - Whether to generate embeddings or text for the function data.
* @returns {Promise<{ successful: IFunctionData[]; failed: IFunctionData[] }>} The result of the processing, including any successful and failed function data.
* @memberof EmbeddingService
*/
private async processWithRateLimit(
data: IFunctionData[],
forEmbedding: boolean
40 changes: 17 additions & 23 deletions src/services/file-system.service.ts
Original file line number Diff line number Diff line change
@@ -32,57 +32,51 @@ export class FileSystemService {
throw Error("root workspace folder not found");
}

const directories = await vscode.workspace.fs.readDirectory(
workSpaceInfo.root,
);
const directories = await vscode.workspace.fs.readDirectory(workSpaceInfo.root);

const directory = directories.filter(
([name, type]) => type === vscode.FileType.Directory && name === dir,
);
const directory = directories.filter(([name, type]) => type === vscode.FileType.Directory && name === dir);

if (!directory) {
throw Error(`${dir} does not exist within this workspace`);
}

const directoryFiles = directory.map(async ([file]) => {
const srcUri = vscode.Uri.joinPath(workSpaceInfo.root, file);
const srcFiles = await vscode.workspace.findFiles(
new vscode.RelativePattern(srcUri, pattern),
);
const srcFiles = await vscode.workspace.findFiles(new vscode.RelativePattern(srcUri, pattern));
return srcFiles.map((file) => file.fsPath);
});

const srcFilePaths = await Promise.all(directoryFiles);
return srcFilePaths.flat();
} catch (error) {
handleError(
error,
`Error fetching the files from ${dir} with pattern ${pattern}`,
);
handleError(error, `Error fetching the files from ${dir} with pattern ${pattern}`);
throw error;
}
}

async readFile(fileName: string): Promise<{
buffer: Uint8Array;
string: string;
filePath: string;
}> {
async readFile(fileName: string): Promise<any> {
try {
const rootUri = this.getRootUri();
let fileUri: vscode.Uri;
let fileUri: vscode.Uri | undefined;

if (fileName === FSPROPS.TSCONFIG_FILE) {
fileUri = vscode.Uri.joinPath(rootUri, FSPROPS.TSCONFIG_FILE);
const tsconfigFiles = await vscode.workspace.findFiles(new vscode.RelativePattern(rootUri, "**tsconfig.json"));
if (tsconfigFiles?.length > 0) {
fileUri = tsconfigFiles[0];
}
// fileUri = vscode.Uri.joinPath(rootUri, FSPROPS.TSCONFIG_FILE);
} else {
throw Error("Unknown fileName");
}
let fileContent: any;
if (fileUri) {
fileContent = await vscode.workspace.fs.readFile(fileUri);
}

const fileContent = await vscode.workspace.fs.readFile(fileUri);
return {
buffer: fileContent,
string: Buffer.from(fileContent).toString("utf8"),
filePath: fileUri.fsPath,
string: fileContent ? Buffer.from(fileContent).toString("utf8") : "",
filePath: fileUri ? fileUri.fsPath : "",
};
} catch (error) {
handleError(error, `Error while reading file ${fileName}`);
57 changes: 36 additions & 21 deletions src/services/file-uploader.ts
Original file line number Diff line number Diff line change
@@ -24,18 +24,15 @@ export class FileUploader implements IFileUploader {
const content = await fs.promises.readFile(file.fsPath, "utf8");
const fileName = path.basename(file.fsPath);
const files = await this.getFiles();
if (files.length > 0) {
await this.deleteFiles(files);
}
// if (files.length > 0) {
// await this.deleteFiles(files);
// }
// Create a global state this.context.globalState
const filePath = path.join(this.fileDir, fileName);
await fs.promises.writeFile(filePath, content);
vscode.window.showInformationMessage(
`KnowledgeBase uploaded successfully`,
);
vscode.window.showInformationMessage(`KnowledgeBase uploaded successfully`);
} catch (error: any) {
vscode.window.showErrorMessage(
`Failed to upload pattern: ${error.message}`,
);
vscode.window.showErrorMessage(`Failed to upload pattern: ${error.message}`);
throw error;
}
}
@@ -54,9 +51,7 @@ export class FileUploader implements IFileUploader {
const content = await fs.promises.readFile(fullPath, "utf8");
return content;
} catch (error: any) {
vscode.window.showErrorMessage(
`Error reading from knowledgeBase: ${error.message}`,
);
vscode.window.showErrorMessage(`Error reading from knowledgeBase: ${error.message}`);
throw error;
}
}
@@ -76,9 +71,7 @@ export class FileUploader implements IFileUploader {
});
await Promise.all(deletePromises);
} catch (error: any) {
vscode.window.showErrorMessage(
`Unable to delete files: ${error.message}`,
);
vscode.window.showErrorMessage(`Unable to delete files: ${error.message}`);
throw error;
}
}
@@ -93,9 +86,7 @@ export class FileUploader implements IFileUploader {
const files = await fs.promises.readdir(this.fileDir);
return files.map((file) => path.join(this.fileDir, file));
} catch (error: any) {
vscode.window.showErrorMessage(
`Error fetching the files ${error.message}`,
);
vscode.window.showErrorMessage(`Error fetching the files ${error.message}`);
throw error;
}
}
@@ -119,11 +110,35 @@ export class FileUploader implements IFileUploader {
try {
await this.uploadFile(file[0]);
} catch (error: any) {
vscode.window.showErrorMessage(
`Failed to upload file: ${error.message}`,
);
vscode.window.showErrorMessage(`Failed to upload file: ${error.message}`);
throw error;
}
}
}

/**
 * Creates a new, empty file inside the uploader's file directory.
 *
 * The file is opened with the exclusive-create flag ("wx"), so the existence
 * check and the creation happen in one step. An already existing file is
 * never truncated or overwritten.
 *
 * @param filename - The name of the file to create, joined onto `this.fileDir`.
 * @returns `true` if the file was created, `false` if it already existed.
 * @throws Rethrows any error other than "file already exists" after showing
 * an error message to the user.
 */
async createFile(filename: string): Promise<boolean> {
  try {
    const filePath = path.join(this.fileDir, filename);
    try {
      // "wx" fails with EEXIST when the path already exists, which avoids the
      // race between a separate existence check and the write.
      await fs.promises.writeFile(filePath, "", { flag: "wx" });
    } catch (error: any) {
      if (error?.code === "EEXIST") {
        vscode.window.showInformationMessage(`File ${filename} already exists`);
        return false;
      }
      throw error;
    }
    vscode.window.showInformationMessage(`File ${filename} created successfully`);
    return true;
  } catch (error: any) {
    vscode.window.showErrorMessage(`Failed to create file: ${error.message}`);
    throw error;
  }
}
}
129 changes: 41 additions & 88 deletions src/services/typescript-ats.service.ts
Original file line number Diff line number Diff line change
@@ -24,15 +24,19 @@ export class TypeScriptAtsMapper implements ITypeScriptCodeMapper {
private fsService: FileSystemService | undefined;
private static instance: TypeScriptAtsMapper;
constructor() {
this.initializeTypescriptProgram();
if (!this.fsService) {
this.fileSysService();
}
}

public static getInstance(): TypeScriptAtsMapper {
public async init() {
await this.initializeTypescriptProgram();
}

public static async getInstance(): Promise<TypeScriptAtsMapper> {
if (!TypeScriptAtsMapper.instance) {
TypeScriptAtsMapper.instance = new TypeScriptAtsMapper();
await TypeScriptAtsMapper.instance.init();
}
return TypeScriptAtsMapper.instance;
}
@@ -45,6 +49,8 @@ export class TypeScriptAtsMapper implements ITypeScriptCodeMapper {
* Initializes a TypeScript program by reading the TS configuration file and creating a new program instance.
* This method sets up the program and type checker for further compilation and analysis.
*/
// TODO: The way the src files are resolved should be dynamic.
// Users may have monorepos or may open the workspace at a higher-level folder.
private async initializeTypescriptProgram(): Promise<void> {
try {
if (!this.fsService) {
@@ -57,25 +63,22 @@ export class TypeScriptAtsMapper implements ITypeScriptCodeMapper {
const { string, filePath } = fileContent;
const configFile = ts.readConfigFile(filePath, () => string);

const compilerOptions: ts.ParsedCommandLine =
ts.parseJsonConfigFileContent(
configFile.config,
ts.sys,
this.fsService?.getRootFilePath()
);

this.program = ts.createProgram(
compilerOptions.fileNames,
compilerOptions.options
const compilerOptions: ts.ParsedCommandLine = ts.parseJsonConfigFileContent(
configFile.config,
ts.sys,
this.fsService?.getRootFilePath()
);

this.program = ts.createProgram(compilerOptions.fileNames, compilerOptions.options);

this.typeChecker = this.getTypeChecker();
} catch (error: any) {
handleError(error, `unable to initialize knowledgebase extractions`);
throw error;
}
}

/**
 * Returns the file-system path of the first workspace folder, falling back to
 * the current working directory of the process when no folder is available.
 *
 * NOTE (original author): the root folder should be relative to the src folder.
 *
 * @returns The `fsPath` of the first workspace folder, or `process.cwd()`.
 */
getRootFolder(): string {
  // The `?.` after `[0]` also covers an empty folder list, which would
  // otherwise throw when reading `.uri` of `undefined`.
  return vscode.workspace.workspaceFolders?.[0]?.uri.fsPath ?? process.cwd();
}
@@ -89,10 +92,7 @@ export class TypeScriptAtsMapper implements ITypeScriptCodeMapper {
* @param sourceFile The source file containing the class declaration.
* @returns An IClassInfo object containing the name, methods, properties, interfaces, and enums of the class.
*/
extractClassMetaData(
node: ts.ClassDeclaration,
sourceFile: ts.SourceFile
): IClassInfo {
extractClassMetaData(node: ts.ClassDeclaration, sourceFile: ts.SourceFile): IClassInfo {
try {
const className: string | undefined = node?.name?.getText(sourceFile);
const classInfo: IClassInfo = {
@@ -118,8 +118,7 @@ export class TypeScriptAtsMapper implements ITypeScriptCodeMapper {
sourceFile: ts.SourceFile,
info: IClassInfo | IModuleInfo
): void {
const functionInfo: IFunctionInfo | null =
this.getFunctionDetails(node, sourceFile) ?? null;
const functionInfo: IFunctionInfo | null = this.getFunctionDetails(node, sourceFile) ?? null;
if (functionInfo) {
info?.functions?.push(functionInfo);
}
@@ -130,11 +129,7 @@ export class TypeScriptAtsMapper implements ITypeScriptCodeMapper {
* to the class or module information object if valid. This aggregation helps build
* a complete representation of the class/module structure.
*/
private aggergateProperties(
node: ts.PropertyDeclaration,
sourceFile: ts.SourceFile,
info: IClassInfo | IModuleInfo
) {
private aggergateProperties(node: ts.PropertyDeclaration, sourceFile: ts.SourceFile, info: IClassInfo | IModuleInfo) {
const propertyInfo = this.extractPropertyParameters(node, sourceFile);
if (propertyInfo) {
info?.properties?.push(propertyInfo);
@@ -162,11 +157,7 @@ export class TypeScriptAtsMapper implements ITypeScriptCodeMapper {
* class or module information object. Helps maintain a complete type system
* representation within the code structure.
*/
private aggregateEnums(
node: ts.EnumDeclaration,
sourceFile: ts.SourceFile,
info: IClassInfo | IModuleInfo
) {
private aggregateEnums(node: ts.EnumDeclaration, sourceFile: ts.SourceFile, info: IClassInfo | IModuleInfo) {
const enumInfo = this.extractEnumInfo(node, sourceFile);
if (enumInfo) {
info?.enums?.push(enumInfo);
@@ -227,22 +218,16 @@ export class TypeScriptAtsMapper implements ITypeScriptCodeMapper {
* @param sourceFile
* @returns An object with 'name' and 'type' properties.
*/
extractPropertyParameters(
node: ts.PropertyDeclaration,
sourceFile: ts.SourceFile
): IProperty {
extractPropertyParameters(node: ts.PropertyDeclaration, sourceFile: ts.SourceFile): IProperty {
try {
const name: string = node.name.getText(sourceFile);
let type;

if (node.type) {
type = this.getTypeAtLocation(node);
} else {
const inferredType: ts.Type | undefined =
this.typeChecker?.getTypeAtLocation(node);
type = inferredType
? this.typeChecker?.typeToString(inferredType)
: undefined;
const inferredType: ts.Type | undefined = this.typeChecker?.getTypeAtLocation(node);
type = inferredType ? this.typeChecker?.typeToString(inferredType) : undefined;
}
const property = {
name,
@@ -282,10 +267,7 @@ export class TypeScriptAtsMapper implements ITypeScriptCodeMapper {
}
}

extractArrowFunctionParameters(
node: ts.ArrowFunction,
sourceFile: ts.SourceFile
): IProperty[] {
extractArrowFunctionParameters(node: ts.ArrowFunction, sourceFile: ts.SourceFile): IProperty[] {
const properties = node.parameters.map((param) => {
const name = param.name.getText(sourceFile);
const type = param.type ? this.getTypeAtLocation(param) : undefined;
@@ -315,17 +297,9 @@ export class TypeScriptAtsMapper implements ITypeScriptCodeMapper {

const name: string = node.name.getText(sourceFile);
const content: string = this.getFunctionNodeText(node, sourceFile);
const parameters: IProperty[] = this.extractFunctionParameters(
node,
sourceFile
);
const parameters: IProperty[] = this.extractFunctionParameters(node, sourceFile);

const details = this.functionDetailsMapper(
name,
content,
parameters,
node
);
const details = this.functionDetailsMapper(name, content, parameters, node);
return details;
} catch (error: any) {
handleError(error, "unable to get function details");
@@ -365,9 +339,7 @@ export class TypeScriptAtsMapper implements ITypeScriptCodeMapper {
* @returns A string representation of the function or method type, or undefined if type checking is unavailable.
*/
getTypeAtLocation(node: DeclarationOrFunctionNode): string | undefined {
const type = this.typeChecker?.typeToString(
this.typeChecker.getTypeAtLocation(node)
);
const type = this.typeChecker?.typeToString(this.typeChecker.getTypeAtLocation(node));
return type;
}

@@ -416,10 +388,7 @@ export class TypeScriptAtsMapper implements ITypeScriptCodeMapper {
* @param relativePath The relative path of the module.
* @returns The module information.
*/
private extractModuleInfo(
sourceFile: ts.SourceFile,
relativePath: string
): IModuleInfo {
private extractModuleInfo(sourceFile: ts.SourceFile, relativePath: string): IModuleInfo {
return {
path: path.normalize(relativePath),
classes: [],
@@ -487,11 +456,10 @@ export class TypeScriptAtsMapper implements ITypeScriptCodeMapper {
const repoNames: string = path.basename(normalizedRootDir);
codebaseMap[repoNames] = { modules: {} };

const tsFiles: string[] | undefined =
await this.fsService?.getFilesFromDirectory(
FSPROPS.SRC_DIRECTORY,
FSPROPS.TS_FILE_PATTERN
);
const tsFiles: string[] | undefined = await this.fsService?.getFilesFromDirectory(
FSPROPS.SRC_DIRECTORY,
FSPROPS.TS_FILE_PATTERN
);
if (!tsFiles?.length) {
throw Error(`No Typescript files found`);
}
@@ -503,10 +471,7 @@ export class TypeScriptAtsMapper implements ITypeScriptCodeMapper {
throw Error(`No source file found for ${filePath}`);
}

const moduleInfo: IModuleInfo = this.extractModuleInfo(
sourceFile,
moduleRalativePath
);
const moduleInfo: IModuleInfo = this.extractModuleInfo(sourceFile, moduleRalativePath);
ts.forEachChild(sourceFile, (node) => {
if (ts.isClassDeclaration(node)) {
const classInfo = this.extractClassMetaData(node, sourceFile);
@@ -545,20 +510,15 @@ export class TypeScriptAtsMapper implements ITypeScriptCodeMapper {
}
}

extractInterfaceInfo(
node: ts.InterfaceDeclaration,
sourceFile: ts.SourceFile
): IInterfaceInfo {
extractInterfaceInfo(node: ts.InterfaceDeclaration, sourceFile: ts.SourceFile): IInterfaceInfo {
try {
const interfaceName: string = node.name.getText(sourceFile);

const properties: IProperty[] = node.members
.filter(ts.isPropertySignature)
.map((prop) => {
const name = prop.name.getText(sourceFile);
const type = prop.type ? this.getTypeAtLocation(prop) : "any";
return { name, type };
});
const properties: IProperty[] = node.members.filter(ts.isPropertySignature).map((prop) => {
const name = prop.name.getText(sourceFile);
const type = prop.type ? this.getTypeAtLocation(prop) : "any";
return { name, type };
});

return {
name: interfaceName,
@@ -571,16 +531,11 @@ export class TypeScriptAtsMapper implements ITypeScriptCodeMapper {
}
}

extractEnumInfo(
node: ts.EnumDeclaration,
sourceFile: ts.SourceFile
): IEnumInfo {
extractEnumInfo(node: ts.EnumDeclaration, sourceFile: ts.SourceFile): IEnumInfo {
const enumName = node.name.getText(sourceFile);
const members = node.members.map((member) => {
const name = member.name.getText(sourceFile);
const value = member.initializer
? member.initializer.getText(sourceFile)
: undefined;
const value = member.initializer ? member.initializer.getText(sourceFile) : undefined;
return { name, value };
});

@@ -594,9 +549,7 @@ export class TypeScriptAtsMapper implements ITypeScriptCodeMapper {
buildDependencyGraph(sourceFile: ts.SourceFile): string[] {
const imports = sourceFile.statements.filter(ts.isImportDeclaration);
return imports.map((i) => {
return ts
.createPrinter()
.printNode(ts.EmitHint.Unspecified, i, sourceFile);
return ts.createPrinter().printNode(ts.EmitHint.Unspecified, i, sourceFile);
});
}
}
1 change: 1 addition & 0 deletions tsconfig.json
Original file line number Diff line number Diff line change
@@ -7,6 +7,7 @@
"skipLibCheck": true,
"sourceMap": true,
"rootDir": "src",
"moduleResolution": "node",
"strict": true /* enable all strict type-checking options */
/* Additional Checks */
// "noImplicitReturns": true, /* Report error when not all code paths in function return a value. */

0 comments on commit 4084bd9

Please sign in to comment.