diff --git a/apps/dashboard/components/logs/llm-search/components/search-input.tsx b/apps/dashboard/components/logs/llm-search/components/search-input.tsx index 3903ebe48f..6d47accfdc 100644 --- a/apps/dashboard/components/logs/llm-search/components/search-input.tsx +++ b/apps/dashboard/components/logs/llm-search/components/search-input.tsx @@ -11,6 +11,7 @@ type SearchInputProps = { inputRef: React.RefObject<HTMLInputElement>; }; +const LLM_LIMITS_MAX_QUERY_LENGTH = 120; export const SearchInput = ({ value, placeholder, @@ -39,6 +40,7 @@ export const SearchInput = ({ value={value} onKeyDown={onKeyDown} onChange={onChange} + maxLength={LLM_LIMITS_MAX_QUERY_LENGTH} placeholder={placeholder} className="truncate text-accent-12 font-medium text-[13px] bg-transparent border-none outline-none focus:ring-0 focus:outline-none placeholder:text-accent-12 selection:bg-gray-6 w-full" disabled={isProcessing && searchMode !== "allowTypeDuringSearch"} diff --git a/apps/dashboard/lib/trpc/routers/logs/llm-search/index.ts b/apps/dashboard/lib/trpc/routers/logs/llm-search/index.ts index d71beefc62..1e00382310 100644 --- a/apps/dashboard/lib/trpc/routers/logs/llm-search/index.ts +++ b/apps/dashboard/lib/trpc/routers/logs/llm-search/index.ts @@ -1,5 +1,5 @@ import { env } from "@/lib/env"; -import { ratelimit, requireUser, requireWorkspace, t, withRatelimit } from "@/lib/trpc/trpc"; +import { requireUser, requireWorkspace, t, withLlmAccess } from "@/lib/trpc/trpc"; import OpenAI from "openai"; import { z } from "zod"; import { getStructuredSearchFromLLM } from "./utils"; @@ -13,8 +13,8 @@ const openai = env().OPENAI_API_KEY export const llmSearch = t.procedure .use(requireUser) .use(requireWorkspace) - .use(withRatelimit(ratelimit.read)) + .use(withLlmAccess()) .input(z.object({ query: z.string(), timestamp: z.number() })) - .mutation(async ({ input }) => { - return await getStructuredSearchFromLLM(openai, input.query, input.timestamp); + .mutation(async ({ input, ctx }) => { + return await 
getStructuredSearchFromLLM(openai, ctx.validatedQuery, input.timestamp); }); diff --git a/apps/dashboard/lib/trpc/routers/logs/llm-search/utils.ts b/apps/dashboard/lib/trpc/routers/logs/llm-search/utils.ts index c7e20f72ae..b415567135 100644 --- a/apps/dashboard/lib/trpc/routers/logs/llm-search/utils.ts +++ b/apps/dashboard/lib/trpc/routers/logs/llm-search/utils.ts @@ -19,10 +19,7 @@ export async function getStructuredSearchFromLLM( const completion = await openai.beta.chat.completions.parse({ // Don't change the model only a few models allow structured outputs model: "gpt-4o-mini", - temperature: 0.2, // Range 0-2, lower = more focused/deterministic - top_p: 0.1, // Alternative to temperature, controls randomness - frequency_penalty: 0.5, // Range -2 to 2, higher = less repetition - presence_penalty: 0.5, // Range -2 to 2, higher = more topic diversity + temperature: 0.1, // Range 0-2, lower = more focused/deterministic n: 1, // Number of completions to generate messages: [ { diff --git a/apps/dashboard/lib/trpc/trpc.ts b/apps/dashboard/lib/trpc/trpc.ts index 4a682137b0..e8f5d6dbc2 100644 --- a/apps/dashboard/lib/trpc/trpc.ts +++ b/apps/dashboard/lib/trpc/trpc.ts @@ -2,6 +2,7 @@ import { env } from "@/lib/env"; import { TRPCError, initTRPC } from "@trpc/server"; import { Ratelimit } from "@unkey/ratelimit"; import superjson from "superjson"; +import { z } from "zod"; import type { Context } from "./context"; export const t = initTRPC.context<Context>().create({ transformer: superjson }); @@ -125,3 +126,64 @@ export const withRatelimit = (ratelimit: Ratelimit | undefined) => return next(); }); + +export const LLM_LIMITS = { + MIN_QUERY_LENGTH: 3, + MAX_QUERY_LENGTH: 120, + MAX_TOKENS_ESTIMATE: 30, // ~4 chars per token for 120 chars + RATE_LIMIT: 10, + RATE_DURATION: "60s", +} as const; + +const llmRatelimit = env().UNKEY_ROOT_KEY + ? new Ratelimit({ + rootKey: env().UNKEY_ROOT_KEY ?? 
"", + namespace: "trpc_llm", + limit: LLM_LIMITS.RATE_LIMIT, + duration: LLM_LIMITS.RATE_DURATION, + }) + : null; + +const llmQuerySchema = z.object({ + query: z + .string() + .trim() + .min(LLM_LIMITS.MIN_QUERY_LENGTH, "Query must be at least 3 characters") + .max(LLM_LIMITS.MAX_QUERY_LENGTH, "Query cannot exceed 120 characters"), +}); + +export const withLlmAccess = () => + t.middleware(async ({ next, ctx, rawInput }) => { + if (llmRatelimit) { + const response = await llmRatelimit.limit(ctx.user!.id); + if (!response.success) { + throw new TRPCError({ + code: "TOO_MANY_REQUESTS", + message: `LLM rate limit exceeded. You can make ${LLM_LIMITS.RATE_LIMIT} requests per minute.`, + }); + } + } + + let validatedInput: z.infer; + try { + validatedInput = llmQuerySchema.parse(rawInput); + } catch (error) { + if (error instanceof z.ZodError) { + const firstError = error.errors[0]; + throw new TRPCError({ + code: "BAD_REQUEST", + message: firstError?.message || "Invalid query format", + }); + } + throw new TRPCError({ + code: "BAD_REQUEST", + message: "Invalid input format", + }); + } + + return next({ + ctx: { + validatedQuery: validatedInput.query, + }, + }); + });