Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ type SearchInputProps = {
inputRef: React.RefObject<HTMLInputElement>;
};

const LLM_LIMITS_MAX_QUERY_LENGTH = 120;
export const SearchInput = ({
value,
placeholder,
Expand Down Expand Up @@ -39,6 +40,7 @@ export const SearchInput = ({
value={value}
onKeyDown={onKeyDown}
onChange={onChange}
maxLength={LLM_LIMITS_MAX_QUERY_LENGTH}
placeholder={placeholder}
className="truncate text-accent-12 font-medium text-[13px] bg-transparent border-none outline-none focus:ring-0 focus:outline-none placeholder:text-accent-12 selection:bg-gray-6 w-full"
disabled={isProcessing && searchMode !== "allowTypeDuringSearch"}
Expand Down
8 changes: 4 additions & 4 deletions apps/dashboard/lib/trpc/routers/logs/llm-search/index.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { env } from "@/lib/env";
import { ratelimit, requireUser, requireWorkspace, t, withRatelimit } from "@/lib/trpc/trpc";
import { requireUser, requireWorkspace, t, withLlmAccess } from "@/lib/trpc/trpc";
import OpenAI from "openai";
import { z } from "zod";
import { getStructuredSearchFromLLM } from "./utils";
Expand All @@ -13,8 +13,8 @@ const openai = env().OPENAI_API_KEY
export const llmSearch = t.procedure
.use(requireUser)
.use(requireWorkspace)
.use(withRatelimit(ratelimit.read))
.use(withLlmAccess())
.input(z.object({ query: z.string(), timestamp: z.number() }))
.mutation(async ({ input }) => {
return await getStructuredSearchFromLLM(openai, input.query, input.timestamp);
.mutation(async ({ input, ctx }) => {
return await getStructuredSearchFromLLM(openai, ctx.validatedQuery, input.timestamp);
});
5 changes: 1 addition & 4 deletions apps/dashboard/lib/trpc/routers/logs/llm-search/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,7 @@ export async function getStructuredSearchFromLLM(
const completion = await openai.beta.chat.completions.parse({
// Don't change the model only a few models allow structured outputs
model: "gpt-4o-mini",
temperature: 0.2, // Range 0-2, lower = more focused/deterministic
top_p: 0.1, // Alternative to temperature, controls randomness
frequency_penalty: 0.5, // Range -2 to 2, higher = less repetition
presence_penalty: 0.5, // Range -2 to 2, higher = more topic diversity
temperature: 0.1, // Range 0-2, lower = more focused/deterministic
n: 1, // Number of completions to generate
messages: [
{
Expand Down
62 changes: 62 additions & 0 deletions apps/dashboard/lib/trpc/trpc.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import { env } from "@/lib/env";
import { TRPCError, initTRPC } from "@trpc/server";
import { Ratelimit } from "@unkey/ratelimit";
import superjson from "superjson";
import { z } from "zod";
import type { Context } from "./context";

export const t = initTRPC.context<Context>().create({ transformer: superjson });
Expand Down Expand Up @@ -125,3 +126,64 @@ export const withRatelimit = (ratelimit: Ratelimit | undefined) =>

return next();
});

/**
 * Guardrails for LLM-backed search endpoints: query length bounds,
 * a rough token budget, and the per-user rate-limit window.
 */
export const LLM_LIMITS = {
  MIN_QUERY_LENGTH: 3,
  MAX_QUERY_LENGTH: 120,
  // Rough upper bound assuming ~4 characters per token over a 120-char query.
  MAX_TOKENS_ESTIMATE: 30,
  // Allow RATE_LIMIT requests per RATE_DURATION window, per user.
  RATE_LIMIT: 10,
  RATE_DURATION: "60s",
} as const;

// Dedicated rate limiter for LLM endpoints; null when no Unkey root key is
// configured so local/dev environments keep working without rate limiting.
// Read the key once: the original called env() twice and then needed a
// redundant `?? ""` fallback that contradicted the truthiness guard.
const llmRootKey = env().UNKEY_ROOT_KEY;
const llmRatelimit = llmRootKey
  ? new Ratelimit({
      rootKey: llmRootKey,
      namespace: "trpc_llm",
      limit: LLM_LIMITS.RATE_LIMIT,
      duration: LLM_LIMITS.RATE_DURATION,
    })
  : null;

// Validates the raw input of LLM-backed procedures. The query is trimmed
// before the length checks, so surrounding whitespace cannot pad a too-short
// or too-long query. Error messages interpolate LLM_LIMITS so they cannot
// drift out of sync with the actual bounds (the originals hard-coded 3/120).
const llmQuerySchema = z.object({
  query: z
    .string()
    .trim()
    .min(
      LLM_LIMITS.MIN_QUERY_LENGTH,
      `Query must be at least ${LLM_LIMITS.MIN_QUERY_LENGTH} characters`,
    )
    .max(
      LLM_LIMITS.MAX_QUERY_LENGTH,
      `Query cannot exceed ${LLM_LIMITS.MAX_QUERY_LENGTH} characters`,
    ),
});

/**
 * Middleware gating LLM-backed procedures.
 *
 * 1. Enforces the per-user LLM rate limit (skipped when no Unkey root key is
 *    configured, i.e. llmRatelimit is null).
 * 2. Validates the raw input against llmQuerySchema and exposes the trimmed,
 *    length-checked query to the procedure as `ctx.validatedQuery`.
 *
 * Must be composed after `requireUser` so `ctx.user` is populated.
 *
 * @throws TRPCError UNAUTHORIZED when no user is on the context,
 *         TOO_MANY_REQUESTS when the rate limit is exceeded,
 *         BAD_REQUEST when the query fails validation.
 */
export const withLlmAccess = () =>
  t.middleware(async ({ next, ctx, rawInput }) => {
    // Explicit guard instead of the non-null assertion `ctx.user!`: fail
    // loudly and with the right code if this middleware is ever composed
    // without requireUser in front of it.
    if (!ctx.user) {
      throw new TRPCError({
        code: "UNAUTHORIZED",
        message: "Authentication required for LLM access",
      });
    }

    if (llmRatelimit) {
      const response = await llmRatelimit.limit(ctx.user.id);
      if (!response.success) {
        throw new TRPCError({
          code: "TOO_MANY_REQUESTS",
          message: `LLM rate limit exceeded. You can make ${LLM_LIMITS.RATE_LIMIT} requests per minute.`,
        });
      }
    }

    // safeParse replaces try/catch + `instanceof z.ZodError`: parse() only
    // ever throws ZodError, so the original's non-Zod fallback branch was
    // unreachable, and safeParse keeps the error fully typed.
    const parsed = llmQuerySchema.safeParse(rawInput);
    if (!parsed.success) {
      throw new TRPCError({
        code: "BAD_REQUEST",
        message: parsed.error.errors[0]?.message || "Invalid query format",
      });
    }

    return next({
      ctx: {
        // Downstream procedures read the validated query from here instead of
        // re-validating `input.query` themselves.
        validatedQuery: parsed.data.query,
      },
    });
  });
Loading