Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ type SearchInputProps = {
inputRef: React.RefObject<HTMLInputElement>;
};

const LLM_LIMITS_MAX_QUERY_LENGTH = 120;
export const SearchInput = ({
value,
placeholder,
Expand Down Expand Up @@ -39,6 +40,7 @@ export const SearchInput = ({
value={value}
onKeyDown={onKeyDown}
onChange={onChange}
maxLength={LLM_LIMITS_MAX_QUERY_LENGTH}
placeholder={placeholder}
className="truncate text-accent-12 font-medium text-[13px] bg-transparent border-none outline-none focus:ring-0 focus:outline-none placeholder:text-accent-12 selection:bg-gray-6 w-full"
disabled={isProcessing && searchMode !== "allowTypeDuringSearch"}
Expand Down
8 changes: 4 additions & 4 deletions apps/dashboard/lib/trpc/routers/logs/llm-search/index.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { env } from "@/lib/env";
import { ratelimit, requireUser, requireWorkspace, t, withRatelimit } from "@/lib/trpc/trpc";
import { requireUser, requireWorkspace, t, withLlmAccess } from "@/lib/trpc/trpc";
import OpenAI from "openai";
import { z } from "zod";
import { getStructuredSearchFromLLM } from "./utils";
Expand All @@ -13,8 +13,8 @@ const openai = env().OPENAI_API_KEY
export const llmSearch = t.procedure
.use(requireUser)
.use(requireWorkspace)
.use(withRatelimit(ratelimit.read))
.use(withLlmAccess())
.input(z.object({ query: z.string(), timestamp: z.number() }))
.mutation(async ({ input }) => {
return await getStructuredSearchFromLLM(openai, input.query, input.timestamp);
.mutation(async ({ input, ctx }) => {
return await getStructuredSearchFromLLM(openai, ctx.validatedQuery, input.timestamp);
});
5 changes: 1 addition & 4 deletions apps/dashboard/lib/trpc/routers/logs/llm-search/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,7 @@ export async function getStructuredSearchFromLLM(
const completion = await openai.beta.chat.completions.parse({
// Don't change the model only a few models allow structured outputs
model: "gpt-4o-mini",
temperature: 0.2, // Range 0-2, lower = more focused/deterministic
top_p: 0.1, // Alternative to temperature, controls randomness
frequency_penalty: 0.5, // Range -2 to 2, higher = less repetition
presence_penalty: 0.5, // Range -2 to 2, higher = more topic diversity
temperature: 0.1, // Range 0-2, lower = more focused/deterministic
n: 1, // Number of completions to generate
messages: [
{
Expand Down
62 changes: 62 additions & 0 deletions apps/dashboard/lib/trpc/trpc.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import { env } from "@/lib/env";
import { TRPCError, initTRPC } from "@trpc/server";
import { Ratelimit } from "@unkey/ratelimit";
import superjson from "superjson";
import { z } from "zod";
import type { Context } from "./context";

export const t = initTRPC.context<Context>().create({ transformer: superjson });
Expand Down Expand Up @@ -125,3 +126,64 @@ export const withRatelimit = (ratelimit: Ratelimit | undefined) =>

return next();
});

/**
 * Guardrails for LLM-backed search endpoints: query length bounds,
 * a rough token budget, and the per-user rate-limit window.
 */
export const LLM_LIMITS = {
  MIN_QUERY_LENGTH: 3,
  MAX_QUERY_LENGTH: 120,
  // Rough upper bound assuming ~4 characters per token over a 120-char query.
  MAX_TOKENS_ESTIMATE: 30,
  // Allow RATE_LIMIT requests per RATE_DURATION window, per user.
  RATE_LIMIT: 10,
  RATE_DURATION: "60s",
} as const;

// Dedicated rate limiter for LLM endpoints; null when no Unkey root key is
// configured so local/dev environments keep working without rate limiting.
// Read the key once: the original called env() twice and then needed a
// redundant `?? ""` fallback that contradicted the truthiness guard.
const llmRootKey = env().UNKEY_ROOT_KEY;
const llmRatelimit = llmRootKey
  ? new Ratelimit({
      rootKey: llmRootKey,
      namespace: "trpc_llm",
      limit: LLM_LIMITS.RATE_LIMIT,
      duration: LLM_LIMITS.RATE_DURATION,
    })
  : null;

// Validates the raw input of LLM-backed procedures. The query is trimmed
// before the length checks, so surrounding whitespace cannot pad a too-short
// or too-long query. Error messages interpolate LLM_LIMITS so they cannot
// drift out of sync with the actual bounds (the originals hard-coded 3/120).
const llmQuerySchema = z.object({
  query: z
    .string()
    .trim()
    .min(
      LLM_LIMITS.MIN_QUERY_LENGTH,
      `Query must be at least ${LLM_LIMITS.MIN_QUERY_LENGTH} characters`,
    )
    .max(
      LLM_LIMITS.MAX_QUERY_LENGTH,
      `Query cannot exceed ${LLM_LIMITS.MAX_QUERY_LENGTH} characters`,
    ),
});

/**
 * Middleware gating LLM-backed procedures.
 *
 * 1. Enforces the per-user LLM rate limit (skipped when no Unkey root key is
 *    configured, i.e. llmRatelimit is null).
 * 2. Validates the raw input against llmQuerySchema and exposes the trimmed,
 *    length-checked query to the procedure as `ctx.validatedQuery`.
 *
 * Must be composed after `requireUser` so `ctx.user` is populated.
 *
 * @throws TRPCError UNAUTHORIZED when no user is on the context,
 *         TOO_MANY_REQUESTS when the rate limit is exceeded,
 *         BAD_REQUEST when the query fails validation.
 */
export const withLlmAccess = () =>
  t.middleware(async ({ next, ctx, rawInput }) => {
    // Explicit guard instead of the non-null assertion `ctx.user!`: fail
    // loudly and with the right code if this middleware is ever composed
    // without requireUser in front of it.
    if (!ctx.user) {
      throw new TRPCError({
        code: "UNAUTHORIZED",
        message: "Authentication required for LLM access",
      });
    }

    if (llmRatelimit) {
      const response = await llmRatelimit.limit(ctx.user.id);
      if (!response.success) {
        throw new TRPCError({
          code: "TOO_MANY_REQUESTS",
          message: `LLM rate limit exceeded. You can make ${LLM_LIMITS.RATE_LIMIT} requests per minute.`,
        });
      }
    }

    // safeParse replaces try/catch + `instanceof z.ZodError`: parse() only
    // ever throws ZodError, so the original's non-Zod fallback branch was
    // unreachable, and safeParse keeps the error fully typed.
    const parsed = llmQuerySchema.safeParse(rawInput);
    if (!parsed.success) {
      throw new TRPCError({
        code: "BAD_REQUEST",
        message: parsed.error.errors[0]?.message || "Invalid query format",
      });
    }

    return next({
      ctx: {
        // Downstream procedures read the validated query from here instead of
        // re-validating `input.query` themselves.
        validatedQuery: parsed.data.query,
      },
    });
  });
Loading