diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml index 21833f795e..6b2425725a 100644 --- a/.github/workflows/checks.yml +++ b/.github/workflows/checks.yml @@ -49,11 +49,9 @@ jobs: run: bun scripts/lint/no-duplicate-deps.ts - name: Check package.json ordering run: bun scripts/format/sort-package-json.ts --check - # TODO: remove continue-on-error once the existing typeof/cast backlog is cleared. - # Pre-push hook already blocks new violations — these report on the backlog. - name: Custom lint rules (typeof guards, raw regex, process.env) run: bun lint:custom - continue-on-error: true + # TODO: remove continue-on-error once the type-cast backlog (130) is cleared. - name: Check unsafe type casts run: bun check:casts:strict continue-on-error: true diff --git a/apps/admin/app/login/page.tsx b/apps/admin/app/login/page.tsx index 4f8b28e9dd..ca2411d391 100644 --- a/apps/admin/app/login/page.tsx +++ b/apps/admin/app/login/page.tsx @@ -12,14 +12,12 @@ import { Input } from '@packrat/web-ui/components/input'; import { Label } from '@packrat/web-ui/components/label'; import { storeToken } from 'admin-app/lib/auth'; import { useCFAccessIdentity } from 'admin-app/lib/cfAccess'; +import { adminEnv } from 'admin-app/lib/env'; import { Package, Shield } from 'lucide-react'; import { useRouter } from 'next/navigation'; import { useEffect, useState } from 'react'; -const API_BASE = process.env.NEXT_PUBLIC_API_URL; -if (!API_BASE) { - throw new Error('NEXT_PUBLIC_API_URL must be set (root .env.local → PUBLIC_API_URL)'); -} +const API_BASE = adminEnv.NEXT_PUBLIC_API_URL; export default function LoginPage() { const router = useRouter(); diff --git a/apps/admin/lib/api.ts b/apps/admin/lib/api.ts index 744e895249..69e03a5801 100644 --- a/apps/admin/lib/api.ts +++ b/apps/admin/lib/api.ts @@ -8,11 +8,9 @@ import { clearToken, getAuthHeader } from './auth'; import { getCFAccessJWT } from './cfAccess'; +import { adminEnv } from './env'; -const API_BASE = 
process.env.NEXT_PUBLIC_API_URL; -if (!API_BASE) { - throw new Error('NEXT_PUBLIC_API_URL must be set (root .env.local → PUBLIC_API_URL)'); -} +const API_BASE = adminEnv.NEXT_PUBLIC_API_URL; async function buildAuthHeaders(): Promise> { const cfJwt = await getCFAccessJWT(); diff --git a/apps/admin/lib/env.ts b/apps/admin/lib/env.ts new file mode 100644 index 0000000000..5ab4843eed --- /dev/null +++ b/apps/admin/lib/env.ts @@ -0,0 +1,29 @@ +/** + * Admin app environment shim. + * Parses the admin env once at module load using Zod and exports a typed result. + * + * Adding a new variable: declare it on `adminEnvSchema`, add a literal + * `process.env.NAME` read to the `parse` input below, and mark it + * `.optional()` unless every caller genuinely requires it. + */ + +import { z } from 'zod'; + +const adminEnvSchema = z.object({ + NEXT_PUBLIC_API_URL: z.string().url(), +}); + +export type AdminEnv = z.infer<typeof adminEnvSchema>; + +/** + * Typed env parsed at module load. Throws a Zod validation error if any + * value fails its schema constraint. + * + * Each variable is read through a literal `process.env.NAME` expression: + * Next.js only inlines `NEXT_PUBLIC_*` values into client bundles for static + * property access, so handing the whole `process.env` object to Zod would + * fail validation in the browser. + */ +export const adminEnv = adminEnvSchema.parse({ + NEXT_PUBLIC_API_URL: process.env.NEXT_PUBLIC_API_URL, +}); diff --git a/apps/expo/utils/format-ai-response.ts b/apps/expo/utils/format-ai-response.ts index 7ade1c0316..12fff3c2ef 100644 --- a/apps/expo/utils/format-ai-response.ts +++ b/apps/expo/utils/format-ai-response.ts @@ -1,3 +1,10 @@ +// ── Formatting regex constants ── +const BULLET_LINE_PATTERN = /^\s*[-*]\s+(.+)$/gm; +const SENTENCE_BOUNDARY_PATTERN = /([.?!])\s*(?=[A-Z])/g; +const BOLD_MARKDOWN_PATTERN = /\*\*(.+?)\*\*/g; +const ITALIC_MARKDOWN_PATTERN = /\*(.+?)\*/g; +const MARKDOWN_HEADER_PATTERN = /^#+\s+(.+)$/gm; + /** * Formats AI responses to improve readability in the chat UI * - Converts markdown lists to plain text with proper spacing @@ -6,17 +13,17 @@ */ export function formatAIResponse(text: string): string { // Convert markdown lists to plain text with emoji bullets - let formatted = text.replace(/^\s*[-*]\s+(.+)$/gm, '• $1'); + let formatted = text.replace(BULLET_LINE_PATTERN, '• $1'); // Add proper spacing after 
periods, question marks, and exclamation points - formatted = formatted.replace(/([.?!])\s*(?=[A-Z])/g, '$1\n\n'); + formatted = formatted.replace(SENTENCE_BOUNDARY_PATTERN, '$1\n\n'); // Convert markdown emphasis to plain text - formatted = formatted.replace(/\*\*(.+?)\*\*/g, '$1'); - formatted = formatted.replace(/\*(.+?)\*/g, '$1'); + formatted = formatted.replace(BOLD_MARKDOWN_PATTERN, '$1'); + formatted = formatted.replace(ITALIC_MARKDOWN_PATTERN, '$1'); // Handle markdown headers - formatted = formatted.replace(/^#+\s+(.+)$/gm, '$1'); + formatted = formatted.replace(MARKDOWN_HEADER_PATTERN, '$1'); return formatted.trim(); } diff --git a/apps/guides/scripts/enhance-content.ts b/apps/guides/scripts/enhance-content.ts index 438b70b8f6..04f4da387d 100644 --- a/apps/guides/scripts/enhance-content.ts +++ b/apps/guides/scripts/enhance-content.ts @@ -4,6 +4,9 @@ import matter from 'gray-matter'; import path from 'path'; import { type ContentEnhancementOptions, enhanceGuideContent } from '../lib/enhanceGuideContent'; +// ── Script regex constants ── +const TIMESTAMP_UNSAFE_CHARS = /[:.]/g; + // Configuration const CONTENT_DIR = path.join(process.cwd(), 'content/posts'); const BACKUP_DIR = path.join(process.cwd(), 'content/backups'); @@ -41,7 +44,7 @@ function ensureBackupDir(): void { */ function createBackup(filePath: string): string { const fileName = path.basename(filePath); - const timestamp = new Date().toISOString().replace(/[:.]/g, '-'); + const timestamp = new Date().toISOString().replace(TIMESTAMP_UNSAFE_CHARS, '-'); const backupPath = path.join(BACKUP_DIR, `${timestamp}-${fileName}`); fs.copyFileSync(filePath, backupPath); @@ -107,10 +110,8 @@ function getContentFiles(pattern?: string): string[] { .map((file) => path.join(CONTENT_DIR, file)); if (pattern) { - // Escape special regex characters to prevent regex injection - const escapedPattern = pattern.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); - const regex = new RegExp(escapedPattern, 'i'); - return 
files.filter((file) => regex.test(path.basename(file))); + const lowerPattern = pattern.toLowerCase(); + return files.filter((file) => path.basename(file).toLowerCase().includes(lowerPattern)); } return files; diff --git a/lefthook.yml b/lefthook.yml index 8edf99c2f0..00cb9c150b 100644 --- a/lefthook.yml +++ b/lefthook.yml @@ -14,10 +14,12 @@ pre-push: commands: # Only gates on checks that are currently clean. # Add each check back here as its backlog is cleared. - # Remaining backlog (CI continue-on-error): no-raw-regex, no-raw-process-env, check-type-casts + # Remaining backlog (CI continue-on-error): check-type-casts clean-checks: run: > bun scripts/lint/no-raw-typeof.ts && + bun scripts/lint/no-raw-regex.ts && + bun packages/env/scripts/no-raw-process-env.ts && bun scripts/lint/no-circular-deps.ts && bun scripts/lint/no-duplicate-deps.ts && bun scripts/lint/no-duplicate-guards.ts && diff --git a/packages/analytics/src/core/data-export.ts b/packages/analytics/src/core/data-export.ts index c521a061a3..0a39b0226c 100644 --- a/packages/analytics/src/core/data-export.ts +++ b/packages/analytics/src/core/data-export.ts @@ -11,6 +11,7 @@ import { DBConfig, QUALITY_WEIGHTS } from './constants'; import { SQLFragments } from './query-builder'; const FILE_EXTENSION_PATTERN = /\.\w+$/; +const TIMESTAMP_UNSAFE_CHARS = /[:.]/g; // ── Types ──────────────────────────────────────────────────────────────── @@ -75,7 +76,7 @@ export class DataExporter { mkdirSync(outputDir, { recursive: true }); - const timestamp = new Date().toISOString().replace(/[:.]/g, '-').slice(0, 19); + const timestamp = new Date().toISOString().replace(TIMESTAMP_UNSAFE_CHARS, '-').slice(0, 19); const filename = `packrat_export_${timestamp}.${format}`; const filepath = `${outputDir}/${filename}`; diff --git a/packages/analytics/src/core/entity-resolver.ts b/packages/analytics/src/core/entity-resolver.ts index d3858d7582..42b15bec14 100644 --- a/packages/analytics/src/core/entity-resolver.ts +++ 
b/packages/analytics/src/core/entity-resolver.ts @@ -25,24 +25,26 @@ const MAX_BLOCK_SIZE = 5000; const URL_QUERY_OR_HASH_PATTERN = /[?#].*$/; const FILE_EXTENSION_PATTERN = /\.\w+$/; const WHITESPACE_SPLIT_PATTERN = /\s+/; +const GENDER_SIZE_WORDS = /\b(men'?s?|women'?s?|unisex|kids?|youth)\b/gi; +const SIZE_ABBREVIATIONS = /\b(xs|s|m|l|xl|xxl|one size)\b/gi; +const NON_ALPHANUMERIC_SPACES = /[^a-z0-9\s]/g; +const MULTIPLE_SPACES = /\s+/g; +const NON_ALPHANUMERIC = /[^a-z0-9]/g; // ── Normalization ───────────────────────────────────────────────────── function normalizeName(name: string): string { return name .toLowerCase() - .replace(/\b(men'?s?|women'?s?|unisex|kids?|youth)\b/gi, '') - .replace(/\b(xs|s|m|l|xl|xxl|one size)\b/gi, '') - .replace(/[^a-z0-9\s]/g, '') - .replace(/\s+/g, ' ') + .replace(GENDER_SIZE_WORDS, '') + .replace(SIZE_ABBREVIATIONS, '') + .replace(NON_ALPHANUMERIC_SPACES, '') + .replace(MULTIPLE_SPACES, ' ') .trim(); } function normalizeBrand(brand: string): string { - return brand - .toLowerCase() - .replace(/[^a-z0-9]/g, '') - .trim(); + return brand.toLowerCase().replace(NON_ALPHANUMERIC, '').trim(); } function canonicalId(brand: string, name: string): string { diff --git a/packages/api/src/routes/knowledgeBase/reader.ts b/packages/api/src/routes/knowledgeBase/reader.ts index 54831a55df..00f8ab5b55 100644 --- a/packages/api/src/routes/knowledgeBase/reader.ts +++ b/packages/api/src/routes/knowledgeBase/reader.ts @@ -3,40 +3,69 @@ import { Elysia, status } from 'elysia'; import { parseHTML } from 'linkedom'; import { z } from 'zod'; +// ── HTML → Markdown conversion patterns ────────────────────────────── +const WHITESPACE_RUNS = /\s{2,}/g; +const NEWLINE_RUNS = /\n{2,}/g; +const TAB_CHAR = /\t/g; +const NON_BREAKING_SPACE = /\u00a0/g; +const LEADING_TRAILING_WHITESPACE = 
/^\s+|\s+$/g; +const BOILERPLATE_FOOTER = /(We appreciate the time and effort.*|Steve)$/gim; +const HTML_H1 = /<h1[^>]*>([\s\S]*?)<\/h1>/gi; +const HTML_H2 = /<h2[^>]*>([\s\S]*?)<\/h2>/gi; +const HTML_H3 = /<h3[^>]*>([\s\S]*?)<\/h3>/gi; +const HTML_H4 = /<h4[^>]*>([\s\S]*?)<\/h4>/gi; +const HTML_H5 = /<h5[^>]*>([\s\S]*?)<\/h5>/gi; +const HTML_H6 = /<h6[^>]*>([\s\S]*?)<\/h6>/gi; +const HTML_LI = /<li[^>]*>([\s\S]*?)<\/li>/gi; +const HTML_UL = /<ul[^>]*>|<\/ul>/gi; +const HTML_OL = /<ol[^>]*>|<\/ol>/gi; +const HTML_STRONG = /<strong[^>]*>([\s\S]*?)<\/strong>/gi; +const HTML_B = /<b[^>]*>([\s\S]*?)<\/b>/gi; +const HTML_EM = /<em[^>]*>([\s\S]*?)<\/em>/gi; +const HTML_I = /<i[^>]*>([\s\S]*?)<\/i>/gi; +const HTML_A = /<a[^>]*href=["']([^"']*)["'][^>]*>([\s\S]*?)<\/a>/gi; +const HTML_IMG = /<img[^>]*alt=["']([^"']*)["'][^>]*>/gi; +const HTML_BR = /<br\s*\/?>/gi; +const HTML_P_OPEN = /<p[^>]*>/gi; +const HTML_P_CLOSE = /<\/p>/gi; +const TRIPLE_PLUS_NEWLINES = /\n{3,}/g; +const LINE_LEADING_WHITESPACE = /^[ \t]+/gm; +const HTML_TAGS = /<[^>]*>/g; + // Utility to clean up text for embeddings function cleanTextForEmbedding(text: string): string { return text - .replace(/\s{2,}/g, ' ') - .replace(/\n{2,}/g, '\n') - .replace(/\t/g, ' ') - .replace(/\u00a0/g, ' ') - .replace(/^\s+|\s+$/g, '') - .replace(/(We appreciate the time and effort.*|Steve)$/gim, '') + .replace(WHITESPACE_RUNS, ' ') + .replace(NEWLINE_RUNS, '\n') + .replace(TAB_CHAR, ' ') + .replace(NON_BREAKING_SPACE, ' ') + .replace(LEADING_TRAILING_WHITESPACE, '') + .replace(BOILERPLATE_FOOTER, '') .trim(); } function htmlToMarkdown(html: string): string { let result = html - .replace(/<h1[^>]*>([\s\S]*?)<\/h1>/gi, '# $1\n') - .replace(/<h2[^>]*>([\s\S]*?)<\/h2>/gi, '## $1\n') - .replace(/<h3[^>]*>([\s\S]*?)<\/h3>/gi, '### $1\n') - .replace(/<h4[^>]*>([\s\S]*?)<\/h4>/gi, '#### $1\n') - .replace(/<h5[^>]*>([\s\S]*?)<\/h5>/gi, '##### $1\n') - .replace(/<h6[^>]*>([\s\S]*?)<\/h6>/gi, '###### $1\n') - .replace(/<li[^>]*>([\s\S]*?)<\/li>/gi, '- $1\n') - .replace(/<ul[^>]*>|<\/ul>/gi, '') - .replace(/<ol[^>]*>|<\/ol>/gi, '') - .replace(/<strong[^>]*>([\s\S]*?)<\/strong>/gi, '**$1**') - 
.replace(/<b[^>]*>([\s\S]*?)<\/b>/gi, '**$1**') - .replace(/<em[^>]*>([\s\S]*?)<\/em>/gi, '*$1*') - .replace(/<i[^>]*>([\s\S]*?)<\/i>/gi, '*$1*') - .replace(/<a[^>]*href=["']([^"']*)["'][^>]*>([\s\S]*?)<\/a>/gi, '[$2]($1)') - .replace(/<img[^>]*alt=["']([^"']*)["'][^>]*>/gi, '![$1]()') - .replace(/<br\s*\/?>/gi, '\n') - .replace(/<p[^>]*>/gi, '') - .replace(/<\/p>/gi, '\n') - .replace(/\n{3,}/g, '\n\n') - .replace(/^[ \t]+/gm, '') + .replace(HTML_H1, '# $1\n') + .replace(HTML_H2, '## $1\n') + .replace(HTML_H3, '### $1\n') + .replace(HTML_H4, '#### $1\n') + .replace(HTML_H5, '##### $1\n') + .replace(HTML_H6, '###### $1\n') + .replace(HTML_LI, '- $1\n') + .replace(HTML_UL, '') + .replace(HTML_OL, '') + .replace(HTML_STRONG, '**$1**') + .replace(HTML_B, '**$1**') + .replace(HTML_EM, '*$1*') + .replace(HTML_I, '*$1*') + .replace(HTML_A, '[$2]($1)') + .replace(HTML_IMG, '![$1]()') + .replace(HTML_BR, '\n') + .replace(HTML_P_OPEN, '') + .replace(HTML_P_CLOSE, '\n') + .replace(TRIPLE_PLUS_NEWLINES, '\n\n') + .replace(LINE_LEADING_WHITESPACE, '') .trim(); // Strip any remaining HTML tags in multiple passes to avoid incomplete @@ -45,7 +74,7 @@ function htmlToMarkdown(html: string): string { let iterations = 0; while (result !== prev && iterations++ < 10) { prev = result; - result = result.replace(/<[^>]*>/g, ''); + result = result.replace(HTML_TAGS, ''); } return result; } diff --git a/packages/api/src/routes/packTemplates/index.ts b/packages/api/src/routes/packTemplates/index.ts index 65677cf6e1..5a65841180 100644 --- a/packages/api/src/routes/packTemplates/index.ts +++ b/packages/api/src/routes/packTemplates/index.ts @@ -24,6 +24,7 @@ import { z } from 'zod'; // --------------------------------------------------------------------------- const QUERY_STRIP_RE = /[?&].*$/; +const STRIP_HYPHENS = /-/g; function generateContentIdFromUrl(url: string): string { const normalizedUrl = url.toLowerCase().replace(QUERY_STRIP_RE, ''); @@ -323,7 +324,7 @@ export const packTemplatesRoutes = new Elysia({ prefix: 
'/pack-templates' }) : { items: [] as never[] }; const now = new Date(); - const templateId = `pt_${crypto.randomUUID().replace(/-/g, '').slice(0, 21)}`; + const templateId = `pt_${crypto.randomUUID().replace(STRIP_HYPHENS, '').slice(0, 21)}`; const { newTemplate, insertedItems } = await db.transaction(async (tx) => { const [createdTemplate] = await tx @@ -348,7 +349,7 @@ export const packTemplatesRoutes = new Elysia({ prefix: '/pack-templates' }) const itemRecords = analysis.items.map((detected, index) => { const catalogMatches = batchResult.items[index] ?? []; const bestMatch = catalogMatches[0]; - const itemId = `pti_${crypto.randomUUID().replace(/-/g, '').slice(0, 21)}`; + const itemId = `pti_${crypto.randomUUID().replace(STRIP_HYPHENS, '').slice(0, 21)}`; return { id: itemId, diff --git a/packages/api/src/routes/trailConditions/reports.ts b/packages/api/src/routes/trailConditions/reports.ts index e6f3b3cfea..47b854582c 100644 --- a/packages/api/src/routes/trailConditions/reports.ts +++ b/packages/api/src/routes/trailConditions/reports.ts @@ -6,6 +6,11 @@ import { and, desc, eq, gte, ilike, type SQL } from 'drizzle-orm'; import { Elysia, status } from 'elysia'; import { z } from 'zod'; +// ── LIKE-clause escape patterns ─────────────────────────────────────── +const LIKE_ESCAPE_BACKSLASH = /\\/g; +const LIKE_ESCAPE_PERCENT = /%/g; +const LIKE_ESCAPE_UNDERSCORE = /_/g; + const CreateReportRequestSchema = z.object({ id: z.string().describe('Client-generated report ID'), trailName: z.string().min(1), @@ -57,9 +62,9 @@ export const trailConditionRoutes = new Elysia() const normalized = trailName.trim(); if (normalized.length > 0) { const escaped = normalized - .replace(/\\/g, '\\\\') - .replace(/%/g, '\\%') - .replace(/_/g, '\\_'); + .replace(LIKE_ESCAPE_BACKSLASH, '\\\\') + .replace(LIKE_ESCAPE_PERCENT, '\\%') + .replace(LIKE_ESCAPE_UNDERSCORE, '\\_'); conditions.push(ilike(trailConditionReports.trailName, `%${escaped}%`)); } } diff --git 
a/packages/api/src/routes/wildlife/index.ts b/packages/api/src/routes/wildlife/index.ts index 85dd534004..13af169724 100644 --- a/packages/api/src/routes/wildlife/index.ts +++ b/packages/api/src/routes/wildlife/index.ts @@ -6,6 +6,10 @@ import { getPresignedUrl } from '@packrat/api/utils/getPresignedUrl'; import { Elysia, status } from 'elysia'; import { z } from 'zod'; +// ── Slug normalization patterns ─────────────────────────────────────── +const SPACES_AND_DOTS = /[\s.]+/g; +const NON_SLUG_CHARS = /[^a-z0-9-]/g; + const IdentifyRequestSchema = z.object({ image: z.string().describe('Uploaded image key in R2'), }); @@ -55,10 +59,7 @@ export const wildlifeRoutes = new Elysia({ prefix: '/wildlife' }).use(authPlugin // Map AI results with stable IDs derived from scientific name const slugify = (name: string) => - name - .toLowerCase() - .replaceAll(/[\s.]+/g, '-') - .replaceAll(/[^a-z0-9-]/g, ''); + name.toLowerCase().replaceAll(SPACES_AND_DOTS, '-').replaceAll(NON_SLUG_CHARS, ''); const results = identification.results.map((r, index) => { const id = r.scientificName?.trim() diff --git a/packages/api/src/services/embeddingService.ts b/packages/api/src/services/embeddingService.ts index 997ff1dd06..d3223f0e0c 100644 --- a/packages/api/src/services/embeddingService.ts +++ b/packages/api/src/services/embeddingService.ts @@ -3,6 +3,9 @@ import { DEFAULT_MODELS } from '@packrat/api/utils/ai/models'; import { type AIProvider, createAIProvider } from '@packrat/api/utils/ai/provider'; import { embed, embedMany } from 'ai'; +// ── Embedding text normalization ────────────────────────────────────── +const NEWLINE = /\n/g; + type GenerateEmbeddingBaseParams = { openAiApiKey: string; provider: AIProvider; @@ -28,7 +31,7 @@ export const generateEmbedding = async ( const aiProvider = createAIProvider(providerConfig); // OpenAI recommends replacing newlines with spaces for best results - const input = value.replace(/\n/g, ' '); + const input = value.replace(NEWLINE, ' '); const { 
embedding } = await embed({ model: aiProvider.embedding(DEFAULT_MODELS.OPENAI_EMBEDDING), @@ -48,7 +51,7 @@ export const generateManyEmbeddings = async ( const { values, ...providerConfig } = params; // Filter out empty/whitespace-only strings - const cleanValues = values.map((v) => v?.replace(/\n/g, ' ').trim()).filter(Boolean); + const cleanValues = values.map((v) => v?.replace(NEWLINE, ' ').trim()).filter(Boolean); if (cleanValues.length === 0) { return []; } diff --git a/packages/api/src/services/executeSqlAiTool.ts b/packages/api/src/services/executeSqlAiTool.ts index f9434c085a..c171685b84 100644 --- a/packages/api/src/services/executeSqlAiTool.ts +++ b/packages/api/src/services/executeSqlAiTool.ts @@ -1,6 +1,9 @@ import { sql } from 'drizzle-orm'; import { createReadOnlyDb } from '../db'; +// ── SQL complexity patterns ─────────────────────────────────────────── +const SQL_JOIN_KEYWORD = /\bjoin\b/g; + interface Params { query: string; limit: number; @@ -76,7 +79,7 @@ function isReadOnlyQuery(query: string): boolean { function validateQueryComplexity(query: string): { valid: boolean; error?: string } { const normalizedQuery = query.toLowerCase(); - const joinCount = (normalizedQuery.match(/\bjoin\b/g) || []).length; + const joinCount = (normalizedQuery.match(SQL_JOIN_KEYWORD) || []).length; if (joinCount > 5) { return { valid: false, error: 'Query too complex: maximum 5 joins allowed' }; } diff --git a/packages/api/src/services/r2-bucket.ts b/packages/api/src/services/r2-bucket.ts index 869b43a749..c8d1bbae84 100644 --- a/packages/api/src/services/r2-bucket.ts +++ b/packages/api/src/services/r2-bucket.ts @@ -15,6 +15,9 @@ import { import type { Env } from '@packrat/api/types/env'; import { isDate, isFunction, isNumber, isObject, isString } from '@packrat/guards'; +// ── ETag normalization ──────────────────────────────────────────────── +const STRIP_DOUBLE_QUOTES = /"/g; + // Define our own types to avoid conflicts with Cloudflare Workers types interface 
R2HTTPMetadata { contentType?: string; @@ -614,7 +617,7 @@ export class R2BucketService { key, version: isString(response.VersionId) ? response.VersionId : '', size: isNumber(response.ContentLength) ? response.ContentLength : 0, - etag: isString(response.ETag) ? response.ETag.replace(/"/g, '') : '', + etag: isString(response.ETag) ? response.ETag.replace(STRIP_DOUBLE_QUOTES, '') : '', httpEtag: isString(response.ETag) ? response.ETag : '', checksums: this.createChecksums(response), uploaded: toUploaded(response.LastModified), diff --git a/packages/api/src/utils/csv-utils.ts b/packages/api/src/utils/csv-utils.ts index 9bdf8e6d3f..61282eaa65 100644 --- a/packages/api/src/utils/csv-utils.ts +++ b/packages/api/src/utils/csv-utils.ts @@ -2,6 +2,28 @@ import { isString } from '@packrat/guards'; import type { NewCatalogItem } from '../db/schema'; import { AvailabilitySchema, WeightUnitSchema } from '../types'; +// ── CSV sanitization regex constants ── +const NEWLINE_CHARS = /[\r\n]+/g; +const SINGLE_QUOTE_TO_DOUBLE = /'/g; +const WRAPPING_QUOTES = /^"|"$/g; +const PYTHON_NONE = /\bNone\b/g; +const PYTHON_TRUE = /\bTrue\b/g; +const PYTHON_FALSE = /\bFalse\b/g; +const CURLY_SINGLE_QUOTES = /[‘’‛‹›]/g; +const CURLY_DOUBLE_QUOTES = /[“”„‟«»]/g; +const BACKTICK_CHARS = /[`]/g; +const UNQUOTED_OBJECT_KEY = /([{,]\s*)'([^']+?)'\s*:/g; +const SINGLE_QUOTED_VALUE = /:\s*'(.*?)'(?=\s*[},])/g; +const ESCAPE_BACKSLASHES = /\\/g; +const ESCAPE_DOUBLE_QUOTES = /"/g; +const CONTROL_CHARS = /\\n|\\r|\\b|\\t|\\f|\r?\n|\r|\b|\t|\f/g; +const UNICODE_LINE_SEPARATORS = /\u2028|\u2029/g; +const HEX_ESCAPE = /\\x([0-9A-Fa-f]{2})/g; +const LONE_BACKSLASH = /([^\\])\\(?![\\/"'bfnrtu])/g; +const TRAILING_COMMA = /,\s*([}\]])/g; +const ESCAPED_DOUBLE_QUOTE = /\\"/g; +const NON_NUMERIC_PRICE = /[^0-9.]/g; + export function mapCsvRowToItem({ values, fieldMap, @@ -13,7 +35,7 @@ export function mapCsvRowToItem({ // --- Optional Scalars --- item.description = fieldMap.description !== undefined - ? 
values[fieldMap.description]?.replace(/[\r\n]+/g, ' ').trim() + ? values[fieldMap.description]?.replace(NEWLINE_CHARS, ' ').trim() : undefined; const name = fieldMap.name !== undefined ? values[fieldMap.name]?.trim() : undefined; @@ -93,7 +115,7 @@ export function mapCsvRowToItem({ item.variants = JSON.parse(val); } catch { try { - item.variants = JSON.parse(val.replace(/'/g, '"')); + item.variants = JSON.parse(val.replace(SINGLE_QUOTE_TO_DOUBLE, '"')); } catch { item.variants = []; } @@ -165,7 +187,7 @@ export function mapCsvRowToItem({ for (const field of stringFields) { const index = fieldMap[field]; if (index !== undefined && values[index]) { - item[field] = values[index].replace(/^"|"$/g, '').trim(); + item[field] = values[index].replace(WRAPPING_QUOTES, '').trim(); } } @@ -174,7 +196,7 @@ export function mapCsvRowToItem({ const availabilityValue = values[fieldMap.availability]; if (availabilityValue) { const parsedAvailability = AvailabilitySchema.safeParse( - availabilityValue.replace(/^"|"$/g, '').trim(), + availabilityValue.replace(WRAPPING_QUOTES, '').trim(), ); if (parsedAvailability.success) { item.availability = parsedAvailability.data; @@ -221,36 +243,36 @@ export function normalizeJsonString(value: string): string { .trim() // Replace Python-style null/booleans with JS equivalents - .replace(/\bNone\b/g, 'null') - .replace(/\bTrue\b/g, 'true') - .replace(/\bFalse\b/g, 'false') + .replace(PYTHON_NONE, 'null') + .replace(PYTHON_TRUE, 'true') + .replace(PYTHON_FALSE, 'false') // Normalize smart/special quotes to standard quotes - .replace(/[‘’‛‹›]/g, "'") - .replace(/[“”„‟«»]/g, '"') - .replace(/[`]/g, '') + .replace(CURLY_SINGLE_QUOTES, "'") + .replace(CURLY_DOUBLE_QUOTES, '"') + .replace(BACKTICK_CHARS, '') // Convert object keys from 'key': to "key": - .replace(/([{,]\s*)'([^']+?)'\s*:/g, '$1"$2":') + .replace(UNQUOTED_OBJECT_KEY, '$1"$2":') // Convert string values from 'value' to "escaped value" - .replace(/:\s*'(.*?)'(?=\s*[},])/g, (_, val) => { + 
.replace(SINGLE_QUOTED_VALUE, (_, val) => { const escaped = val - .replace(/\\/g, '\\\\') // Escape backslashes - .replace(/"/g, '\\"') // Escape double quotes - .replace(/\\n|\\r|\\b|\\t|\\f|\r?\n|\r|\b|\t|\f/g, '') // Remove newlines/control chars - .replace(/\u2028|\u2029/g, ''); // Remove special Unicode line separators + .replace(ESCAPE_BACKSLASHES, '\\\\') // Escape backslashes + .replace(ESCAPE_DOUBLE_QUOTES, '\\"') // Escape double quotes + .replace(CONTROL_CHARS, '') // Remove newlines/control chars + .replace(UNICODE_LINE_SEPARATORS, ''); // Remove special Unicode line separators return `: "${escaped}"`; }) // Decode \xNN hex escapes to characters - .replace(/\\x([0-9A-Fa-f]{2})/g, (_, hex) => String.fromCharCode(parseInt(hex, 16))) + .replace(HEX_ESCAPE, (_, hex) => String.fromCharCode(parseInt(hex, 16))) // Escape lone backslashes (e.g., \ not followed by valid escape) - .replace(/([^\\])\\(?![\\/"'bfnrtu])/g, '$1\\\\') + .replace(LONE_BACKSLASH, '$1\\\\') // Remove trailing commas before closing braces/brackets - .replace(/,\s*([}\]])/g, '$1') + .replace(TRAILING_COMMA, '$1') ); } @@ -280,7 +302,7 @@ export function parseFaqs(input: string): Array<{ question: string; answer: stri // Remove outer quotes let cleaned = input.trim(); if (cleaned.startsWith('"') && cleaned.endsWith('"')) { - cleaned = cleaned.slice(1, -1).replace(/\\"/g, '"'); + cleaned = cleaned.slice(1, -1).replace(ESCAPED_DOUBLE_QUOTE, '"'); } // Replace smart quotes @@ -305,6 +327,6 @@ export function parseFaqs(input: string): Array<{ question: string; answer: stri export function parsePrice(priceStr: string): number | null { if (!priceStr) return null; - const price = parseFloat(priceStr.replace(/[^0-9.]/g, '')); + const price = parseFloat(priceStr.replace(NON_NUMERIC_PRICE, '')); return Number.isNaN(price) ? 
null : price; } diff --git a/packages/api/src/utils/format-ai-response.ts b/packages/api/src/utils/format-ai-response.ts index 7ade1c0316..12fff3c2ef 100644 --- a/packages/api/src/utils/format-ai-response.ts +++ b/packages/api/src/utils/format-ai-response.ts @@ -1,3 +1,10 @@ +// ── Formatting regex constants ── +const BULLET_LINE_PATTERN = /^\s*[-*]\s+(.+)$/gm; +const SENTENCE_BOUNDARY_PATTERN = /([.?!])\s*(?=[A-Z])/g; +const BOLD_MARKDOWN_PATTERN = /\*\*(.+?)\*\*/g; +const ITALIC_MARKDOWN_PATTERN = /\*(.+?)\*/g; +const MARKDOWN_HEADER_PATTERN = /^#+\s+(.+)$/gm; + /** * Formats AI responses to improve readability in the chat UI * - Converts markdown lists to plain text with proper spacing @@ -6,17 +13,17 @@ */ export function formatAIResponse(text: string): string { // Convert markdown lists to plain text with emoji bullets - let formatted = text.replace(/^\s*[-*]\s+(.+)$/gm, '• $1'); + let formatted = text.replace(BULLET_LINE_PATTERN, '• $1'); // Add proper spacing after periods, question marks, and exclamation points - formatted = formatted.replace(/([.?!])\s*(?=[A-Z])/g, '$1\n\n'); + formatted = formatted.replace(SENTENCE_BOUNDARY_PATTERN, '$1\n\n'); // Convert markdown emphasis to plain text - formatted = formatted.replace(/\*\*(.+?)\*\*/g, '$1'); - formatted = formatted.replace(/\*(.+?)\*/g, '$1'); + formatted = formatted.replace(BOLD_MARKDOWN_PATTERN, '$1'); + formatted = formatted.replace(ITALIC_MARKDOWN_PATTERN, '$1'); // Handle markdown headers - formatted = formatted.replace(/^#+\s+(.+)$/gm, '$1'); + formatted = formatted.replace(MARKDOWN_HEADER_PATTERN, '$1'); return formatted.trim(); } diff --git a/packages/api/test/setup.ts b/packages/api/test/setup.ts index be36105070..02f9a53148 100644 --- a/packages/api/test/setup.ts +++ b/packages/api/test/setup.ts @@ -5,6 +5,9 @@ import { afterAll, beforeAll, beforeEach, vi } from 'vitest'; import * as schema from '../src/db/schema'; import { clearCurrentTestUsers } from './utils/test-helpers'; +// ── Setup regex 
constants ── +const MARKDOWN_EXT_PATTERN = /\.(mdx?|md)$/; + // Route @neondatabase/serverless through the local wsproxy (docker-compose.test.yml), // so tests use the same driver as production against Docker Postgres. // wsproxy upgrades on /v1 and reads the target from ?address= (resolved inside the @@ -439,7 +442,7 @@ vi.mock('@packrat/api/services/r2-bucket', () => { put: vi.fn(async (key: string, _value: unknown, _options?: unknown) => { return createMockR2Object({ key, - title: key.replace(/\.(mdx?|md)$/, ''), + title: key.replace(MARKDOWN_EXT_PATTERN, ''), category: 'general', categories: ['general'], description: 'Mock guide', diff --git a/packages/env/scripts/no-raw-process-env.ts b/packages/env/scripts/no-raw-process-env.ts index 2ea22c1067..a9a567c9bc 100644 --- a/packages/env/scripts/no-raw-process-env.ts +++ b/packages/env/scripts/no-raw-process-env.ts @@ -40,6 +40,18 @@ const ALLOWED: string[] = [ 'packages/api/container_src/server.ts', 'packages/analytics/test/core/env.test.ts', 'apps/expo/app.config.ts', + // The check script itself (process.env appears in string literals / regex) + 'packages/env/scripts/no-raw-process-env.ts', + // Startup script — passes process.env to a validator; correct pattern for Node scripts + 'packages/api/scripts/validate-cloudflare-api-env.ts', + // Analytics env shim — this IS the shim, same as packages/env/src/*.ts above + 'packages/analytics/src/core/env.ts', + // One-off sync script, not app code + 'apps/guides/scripts/sync-to-r2.ts', + // Test files that mutate process.env to exercise env-validation logic + 'packages/api/src/utils/__tests__/', + // Admin env shim — parses process.env once at module load + 'apps/admin/lib/env.ts', ]; // Directories to skip entirely diff --git a/packages/mcp/src/tools/packs.ts b/packages/mcp/src/tools/packs.ts index 973290c7ef..38608be5f2 100644 --- a/packages/mcp/src/tools/packs.ts +++ b/packages/mcp/src/tools/packs.ts @@ -3,6 +3,9 @@ import { err, ok } from '../client'; import { 
ItemCategory, PackCategory } from '../enums'; import type { AgentContext } from '../types'; +// ── Tool regex constants ── +const STRIP_HYPHENS = /-/g; + interface PackDetailResponse { items?: Array<{ name: string; @@ -84,7 +87,7 @@ export function registerPackTools(agent: AgentContext): void { }, async ({ name, description, category, is_public, tags }) => { try { - const id = `p_${crypto.randomUUID().replace(/-/g, '').slice(0, 12)}`; + const id = `p_${crypto.randomUUID().replace(STRIP_HYPHENS, '').slice(0, 12)}`; const now = new Date().toISOString(); const data = await agent.api.post('/packs', { id, @@ -192,7 +195,7 @@ export function registerPackTools(agent: AgentContext): void { notes, }) => { try { - const id = `i_${crypto.randomUUID().replace(/-/g, '').slice(0, 12)}`; + const id = `i_${crypto.randomUUID().replace(STRIP_HYPHENS, '').slice(0, 12)}`; const now = new Date().toISOString(); const data = await agent.api.post(`/packs/${pack_id}/items`, { id, diff --git a/packages/mcp/src/tools/trail-conditions.ts b/packages/mcp/src/tools/trail-conditions.ts index d25fdb516f..111f9bd13f 100644 --- a/packages/mcp/src/tools/trail-conditions.ts +++ b/packages/mcp/src/tools/trail-conditions.ts @@ -3,6 +3,9 @@ import { err, ok } from '../client'; import { CrossingDifficulty, TrailCondition, TrailSurface } from '../enums'; import type { AgentContext } from '../types'; +// ── Tool regex constants ── +const STRIP_HYPHENS = /-/g; + export function registerTrailConditionTools(agent: AgentContext): void { // ── Get trail conditions ────────────────────────────────────────────────── @@ -84,7 +87,7 @@ export function registerTrailConditionTools(agent: AgentContext): void { notes, }) => { try { - const id = `tcr_${crypto.randomUUID().replace(/-/g, '').slice(0, 12)}`; + const id = `tcr_${crypto.randomUUID().replace(STRIP_HYPHENS, '').slice(0, 12)}`; const now = new Date().toISOString(); const data = await agent.api.post('/trail-conditions', { id, diff --git 
a/packages/mcp/src/tools/trips.ts b/packages/mcp/src/tools/trips.ts index fef3488661..adbaa46483 100644 --- a/packages/mcp/src/tools/trips.ts +++ b/packages/mcp/src/tools/trips.ts @@ -2,6 +2,9 @@ import { z } from 'zod'; import { err, ok } from '../client'; import type { AgentContext } from '../types'; +// ── Tool regex constants ── +const STRIP_HYPHENS = /-/g; + export function registerTripTools(agent: AgentContext): void { // ── List trips ──────────────────────────────────────────────────────────── @@ -88,7 +91,7 @@ export function registerTripTools(agent: AgentContext): void { pack_id, }) => { try { - const id = `t_${crypto.randomUUID().replace(/-/g, '').slice(0, 12)}`; + const id = `t_${crypto.randomUUID().replace(STRIP_HYPHENS, '').slice(0, 12)}`; const now = new Date().toISOString(); const data = await agent.api.post('/trips', { id, diff --git a/packages/web-ui/src/components/chart.tsx b/packages/web-ui/src/components/chart.tsx index 685802ac85..75e4db74ff 100644 --- a/packages/web-ui/src/components/chart.tsx +++ b/packages/web-ui/src/components/chart.tsx @@ -8,6 +8,9 @@ import { cn } from '../lib/utils'; // Format: { THEME_NAME: CSS_SELECTOR } const THEMES = { light: '', dark: '.dark' } as const; +// ── Component regex constants ── +const STRIP_COLONS = /:/g; + export type ChartConfig = { [k in string]: { label?: React.ReactNode; @@ -42,7 +45,7 @@ const ChartContainer = React.forwardRef< } >(({ id, className, children, config, ...props }, ref) => { const uniqueId = React.useId(); - const chartId = `chart-${id || uniqueId.replace(/:/g, '')}`; + const chartId = `chart-${id || uniqueId.replace(STRIP_COLONS, '')}`; return ( diff --git a/scripts/lint/no-raw-regex.ts b/scripts/lint/no-raw-regex.ts index 646556f84a..26bc3f8c82 100644 --- a/scripts/lint/no-raw-regex.ts +++ b/scripts/lint/no-raw-regex.ts @@ -41,8 +41,12 @@ const REGEX_PATTERN = const EXCLUDED_DIRS = new Set(['node_modules', 'dist', 'build']); -// The reference implementation is allowed to use raw 
regex -const EXCLUDED_FILES = new Set(['packages/analytics/src/core/enrichment.ts']); +// Files explicitly allowed to use raw regex. +// alltrails.ts: builds regex from a dynamic `property` argument — can't be a static constant. +const EXCLUDED_FILES = new Set([ + 'packages/analytics/src/core/enrichment.ts', + 'packages/api/src/routes/alltrails.ts', +]); function isTargetFile(name: string): boolean { return /\.(ts|tsx|cts|mts)$/.test(name) && !/\.(test|spec)\.(ts|tsx|cts|mts)$/.test(name);