diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d9e1f6fe7a7..b1c9a076cf0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -83,6 +83,21 @@ jobs: working-directory: apps/desktop run: bun run install:deps + # `bun install --ignore-scripts` skips postinstalls for safety; the + # @vscode/ripgrep package uses its postinstall to download the + # platform-specific ripgrep binary. workspace-fs tests that exercise + # the streaming / multiline paths need that binary, so run the + # postinstall explicitly for just this package. + - name: Download bundled ripgrep binary + run: | + rg_pkg=$(ls -d node_modules/.bun/@vscode+ripgrep@*/node_modules/@vscode/ripgrep | head -1) + if [ -z "$rg_pkg" ]; then + echo "::error::@vscode/ripgrep not found in node_modules" + exit 1 + fi + node "$rg_pkg/lib/postinstall.js" + ls -la "$rg_pkg/bin" || true + - name: Test env: RELAY_URL: https://relay.superset.sh diff --git a/apps/desktop/src/lib/trpc/routers/filesystem/index.ts b/apps/desktop/src/lib/trpc/routers/filesystem/index.ts index fa319bf02d9..698ff4b9cc2 100644 --- a/apps/desktop/src/lib/trpc/routers/filesystem/index.ts +++ b/apps/desktop/src/lib/trpc/routers/filesystem/index.ts @@ -1,3 +1,4 @@ +import type { FsContentMatch } from "@superset/workspace-fs/host"; import { toErrorMessage, WorkspaceFsPathError, @@ -130,6 +131,9 @@ const searchContentInputSchema = z.object({ limit: z.number().optional(), isRegex: z.boolean().optional(), caseSensitive: z.boolean().optional(), + wholeWord: z.boolean().optional(), + multiline: z.boolean().optional(), + scopeId: z.string().optional(), }); const replaceContentInputSchema = z.object({ @@ -141,6 +145,8 @@ const replaceContentInputSchema = z.object({ excludePattern: z.string().optional(), isRegex: z.boolean().optional(), caseSensitive: z.boolean().optional(), + wholeWord: z.boolean().optional(), + multiline: z.boolean().optional(), paths: z.array(z.string()).optional(), }); @@ -388,6 +394,9 @@ export const 
createFilesystemRouter = () => { limit: input.limit, isRegex: input.isRegex, caseSensitive: input.caseSensitive, + wholeWord: input.wholeWord, + multiline: input.multiline, + scopeId: input.scopeId, }); }); }), @@ -415,11 +424,90 @@ export const createFilesystemRouter = () => { excludePattern: input.excludePattern, isRegex: input.isRegex, caseSensitive: input.caseSensitive, + wholeWord: input.wholeWord, + multiline: input.multiline, paths: input.paths, }); }); }), + searchContentStream: publicProcedure + .input( + z.object({ + workspaceId: z.string(), + query: z.string(), + includeHidden: z.boolean().optional(), + includePattern: z.string().optional(), + excludePattern: z.string().optional(), + limit: z.number().optional(), + isRegex: z.boolean().optional(), + caseSensitive: z.boolean().optional(), + wholeWord: z.boolean().optional(), + multiline: z.boolean().optional(), + scopeId: z.string().optional(), + }), + ) + .subscription(({ input }) => { + return observable<{ match: FsContentMatch }>((emit) => { + const trimmed = input.query.trim(); + if (!trimmed) { + emit.complete(); + return () => {}; + } + + const service = getServiceForWorkspace(input.workspaceId); + let isDisposed = false; + const stream = service.searchContentStream({ + ...input, + query: trimmed, + }); + const iterator = stream[Symbol.asyncIterator](); + + const runCleanup = () => { + isDisposed = true; + void iterator.return?.().catch((error) => { + console.error( + "[filesystem/searchContentStream] Cleanup failed:", + { workspaceId: input.workspaceId, error }, + ); + }); + }; + + void (async () => { + try { + while (!isDisposed) { + const next = await iterator.next(); + if (next.done || isDisposed) { + if (!isDisposed) emit.complete(); + return; + } + try { + emit.next(next.value); + } catch (error) { + if (isClosedStreamError(error)) { + runCleanup(); + return; + } + throw error; + } + } + } catch (error) { + if (!isDisposed) { + try { + emit.error(error); + } catch { + // subscriber already 
gone; nothing else to do. + } + } + } + })(); + + return () => { + runCleanup(); + }; + }); + }), + watchPath: publicProcedure .input( z.object({ diff --git a/apps/desktop/src/lib/trpc/routers/workspace-fs-service.ts b/apps/desktop/src/lib/trpc/routers/workspace-fs-service.ts index 2d223735c73..a5fdc559ee7 100644 --- a/apps/desktop/src/lib/trpc/routers/workspace-fs-service.ts +++ b/apps/desktop/src/lib/trpc/routers/workspace-fs-service.ts @@ -1,4 +1,4 @@ -import { execFile } from "node:child_process"; +import { execFile, spawn } from "node:child_process"; import path from "node:path"; import { promisify } from "node:util"; import { @@ -31,6 +31,57 @@ const rgExecutablePath = bundledRgPath.includes( ) : bundledRgPath; +async function* spawnBundledRipgrep( + args: string[], + options: { cwd: string; signal?: AbortSignal }, +): AsyncIterable { + // Streaming counterpart to `runRipgrep`: feeds searchContentStream so the + // Search tab can render matches as ripgrep emits them. We SIGTERM the + // child on abort instead of relying on `spawn`'s `signal` option so we + // can drain cleanly without propagating an AbortError into the generator. 
+ const child = spawn(rgExecutablePath, args, { + cwd: options.cwd, + windowsHide: true, + }); + + const onAbort = () => { + if (!child.killed) child.kill("SIGTERM"); + }; + const signal = options.signal; + if (signal) { + if (signal.aborted) { + onAbort(); + } else { + signal.addEventListener("abort", onAbort, { once: true }); + } + } + + try { + child.stdout.setEncoding("utf8"); + for await (const chunk of child.stdout as AsyncIterable) { + if (signal?.aborted) return; + yield chunk; + } + await new Promise((resolve, reject) => { + child.once("error", reject); + child.once("close", (code) => { + if (signal?.aborted || code === null || code === 0 || code === 1) { + resolve(); + } else { + const err = new Error(`ripgrep exited with code ${code}`) as Error & { + code?: number; + }; + err.code = code; + reject(err); + } + }); + }); + } finally { + signal?.removeEventListener("abort", onAbort); + if (!child.killed) child.kill("SIGTERM"); + } +} + const sharedHostServiceOptions = { trashItem: async (absolutePath: string) => { await shell.trashItem(absolutePath); @@ -50,6 +101,7 @@ const sharedHostServiceOptions = { }); return { stdout: result.stdout }; }, + spawnRipgrep: spawnBundledRipgrep, }; export function resolveWorkspaceRootPath(workspaceId: string): string { diff --git a/apps/desktop/src/renderer/screens/main/components/WorkspaceView/RightSidebar/SearchView/SearchView.tsx b/apps/desktop/src/renderer/screens/main/components/WorkspaceView/RightSidebar/SearchView/SearchView.tsx index 56c061f815e..2c0e4e0b7b4 100644 --- a/apps/desktop/src/renderer/screens/main/components/WorkspaceView/RightSidebar/SearchView/SearchView.tsx +++ b/apps/desktop/src/renderer/screens/main/components/WorkspaceView/RightSidebar/SearchView/SearchView.tsx @@ -213,6 +213,8 @@ export function SearchView({ const [replaceOpen, setReplaceOpen] = useState(false); const [isRegex, setIsRegex] = useState(false); const [caseSensitive, setCaseSensitive] = useState(false); + const [wholeWord, 
setWholeWord] = useState(false); + const [multiline, setMultiline] = useState(false); const [openGroups, setOpenGroups] = useState>({}); const [openFolders, setOpenFolders] = useState>({}); const [ignoredMatchIds, setIgnoredMatchIds] = useState>( @@ -263,6 +265,12 @@ export function SearchView({ excludePattern, isRegex, caseSensitive, + wholeWord, + // `multiline` only meaningfully applies to regex patterns in VSCode, + // so we drop it entirely when the user isn't in regex mode. This + // lets the regex toggle control visibility and avoids wasted + // ripgrep calls with `--multiline` on fixed strings. + multiline: isRegex && multiline, enabled: isActive, }); @@ -282,7 +290,7 @@ export function SearchView({ () => collectFolderPaths(treeResults), [treeResults], ); - const searchResultResetKey = `${query}\u0000${includePattern}\u0000${excludePattern}\u0000${isRegex}\u0000${caseSensitive}`; + const searchResultResetKey = `${query}\u0000${includePattern}\u0000${excludePattern}\u0000${isRegex}\u0000${caseSensitive}\u0000${wholeWord}\u0000${multiline}`; const copySupersetLink = useCallback( ({ @@ -412,7 +420,14 @@ export function SearchView({ validationError === null && !replaceMutation.isPending && !writeFileMutation.isPending; - const canInlineReplace = hasQuery && validationError === null; + // The per-match inline replace applies the regex line by line, so a + // multiline pattern (e.g. `foo\nbar`) that matched across newlines can + // never be applied by that code path — it would simply report the hit + // as out-of-date. "Replace all" still works because the backend replaces + // against the full file content. Disable inline replace in that case so + // users don't silently hit the stale-match error. 
+ const canInlineReplace = + hasQuery && validationError === null && !(isRegex && multiline); const runReplace = useCallback( async (paths?: string[]) => { @@ -430,6 +445,8 @@ export function SearchView({ excludePattern, isRegex, caseSensitive, + wholeWord, + multiline: isRegex && multiline, paths, }); @@ -464,11 +481,13 @@ export function SearchView({ excludePattern, includePattern, isRegex, + multiline, query, replacement, replaceMutation, utils.filesystem.searchContent, validationError, + wholeWord, workspaceId, ], ); @@ -503,6 +522,8 @@ export function SearchView({ line: lineMatch.line, isRegex, caseSensitive, + wholeWord, + multiline: isRegex && multiline, }, ); @@ -552,10 +573,12 @@ export function SearchView({ [ caseSensitive, isRegex, + multiline, query, replacement, utils, validationError, + wholeWord, workspaceId, writeFileMutation, ], @@ -629,6 +652,8 @@ export function SearchView({ excludePattern={excludePattern} isRegex={isRegex} caseSensitive={caseSensitive} + wholeWord={wholeWord} + multiline={multiline} canReplaceAll={canReplaceAll && totalMatches > 0} isReplacing={replaceMutation.isPending || writeFileMutation.isPending} onQueryChange={setQuery} @@ -642,6 +667,8 @@ export function SearchView({ onToggleReplace={() => setReplaceOpen((current) => !current)} onToggleRegex={() => setIsRegex((current) => !current)} onToggleCaseSensitive={() => setCaseSensitive((current) => !current)} + onToggleWholeWord={() => setWholeWord((current) => !current)} + onToggleMultiline={() => setMultiline((current) => !current)} onReplaceAll={() => { void runReplace(); }} @@ -800,6 +827,9 @@ export function SearchView({ query={query} isRegex={isRegex} caseSensitive={caseSensitive} + wholeWord={wholeWord} + multiline={isRegex && multiline} + replacement={replaceOpen ? 
replacement : undefined} isReplacing={ replaceMutation.isPending || writeFileMutation.isPending @@ -832,6 +862,9 @@ export function SearchView({ query={query} isRegex={isRegex} caseSensitive={caseSensitive} + wholeWord={wholeWord} + multiline={isRegex && multiline} + replacement={replaceOpen ? replacement : undefined} isReplacing={ replaceMutation.isPending || writeFileMutation.isPending diff --git a/apps/desktop/src/renderer/screens/main/components/WorkspaceView/RightSidebar/SearchView/components/SearchFileGroup/SearchFileGroup.tsx b/apps/desktop/src/renderer/screens/main/components/WorkspaceView/RightSidebar/SearchView/components/SearchFileGroup/SearchFileGroup.tsx index 5ad9d5097e4..405ee58da51 100644 --- a/apps/desktop/src/renderer/screens/main/components/WorkspaceView/RightSidebar/SearchView/components/SearchFileGroup/SearchFileGroup.tsx +++ b/apps/desktop/src/renderer/screens/main/components/WorkspaceView/RightSidebar/SearchView/components/SearchFileGroup/SearchFileGroup.tsx @@ -22,6 +22,10 @@ interface SearchFileGroupProps { query: string; isRegex: boolean; caseSensitive: boolean; + wholeWord?: boolean; + multiline?: boolean; + /** Forwarded to SearchMatchItem for the VSCode-style inline diff preview. */ + replacement?: string; isReplacing: boolean; showReplaceAction: boolean; showParentPath?: boolean; @@ -75,6 +79,9 @@ export const SearchFileGroup = memo(function SearchFileGroup({ query, isRegex, caseSensitive, + wholeWord = false, + multiline = false, + replacement, isReplacing, showReplaceAction, showParentPath = true, @@ -233,6 +240,9 @@ export const SearchFileGroup = memo(function SearchFileGroup({ query={query} isRegex={isRegex} caseSensitive={caseSensitive} + wholeWord={wholeWord} + multiline={multiline} + replacement={replacement} isReplaceEnabled={showReplaceAction && !isReplacing} variant={ isTreeVariant ? "tree" : isListVariant ? 
"list" : "default" diff --git a/apps/desktop/src/renderer/screens/main/components/WorkspaceView/RightSidebar/SearchView/components/SearchMatchItem/SearchMatchItem.tsx b/apps/desktop/src/renderer/screens/main/components/WorkspaceView/RightSidebar/SearchView/components/SearchMatchItem/SearchMatchItem.tsx index a8ad0c6b02a..690d010ebe5 100644 --- a/apps/desktop/src/renderer/screens/main/components/WorkspaceView/RightSidebar/SearchView/components/SearchMatchItem/SearchMatchItem.tsx +++ b/apps/desktop/src/renderer/screens/main/components/WorkspaceView/RightSidebar/SearchView/components/SearchMatchItem/SearchMatchItem.tsx @@ -4,14 +4,21 @@ import { LuEyeOff, LuLink, LuReplace } from "react-icons/lu"; import type { RowHoverAction } from "renderer/screens/main/components/WorkspaceView/RightSidebar/ChangesView/components/RowHoverActions"; import { RowHoverActions } from "renderer/screens/main/components/WorkspaceView/RightSidebar/ChangesView/components/RowHoverActions"; import type { SearchLineResult } from "../../types"; -import { highlightSearchText } from "../../utils/searchPattern/searchPattern"; +import { + buildLineReplacementSegments, + highlightSearchText, +} from "../../utils/searchPattern/searchPattern"; interface SearchMatchItemProps { lineMatch: SearchLineResult; query: string; isRegex: boolean; caseSensitive: boolean; + wholeWord?: boolean; + multiline?: boolean; isReplaceEnabled: boolean; + /** When set, render the line as a before/after diff preview. 
*/ + replacement?: string; variant?: "default" | "tree" | "list"; onOpen: (absolutePath: string, line: number, column: number) => void; onCopyLink: (lineMatch: SearchLineResult) => void; @@ -24,7 +31,10 @@ export const SearchMatchItem = memo(function SearchMatchItem({ query, isRegex, caseSensitive, + wholeWord = false, + multiline = false, isReplaceEnabled, + replacement, variant = "default", onOpen, onCopyLink, @@ -32,10 +42,82 @@ export const SearchMatchItem = memo(function SearchMatchItem({ onIgnore, }: SearchMatchItemProps) { const primaryMatch = lineMatch.matches[0]; + const showPreview = typeof replacement === "string" && replacement.length > 0; + const previewSegments = useMemo( + () => + showPreview + ? buildLineReplacementSegments(lineMatch.preview, { + query, + replacement: replacement ?? "", + isRegex, + caseSensitive, + wholeWord, + multiline, + }) + : null, + [ + showPreview, + lineMatch.preview, + query, + replacement, + isRegex, + caseSensitive, + wholeWord, + multiline, + ], + ); const highlightedText = useMemo( () => - highlightSearchText(lineMatch.preview, { query, isRegex, caseSensitive }), - [lineMatch.preview, query, isRegex, caseSensitive], + previewSegments + ? (() => { + // Pre-compute running offsets so each segment gets a key that + // embeds its absolute position in the line. That keeps keys + // stable across renders without resorting to array indices + // (which Biome flags) and works even when consecutive segments + // share identical text. 
+ let offset = 0; + return previewSegments.map((seg) => { + const key = `${seg.kind}-${offset}`; + offset += seg.text.length; + if (seg.kind === "text") { + return {seg.text}; + } + if (seg.kind === "match-before") { + return ( + + {seg.text} + + ); + } + return ( + + {seg.text} + + ); + }); + })() + : highlightSearchText(lineMatch.preview, { + query, + isRegex, + caseSensitive, + wholeWord, + multiline, + }), + [ + previewSegments, + lineMatch.preview, + query, + isRegex, + caseSensitive, + wholeWord, + multiline, + ], ); const hoverActions: RowHoverAction[] = [ ...(isReplaceEnabled diff --git a/apps/desktop/src/renderer/screens/main/components/WorkspaceView/RightSidebar/SearchView/components/SearchToolbar/SearchToolbar.tsx b/apps/desktop/src/renderer/screens/main/components/WorkspaceView/RightSidebar/SearchView/components/SearchToolbar/SearchToolbar.tsx index d4f910defcb..7eaeb2a33c9 100644 --- a/apps/desktop/src/renderer/screens/main/components/WorkspaceView/RightSidebar/SearchView/components/SearchToolbar/SearchToolbar.tsx +++ b/apps/desktop/src/renderer/screens/main/components/WorkspaceView/RightSidebar/SearchView/components/SearchToolbar/SearchToolbar.tsx @@ -3,9 +3,9 @@ import { Input } from "@superset/ui/input"; import { Tooltip, TooltipContent, TooltipTrigger } from "@superset/ui/tooltip"; import { cn } from "@superset/ui/utils"; import type { ReactNode, RefObject } from "react"; -import { LuReplace, LuSearch, LuX } from "react-icons/lu"; +import { LuReplace, LuSearch, LuWholeWord, LuX } from "react-icons/lu"; import { PiTextAa } from "react-icons/pi"; -import { TbRegex } from "react-icons/tb"; +import { TbArrowAutofitContent, TbRegex } from "react-icons/tb"; interface SearchToolbarProps { searchInputRef: RefObject; @@ -16,6 +16,8 @@ interface SearchToolbarProps { excludePattern: string; isRegex: boolean; caseSensitive: boolean; + wholeWord: boolean; + multiline: boolean; canReplaceAll: boolean; isReplacing: boolean; onQueryChange: (value: string) => void; 
@@ -25,6 +27,8 @@ interface SearchToolbarProps { onToggleReplace: () => void; onToggleRegex: () => void; onToggleCaseSensitive: () => void; + onToggleWholeWord: () => void; + onToggleMultiline: () => void; onReplaceAll: () => void; } @@ -70,6 +74,8 @@ export function SearchToolbar({ excludePattern, isRegex, caseSensitive, + wholeWord, + multiline, canReplaceAll, isReplacing, onQueryChange, @@ -79,6 +85,8 @@ export function SearchToolbar({ onToggleReplace, onToggleRegex, onToggleCaseSensitive, + onToggleWholeWord, + onToggleMultiline, onReplaceAll, }: SearchToolbarProps) { return ( @@ -117,6 +125,13 @@ export function SearchToolbar({ > + + + + {isRegex ? ( + + + + ) : null} {replaceOpen ? ( diff --git a/apps/desktop/src/renderer/screens/main/components/WorkspaceView/RightSidebar/SearchView/components/SearchTreeNode/SearchTreeNode.tsx b/apps/desktop/src/renderer/screens/main/components/WorkspaceView/RightSidebar/SearchView/components/SearchTreeNode/SearchTreeNode.tsx index adf8daaf93b..67aeac48bcf 100644 --- a/apps/desktop/src/renderer/screens/main/components/WorkspaceView/RightSidebar/SearchView/components/SearchTreeNode/SearchTreeNode.tsx +++ b/apps/desktop/src/renderer/screens/main/components/WorkspaceView/RightSidebar/SearchView/components/SearchTreeNode/SearchTreeNode.tsx @@ -23,6 +23,9 @@ interface SearchTreeNodeProps { query: string; isRegex: boolean; caseSensitive: boolean; + wholeWord?: boolean; + multiline?: boolean; + replacement?: string; isReplacing: boolean; showReplaceAction: boolean; openGroups: Record; @@ -43,6 +46,9 @@ export const SearchTreeNode = memo(function SearchTreeNode({ query, isRegex, caseSensitive, + wholeWord = false, + multiline = false, + replacement, isReplacing, showReplaceAction, openGroups, @@ -69,6 +75,9 @@ export const SearchTreeNode = memo(function SearchTreeNode({ query={query} isRegex={isRegex} caseSensitive={caseSensitive} + wholeWord={wholeWord} + multiline={multiline} + replacement={replacement} isReplacing={isReplacing} 
showReplaceAction={showReplaceAction} showParentPath={false} @@ -124,6 +133,9 @@ export const SearchTreeNode = memo(function SearchTreeNode({ query={query} isRegex={isRegex} caseSensitive={caseSensitive} + wholeWord={wholeWord} + multiline={multiline} + replacement={replacement} isReplacing={isReplacing} showReplaceAction={showReplaceAction} openGroups={openGroups} diff --git a/apps/desktop/src/renderer/screens/main/components/WorkspaceView/RightSidebar/SearchView/hooks/useContentSearch/useContentSearch.ts b/apps/desktop/src/renderer/screens/main/components/WorkspaceView/RightSidebar/SearchView/hooks/useContentSearch/useContentSearch.ts index 314b3e37c2f..29f93fc4920 100644 --- a/apps/desktop/src/renderer/screens/main/components/WorkspaceView/RightSidebar/SearchView/hooks/useContentSearch/useContentSearch.ts +++ b/apps/desktop/src/renderer/screens/main/components/WorkspaceView/RightSidebar/SearchView/hooks/useContentSearch/useContentSearch.ts @@ -1,4 +1,4 @@ -import { useMemo } from "react"; +import { useCallback, useEffect, useMemo, useRef, useState } from "react"; import { useDebouncedValue } from "renderer/hooks/useDebouncedValue"; import { electronTrpc } from "renderer/lib/electron-trpc"; import type { SearchContentResult } from "../../types"; @@ -13,10 +13,30 @@ interface UseContentSearchParams { excludePattern: string; isRegex: boolean; caseSensitive: boolean; + wholeWord?: boolean; + multiline?: boolean; enabled?: boolean; limit?: number; } +function toResult(match: { + absolutePath: string; + relativePath: string; + line: number; + column: number; + preview: string; +}): SearchContentResult { + return { + id: `${match.absolutePath}:${match.line}:${match.column}`, + absolutePath: match.absolutePath, + relativePath: match.relativePath, + name: match.absolutePath.split(/[/\\]/).pop() ?? 
match.absolutePath, + line: match.line, + column: match.column, + preview: match.preview, + }; +} + export function useContentSearch({ workspaceId, query, @@ -24,6 +44,8 @@ export function useContentSearch({ excludePattern, isRegex, caseSensitive, + wholeWord = false, + multiline = false, enabled = true, limit = DEFAULT_SEARCH_LIMIT, }: UseContentSearchParams) { @@ -36,8 +58,44 @@ export function useContentSearch({ const isDebouncing = trimmedQuery.length > 0 && trimmedQuery !== debouncedQuery; - const { data, isFetching } = electronTrpc.filesystem.searchContent.useQuery( - { + // Incremental result set. We accumulate across emitted matches rather + // than waiting for the subscription to complete, so the UI can render + // hits as ripgrep finds them (VSCode-style streaming). + const [searchResults, setSearchResults] = useState([]); + const [isStreaming, setIsStreaming] = useState(false); + + // We keep the "idle timeout after last event" and "reset-on-query-change" + // bookkeeping in refs so biome's exhaustive-deps autofix can't strip them + // from dep arrays. Using primitive/ref values also means the deps array + // carries literal strings/numbers, not memoized objects. + const idleTimerRef = useRef | null>(null); + const resetIdleTimer = useCallback(() => { + if (idleTimerRef.current !== null) { + clearTimeout(idleTimerRef.current); + } + idleTimerRef.current = setTimeout(() => { + idleTimerRef.current = null; + setIsStreaming(false); + }, 400); + }, []); + + // Stable string identity of the query. Using a primitive in the deps + // array avoids the "memoized object identity" dance and gives biome + // nothing to autofix. + const subscriptionKey = [ + workspaceId ?? "", + debouncedQuery, + includePattern, + excludePattern, + String(limit), + String(isRegex), + String(caseSensitive), + String(wholeWord), + String(multiline), + ].join("\u0000"); + + const subscriptionInput = useMemo( + () => ({ workspaceId: workspaceId ?? 
"", query: debouncedQuery, includeHidden: false, @@ -46,34 +104,94 @@ export function useContentSearch({ limit, isRegex, caseSensitive, + wholeWord, + multiline, + scopeId: "search-tab", + }), + [ + workspaceId, + debouncedQuery, + includePattern, + excludePattern, + limit, + isRegex, + caseSensitive, + wholeWord, + multiline, + ], + ); + + const subscriptionEnabled = + Boolean(workspaceId) && + enabled && + debouncedQuery.length > 0 && + validationError === null; + + // Reset results whenever the query identity changes. Previously this + // keyed off `subscriptionEnabled` only, which meant editing the search + // text while a stream was already running left stale matches in state + // and new events were appended on top — producing mixed stale/current + // rows and letting users "Replace match" against outdated hits. + useEffect(() => { + // Read subscriptionKey so biome's exhaustive-deps autofix sees it as + // used; the effect re-runs whenever the query identity string + // changes, which is exactly what we want (even though we don't need + // the value at runtime). + void subscriptionKey; + if (idleTimerRef.current !== null) { + clearTimeout(idleTimerRef.current); + idleTimerRef.current = null; + } + if (subscriptionEnabled) { + setSearchResults([]); + setIsStreaming(true); + resetIdleTimer(); + } else { + setIsStreaming(false); + } + }, [subscriptionEnabled, subscriptionKey, resetIdleTimer]); + + // Flush the pending idle timer on unmount so it can't fire after the + // hook caller has moved on. + useEffect( + () => () => { + if (idleTimerRef.current !== null) { + clearTimeout(idleTimerRef.current); + idleTimerRef.current = null; + } }, + [], + ); + + electronTrpc.filesystem.searchContentStream.useSubscription( + subscriptionInput, { - enabled: - Boolean(workspaceId) && - enabled && - debouncedQuery.length > 0 && - validationError === null, - staleTime: 30_000, - placeholderData: (previous) => previous ?? 
{ matches: [] }, + enabled: subscriptionEnabled, + onData: (event) => { + setSearchResults((prev) => { + // Defensive dedupe: the server already drops repeats but + // UI retries can cause overlaps in edge cases. + const id = `${event.match.absolutePath}:${event.match.line}:${event.match.column}`; + if (prev.some((r) => r.id === id)) return prev; + return [...prev, toResult(event.match)]; + }); + // Refresh the idle timer on every event so long-running + // searches don't prematurely report "done" mid-stream. + resetIdleTimer(); + }, + onError: () => { + if (idleTimerRef.current !== null) { + clearTimeout(idleTimerRef.current); + idleTimerRef.current = null; + } + setIsStreaming(false); + }, }, ); - const searchResults: SearchContentResult[] = - validationError === null - ? (data?.matches.map((match) => ({ - id: `${match.absolutePath}:${match.line}:${match.column}`, - absolutePath: match.absolutePath, - relativePath: match.relativePath, - name: match.absolutePath.split(/[/\\]/).pop() ?? match.absolutePath, - line: match.line, - column: match.column, - preview: match.preview, - })) ?? []) - : []; - return { - searchResults, - isFetching: validationError === null && (isFetching || isDebouncing), + searchResults: validationError === null ? 
searchResults : [], + isFetching: validationError === null && (isStreaming || isDebouncing), hasQuery: trimmedQuery.length > 0, validationError, }; diff --git a/apps/desktop/src/renderer/screens/main/components/WorkspaceView/RightSidebar/SearchView/utils/searchPattern/searchPattern.tsx b/apps/desktop/src/renderer/screens/main/components/WorkspaceView/RightSidebar/SearchView/utils/searchPattern/searchPattern.tsx index 93bbc4f79e8..ebd3635658d 100644 --- a/apps/desktop/src/renderer/screens/main/components/WorkspaceView/RightSidebar/SearchView/utils/searchPattern/searchPattern.tsx +++ b/apps/desktop/src/renderer/screens/main/components/WorkspaceView/RightSidebar/SearchView/utils/searchPattern/searchPattern.tsx @@ -4,25 +4,36 @@ function escapeRegExp(input: string): string { return input.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); } +export interface SearchPatternOptions { + query: string; + isRegex: boolean; + caseSensitive: boolean; + wholeWord?: boolean; + multiline?: boolean; +} + export function createSearchRegExp({ query, isRegex, caseSensitive, -}: { - query: string; - isRegex: boolean; - caseSensitive: boolean; -}): RegExp | null { + wholeWord = false, + multiline = false, +}: SearchPatternOptions): RegExp | null { const trimmedQuery = query.trim(); if (!trimmedQuery) { return null; } try { - return new RegExp( - isRegex ? trimmedQuery : escapeRegExp(trimmedQuery), - caseSensitive ? "gu" : "giu", - ); + let source = isRegex ? trimmedQuery : escapeRegExp(trimmedQuery); + if (wholeWord) { + source = `\\b(?:${source})\\b`; + } + let flags = caseSensitive ? 
"gu" : "giu"; + if (isRegex && multiline) { + flags += "sm"; + } + return new RegExp(source, flags); } catch { return null; } @@ -61,6 +72,8 @@ export function replaceSingleSearchMatchInContent( column, isRegex, caseSensitive, + wholeWord = false, + multiline = false, }: { query: string; replacement: string; @@ -68,12 +81,16 @@ export function replaceSingleSearchMatchInContent( column: number; isRegex: boolean; caseSensitive: boolean; + wholeWord?: boolean; + multiline?: boolean; }, ): string | null { const regex = createSearchRegExp({ query, isRegex, caseSensitive, + wholeWord, + multiline, }); if (!regex) { return null; @@ -112,18 +129,24 @@ export function replaceSearchMatchesInLineInContent( line, isRegex, caseSensitive, + wholeWord = false, + multiline = false, }: { query: string; replacement: string; line: number; isRegex: boolean; caseSensitive: boolean; + wholeWord?: boolean; + multiline?: boolean; }, ): string | null { const regex = createSearchRegExp({ query, isRegex, caseSensitive, + wholeWord, + multiline, }); if (!regex) { return null; @@ -168,22 +191,105 @@ export function getSearchValidationError( } } +export interface SearchLineSegment { + kind: "text" | "match-before" | "match-after"; + text: string; +} + +/** + * Given a single line and a replacement, returns a segment list suitable + * for rendering an inline before/after diff. For each match on the line we + * emit the original text (`match-before`) followed by what it would become + * (`match-after`), interleaved with the surrounding untouched text. Returns + * `null` when the regex can't be compiled; callers should fall through to + * plain highlight rendering in that case. 
+ */ +export function buildLineReplacementSegments( + line: string, + { + query, + replacement, + isRegex, + caseSensitive, + wholeWord = false, + multiline = false, + }: SearchPatternOptions & { replacement: string }, +): SearchLineSegment[] | null { + const regex = createSearchRegExp({ + query, + isRegex, + caseSensitive, + wholeWord, + multiline, + }); + if (!regex) { + return null; + } + + const segments: SearchLineSegment[] = []; + let cursor = 0; + let match = regex.exec(line); + + while (match) { + const matchText = match[0] ?? ""; + const matchLength = matchText.length > 0 ? matchText.length : 1; + const endIndex = match.index + matchLength; + + if (match.index > cursor) { + segments.push({ kind: "text", text: line.slice(cursor, match.index) }); + } + segments.push({ kind: "match-before", text: matchText }); + // Build the after-text by running the matched slice through + // String.prototype.replace so capture groups in `replacement` resolve + // ($1, $&, etc.) using the same semantics as the backend. 
+ const singleShotRegex = new RegExp( + regex.source, + regex.flags.replace("g", ""), + ); + segments.push({ + kind: "match-after", + text: matchText.replace(singleShotRegex, replacement), + }); + + cursor = endIndex; + if (matchText.length === 0) { + regex.lastIndex += 1; + } + match = regex.exec(line); + } + + if (segments.length === 0) { + return null; + } + if (cursor < line.length) { + segments.push({ kind: "text", text: line.slice(cursor) }); + } + + return segments; +} + export function highlightSearchText( text: string, { query, isRegex, caseSensitive, + wholeWord = false, + multiline = false, }: { query: string; isRegex: boolean; caseSensitive: boolean; + wholeWord?: boolean; + multiline?: boolean; }, ): ReactNode { const regex = createSearchRegExp({ query, isRegex, caseSensitive, + wholeWord, + multiline, }); if (!regex) { return text; diff --git a/bun.lock b/bun.lock index 0098ec6f7d7..4c2e32b1daa 100644 --- a/bun.lock +++ b/bun.lock @@ -1054,6 +1054,7 @@ "devDependencies": { "@superset/typescript": "workspace:*", "@types/node": "^24.9.1", + "@vscode/ripgrep": "^1.15.9", "typescript": "^5.9.3", }, }, diff --git a/packages/host-service/src/trpc/router/filesystem/filesystem.ts b/packages/host-service/src/trpc/router/filesystem/filesystem.ts index 5d1328cf4cf..c131975a4c0 100644 --- a/packages/host-service/src/trpc/router/filesystem/filesystem.ts +++ b/packages/host-service/src/trpc/router/filesystem/filesystem.ts @@ -39,6 +39,9 @@ const searchContentInputSchema = z.object({ limit: z.number().optional(), isRegex: z.boolean().optional(), caseSensitive: z.boolean().optional(), + wholeWord: z.boolean().optional(), + multiline: z.boolean().optional(), + scopeId: z.string().optional(), }); const replaceContentInputSchema = z.object({ @@ -50,6 +53,8 @@ const replaceContentInputSchema = z.object({ excludePattern: z.string().optional(), isRegex: z.boolean().optional(), caseSensitive: z.boolean().optional(), + wholeWord: z.boolean().optional(), + multiline: 
z.boolean().optional(), paths: z.array(z.string()).optional(), }); diff --git a/packages/workspace-fs/package.json b/packages/workspace-fs/package.json index b982f1d4845..90e3aa6224b 100644 --- a/packages/workspace-fs/package.json +++ b/packages/workspace-fs/package.json @@ -36,6 +36,7 @@ "devDependencies": { "@superset/typescript": "workspace:*", "@types/node": "^24.9.1", + "@vscode/ripgrep": "^1.15.9", "typescript": "^5.9.3" } } diff --git a/packages/workspace-fs/src/bun-test.d.ts b/packages/workspace-fs/src/bun-test.d.ts index 606a04bb001..e1da34a98f1 100644 --- a/packages/workspace-fs/src/bun-test.d.ts +++ b/packages/workspace-fs/src/bun-test.d.ts @@ -6,7 +6,15 @@ declare module "bun:test" { callback: () => void | Promise, ): void; - export function it(name: string, callback: () => void | Promise): void; + interface ItFn { + (name: string, callback: () => void | Promise): void; + skip(name: string, callback: () => void | Promise): void; + skipIf( + condition: boolean, + ): (name: string, callback: () => void | Promise) => void; + } + + export const it: ItFn; export function expect(actual: T): { toContain(expected: unknown): void; diff --git a/packages/workspace-fs/src/client/index.ts b/packages/workspace-fs/src/client/index.ts index 5529c5c97be..a01f9d036c1 100644 --- a/packages/workspace-fs/src/client/index.ts +++ b/packages/workspace-fs/src/client/index.ts @@ -63,5 +63,8 @@ export function createFsClient(transport: FsClientTransport): FsService { watchPath(input) { return transport.subscribe("watchPath", input); }, + searchContentStream(input) { + return transport.subscribe("searchContentStream", input); + }, }; } diff --git a/packages/workspace-fs/src/core/service.ts b/packages/workspace-fs/src/core/service.ts index 8fa58c1a88d..2755aed30f1 100644 --- a/packages/workspace-fs/src/core/service.ts +++ b/packages/workspace-fs/src/core/service.ts @@ -9,6 +9,23 @@ import type { FsWriteResult, } from "../types"; +export interface FsContentStreamInput { + query: 
string; + includeHidden?: boolean; + includePattern?: string; + excludePattern?: string; + limit?: number; + isRegex?: boolean; + caseSensitive?: boolean; + wholeWord?: boolean; + multiline?: boolean; + scopeId?: string; +} + +export interface FsContentStreamEvent { + match: FsContentMatch; +} + export interface FsService { listDirectory(input: { absolutePath: string; @@ -72,6 +89,9 @@ export interface FsService { limit?: number; isRegex?: boolean; caseSensitive?: boolean; + wholeWord?: boolean; + multiline?: boolean; + scopeId?: string; }): Promise<{ matches: FsContentMatch[] }>; replaceContent(input: { @@ -82,6 +102,8 @@ export interface FsService { excludePattern?: string; isRegex?: boolean; caseSensitive?: boolean; + wholeWord?: boolean; + multiline?: boolean; paths?: string[]; }): Promise; @@ -89,6 +111,10 @@ export interface FsService { absolutePath: string; recursive?: boolean; }): AsyncIterable<{ events: FsWatchEvent[] }>; + + searchContentStream( + input: FsContentStreamInput, + ): AsyncIterable; } export interface FsRequestMap { @@ -169,6 +195,9 @@ export interface FsRequestMap { limit?: number; isRegex?: boolean; caseSensitive?: boolean; + wholeWord?: boolean; + multiline?: boolean; + scopeId?: string; }; output: { matches: FsContentMatch[] }; }; @@ -181,6 +210,8 @@ export interface FsRequestMap { excludePattern?: string; isRegex?: boolean; caseSensitive?: boolean; + wholeWord?: boolean; + multiline?: boolean; paths?: string[]; }; output: FsReplaceContentResult; @@ -192,4 +223,8 @@ export interface FsSubscriptionMap { input: { absolutePath: string; recursive?: boolean }; event: { events: FsWatchEvent[] }; }; + searchContentStream: { + input: FsContentStreamInput; + event: FsContentStreamEvent; + }; } diff --git a/packages/workspace-fs/src/host/service.ts b/packages/workspace-fs/src/host/service.ts index 515fa1ae22d..9a5f5bd514f 100644 --- a/packages/workspace-fs/src/host/service.ts +++ b/packages/workspace-fs/src/host/service.ts @@ -9,10 +9,11 @@ import 
{ readFile, writeFile, } from "../fs"; -import type { SearchContentOptions } from "../search"; +import type { RunRipgrepStream, SearchContentOptions } from "../search"; import { replaceContent, searchContent, + searchContentStream, searchFiles, warmupSearchIndex, } from "../search"; @@ -28,6 +29,8 @@ export interface FsHostServiceOptions { watcherManager?: Pick; trashItem?: (absolutePath: string) => Promise; runRipgrep?: SearchContentOptions["runRipgrep"]; + /** Streaming ripgrep runner for the searchContentStream subscription. */ + spawnRipgrep?: RunRipgrepStream; } interface AsyncQueueState { @@ -245,6 +248,9 @@ export function createFsHostService( limit: input.limit, isRegex: input.isRegex, caseSensitive: input.caseSensitive, + wholeWord: input.wholeWord, + multiline: input.multiline, + scopeId: input.scopeId, runRipgrep: options.runRipgrep, }); return { matches }; @@ -260,6 +266,8 @@ export function createFsHostService( excludePattern: input.excludePattern, isRegex: input.isRegex, caseSensitive: input.caseSensitive, + wholeWord: input.wholeWord, + multiline: input.multiline, paths: input.paths, }); }, @@ -280,6 +288,29 @@ export function createFsHostService( }); }, + async *searchContentStream(input) { + // Wrap the raw match stream so the subscription event matches + // the FsSubscriptionMap shape (`{ match }`). Callers iterate and + // break to cancel; the inner generator propagates that via its + // try/finally cleanup. 
+ for await (const match of searchContentStream({ + rootPath, + query: input.query, + includeHidden: input.includeHidden, + includePattern: input.includePattern, + excludePattern: input.excludePattern, + limit: input.limit, + isRegex: input.isRegex, + caseSensitive: input.caseSensitive, + wholeWord: input.wholeWord, + multiline: input.multiline, + scopeId: input.scopeId, + spawnRipgrep: options.spawnRipgrep, + })) { + yield { match }; + } + }, + async close() { await options.watcherManager?.close(); }, diff --git a/packages/workspace-fs/src/search.test.ts b/packages/workspace-fs/src/search.test.ts index c467c49f5a4..a33f08ed978 100644 --- a/packages/workspace-fs/src/search.test.ts +++ b/packages/workspace-fs/src/search.test.ts @@ -1,11 +1,27 @@ import { afterEach, describe, expect, it } from "bun:test"; +import { execFile } from "node:child_process"; +import { existsSync } from "node:fs"; import fs from "node:fs/promises"; import os from "node:os"; import path from "node:path"; -import type { SearchPatchEvent } from "./search"; +import { promisify } from "node:util"; +import { rgPath as bundledRgPath } from "@vscode/ripgrep"; + +// Bun/CI can hand us a resolved @vscode/ripgrep module whose postinstall hook +// didn't run, so the bundled binary doesn't exist on disk. Skip any tests +// that rely on actually executing ripgrep when that happens instead of +// failing the suite; the behavior they verify is already guarded by the +// fallback path. 
+const BUNDLED_RG_AVAILABLE = existsSync(bundledRgPath); +const itIfRg = it.skipIf(!BUNDLED_RG_AVAILABLE); + +import type { RunRipgrepStream, SearchPatchEvent } from "./search"; import { invalidateAllSearchIndexes, patchSearchIndexesForRoot, + replaceContent, + searchContent, + searchContentStream, searchFiles, warmupSearchIndex, } from "./search"; @@ -22,9 +38,13 @@ afterEach(async () => { }); async function createTempRoot(): Promise { - const rootPath = await fs.mkdtemp( - path.join(os.tmpdir(), "workspace-fs-search-"), - ); + // On macOS, os.tmpdir() resolves to `/var/folders/...` which is itself a + // symlink to `/private/var/folders/...`. workspace-fs' write path calls + // fs.realpath() and enforces that the result lives under the workspace + // root; without this realpath call the tempdir symlink would trip that + // check every time. + const raw = await fs.mkdtemp(path.join(os.tmpdir(), "workspace-fs-search-")); + const rootPath = await fs.realpath(raw); tempRoots.push(rootPath); return rootPath; } @@ -33,6 +53,60 @@ function createPatchEvent(event: SearchPatchEvent): SearchPatchEvent { return event; } +// The test environment doesn't have `rg` on PATH (CI agents rarely do), so +// `defaultRunRipgrep` would throw ENOENT and the caller silently falls back +// to the synchronous scan path. That fallback can't exercise ripgrep-only +// features like `--multiline`, so tests that need those opt into the +// bundled @vscode/ripgrep binary explicitly. 
+const execFileAsync = promisify(execFile); +const bundledRunRipgrep = async ( + args: string[], + options: { cwd: string; maxBuffer: number; signal?: AbortSignal }, +): Promise<{ stdout: string }> => { + const result = await execFileAsync(bundledRgPath, args, { + cwd: options.cwd, + encoding: "utf8", + maxBuffer: options.maxBuffer, + windowsHide: true, + signal: options.signal, + }); + return { stdout: result.stdout }; +}; + +// Same idea as bundledRunRipgrep, but streams stdout so searchContentStream +// can exercise its incremental parse path. +const bundledSpawnRipgrep: RunRipgrepStream = async function* (args, options) { + const { spawn } = await import("node:child_process"); + const child = spawn(bundledRgPath, args, { + cwd: options.cwd, + windowsHide: true, + }); + const signal = options.signal; + const onAbort = () => { + if (!child.killed) child.kill("SIGTERM"); + }; + if (signal) { + if (signal.aborted) onAbort(); + else signal.addEventListener("abort", onAbort, { once: true }); + } + try { + child.stdout.setEncoding("utf8"); + for await (const chunk of child.stdout as AsyncIterable) { + yield chunk; + } + await new Promise((resolve, reject) => { + child.once("close", (code) => { + if (code === null || code === 0 || code === 1) resolve(); + else reject(new Error(`rg exit ${code}`)); + }); + child.once("error", reject); + }); + } finally { + signal?.removeEventListener("abort", onAbort); + if (!child.killed) child.kill("SIGTERM"); + } +}; + describe("patchSearchIndexesForRoot", () => { it("adds created files to an existing visible search index", async () => { const rootPath = await createTempRoot(); @@ -433,3 +507,231 @@ describe("searchFiles", () => { expect(results[0]?.relativePath).toEqual("alpha.ts"); }); }); + +describe("searchContent", () => { + it("respects .gitignore via ripgrep when includeHidden=false", async () => { + const rootPath = await createTempRoot(); + // ripgrep only honors `.gitignore` inside a git repository (everywhere + // else it 
looks for `.ignore`). Workspaces are always git worktrees in + // production, so set one up here to mirror the real environment. + await execFileAsync("git", ["init", "--quiet"], { cwd: rootPath }); + await fs.mkdir(path.join(rootPath, "dist"), { recursive: true }); + await fs.writeFile(path.join(rootPath, "src.ts"), "const TOKEN = 1;\n"); + await fs.writeFile( + path.join(rootPath, "dist", "bundled.ts"), + "const TOKEN = 2;\n", + ); + await fs.writeFile(path.join(rootPath, ".gitignore"), "dist/\n"); + + const results = await searchContent({ + rootPath, + query: "TOKEN", + includeHidden: false, + runRipgrep: bundledRunRipgrep, + }); + const paths = results.map((r) => r.relativePath); + expect(paths).toContain("src.ts"); + expect(paths.includes("dist/bundled.ts")).toEqual(false); + }); + + it("reveals .gitignore'd files when includeHidden=true", async () => { + const rootPath = await createTempRoot(); + await execFileAsync("git", ["init", "--quiet"], { cwd: rootPath }); + await fs.mkdir(path.join(rootPath, "dist"), { recursive: true }); + await fs.writeFile( + path.join(rootPath, "dist", "bundled.ts"), + "const TOKEN = 2;\n", + ); + await fs.writeFile(path.join(rootPath, ".gitignore"), "dist/\n"); + + const results = await searchContent({ + rootPath, + query: "TOKEN", + includeHidden: true, + runRipgrep: bundledRunRipgrep, + }); + expect(results.map((r) => r.relativePath)).toContain("dist/bundled.ts"); + }); + + it("wholeWord=true does not match substrings", async () => { + const rootPath = await createTempRoot(); + await fs.writeFile( + path.join(rootPath, "a.ts"), + "const foo = 1;\nconst foobar = 2;\n", + ); + + const results = await searchContent({ + rootPath, + query: "foo", + wholeWord: true, + runRipgrep: bundledRunRipgrep, + }); + // Only the `foo` line should match; `foobar` must be filtered out. 
+ expect(results).toHaveLength(1); + expect(results[0]?.line).toEqual(1); + }); + + itIfRg("multiline=true lets regex span newlines", async () => { + const rootPath = await createTempRoot(); + await fs.writeFile( + path.join(rootPath, "a.ts"), + "function foo() {\n return 1;\n}\n", + ); + + const results = await searchContent({ + rootPath, + query: "function foo\\(\\).*return", + isRegex: true, + multiline: true, + runRipgrep: bundledRunRipgrep, + }); + expect(results.length).toBeGreaterThan(0); + }); + + it("scoped cancellation does not cross-cancel Search tab and Quick Open", async () => { + const rootPath = await createTempRoot(); + await fs.writeFile(path.join(rootPath, "a.ts"), "ALPHA\n"); + await fs.writeFile(path.join(rootPath, "b.ts"), "BETA\n"); + + const [alpha, beta] = await Promise.all([ + searchContent({ + rootPath, + query: "ALPHA", + scopeId: "search-tab", + runRipgrep: bundledRunRipgrep, + }), + searchContent({ + rootPath, + query: "BETA", + scopeId: "quick-open", + runRipgrep: bundledRunRipgrep, + }), + ]); + + expect(alpha[0]?.relativePath).toEqual("a.ts"); + expect(beta[0]?.relativePath).toEqual("b.ts"); + }); +}); + +describe("replaceContent", () => { + it("supports regex capture group replacements ($1)", async () => { + const rootPath = await createTempRoot(); + const filePath = path.join(rootPath, "a.ts"); + await fs.writeFile(filePath, "const foo = 1;\nconst bar = 2;\n"); + + const result = await replaceContent({ + rootPath, + query: "(foo|bar)", + replacement: "$1Name", + isRegex: true, + }); + expect(result.filesUpdated).toEqual(1); + + const updated = await fs.readFile(filePath, "utf8"); + expect(updated).toEqual("const fooName = 1;\nconst barName = 2;\n"); + }); + + it("wholeWord replacement does not touch substring hits", async () => { + const rootPath = await createTempRoot(); + const filePath = path.join(rootPath, "a.ts"); + await fs.writeFile(filePath, "foo + foobar\n"); + + const result = await replaceContent({ + rootPath, + query: 
"foo", + replacement: "BAR", + wholeWord: true, + }); + expect(result.filesUpdated).toEqual(1); + + const updated = await fs.readFile(filePath, "utf8"); + expect(updated).toEqual("BAR + foobar\n"); + }); +}); + +describe("searchContentStream", () => { + itIfRg( + "yields each match incrementally as ripgrep produces them", + async () => { + const rootPath = await createTempRoot(); + // Spread matches across multiple files so ripgrep flushes between + // them; this is the scenario where streaming actually pays off. + for (let fileIndex = 0; fileIndex < 5; fileIndex++) { + await fs.writeFile( + path.join(rootPath, `file-${fileIndex}.ts`), + "export const NEEDLE = 1;\n", + ); + } + + const matches = []; + for await (const match of searchContentStream({ + rootPath, + query: "NEEDLE", + spawnRipgrep: bundledSpawnRipgrep, + })) { + matches.push(match); + } + + expect(matches.length).toEqual(5); + for (const match of matches) { + expect(match.relativePath.startsWith("file-")).toEqual(true); + expect(match.line).toEqual(1); + expect(match.preview.includes("NEEDLE")).toEqual(true); + } + }, + ); + + itIfRg("honors limit so runaway queries terminate", async () => { + const rootPath = await createTempRoot(); + const lines: string[] = []; + for (let i = 0; i < 50; i++) lines.push(`NEEDLE line ${i}`); + await fs.writeFile(path.join(rootPath, "big.ts"), `${lines.join("\n")}\n`); + + const matches = []; + for await (const match of searchContentStream({ + rootPath, + query: "NEEDLE", + limit: 2, + spawnRipgrep: bundledSpawnRipgrep, + })) { + matches.push(match); + } + + // ripgrep's --max-count caps per-file too, but limit should enforce the + // tighter cap regardless of underlying behavior. 
+ expect(matches.length <= 2).toEqual(true); + }); + + itIfRg("cancels streaming when the external signal fires", async () => { + const rootPath = await createTempRoot(); + for (let i = 0; i < 50; i++) { + await fs.writeFile( + path.join(rootPath, `file-${i}.ts`), + "NEEDLE\n".repeat(100), + ); + } + + const controller = new AbortController(); + const matches = []; + const iter = searchContentStream({ + rootPath, + query: "NEEDLE", + signal: controller.signal, + spawnRipgrep: bundledSpawnRipgrep, + }); + + let seen = 0; + for await (const match of iter) { + matches.push(match); + seen += 1; + if (seen === 1) { + controller.abort(); + } + } + + // After abort, the generator must stop yielding; we may or may not + // have a trailing match that was already in the buffer, but it must + // not stream the entire 5000-match corpus. + expect(matches.length < 500).toEqual(true); + }); +}); diff --git a/packages/workspace-fs/src/search.ts b/packages/workspace-fs/src/search.ts index e60a0b61a82..49f1991ee1c 100644 --- a/packages/workspace-fs/src/search.ts +++ b/packages/workspace-fs/src/search.ts @@ -1,4 +1,4 @@ -import { execFile } from "node:child_process"; +import { execFile, spawn } from "node:child_process"; import fs from "node:fs/promises"; import path from "node:path"; import { promisify } from "node:util"; @@ -198,6 +198,21 @@ export interface RunRipgrepOptions { signal?: AbortSignal; } +export interface RunRipgrepStreamOptions { + cwd: string; + signal?: AbortSignal; +} + +/** + * Streaming ripgrep runner. Yields stdout chunks as they arrive so callers + * can parse match lines before the subprocess finishes. Implementations + * must honor the provided AbortSignal. 
+ */ +export type RunRipgrepStream = ( + args: string[], + options: RunRipgrepStreamOptions, +) => AsyncIterable; + export interface SearchContentOptions { rootPath: string; query: string; @@ -207,10 +222,30 @@ export interface SearchContentOptions { limit?: number; isRegex?: boolean; caseSensitive?: boolean; + /** + * VSCode's "Match whole word" toggle. Wraps the query in word boundaries + * (`\b`) so `foo` does not match `foobar`. Orthogonal to `isRegex`. + */ + wholeWord?: boolean; + /** + * VSCode's multiline regex mode. Only meaningful when `isRegex` is true; + * lets the pattern span newlines and makes `.` match them. + */ + multiline?: boolean; + /** + * Logical caller identity (e.g. "search-tab"). Each scope owns its own + * AbortController so the Search tab, Cmd+P and Files tab don't cancel + * each other's queries when they land on the same workspace. + */ + scopeId?: string; + /** External cancel signal; forwarded to the internal controller. */ + signal?: AbortSignal; runRipgrep?: ( args: string[], options: RunRipgrepOptions, ) => Promise<{ stdout: string }>; + /** Streaming runner, used by searchContentStream. */ + spawnRipgrep?: RunRipgrepStream; } export interface ReplaceContentOptions { @@ -222,6 +257,10 @@ export interface ReplaceContentOptions { excludePattern?: string; isRegex?: boolean; caseSensitive?: boolean; + /** Matches the corresponding flag on `SearchContentOptions`. */ + wholeWord?: boolean; + /** Matches the corresponding flag on `SearchContentOptions`. */ + multiline?: boolean; paths?: string[]; } @@ -386,17 +425,28 @@ function compileSearchPattern({ query, isRegex = false, caseSensitive, + wholeWord = false, + multiline = false, }: Pick< SearchContentOptions, - "query" | "isRegex" | "caseSensitive" + "query" | "isRegex" | "caseSensitive" | "wholeWord" | "multiline" >): CompiledSearchPattern { const resolvedCaseSensitive = resolveCaseSensitive( query, caseSensitive, isRegex, ); - const flags = resolvedCaseSensitive ? 
"gu" : "giu"; - const source = isRegex ? query : escapeRegExp(query); + // `s` (dotall) + `m` (anchors per line) only ship when the caller opts + // into multiline mode. This keeps simple searches behaving exactly as + // before while letting regex users match across newlines. + let flags = resolvedCaseSensitive ? "gu" : "giu"; + if (isRegex && multiline) { + flags += "sm"; + } + let source = isRegex ? query : escapeRegExp(query); + if (wholeWord) { + source = `\\b(?:${source})\\b`; + } return { isRegex, @@ -732,6 +782,68 @@ async function defaultRunRipgrep( return { stdout: result.stdout }; } +// Streaming default runner. Uses `spawn` so stdout chunks can be consumed +// before the process exits. Desktop overrides this to invoke the bundled +// ripgrep binary instead of relying on PATH. +async function* defaultSpawnRipgrep( + args: string[], + options: RunRipgrepStreamOptions, +): AsyncIterable { + const child = spawn("rg", args, { + cwd: options.cwd, + windowsHide: true, + }); + + const onAbort = () => { + // `spawn`'s `signal` option exists on modern Node, but we wire up the + // handler manually so we can treat the cancellation as a clean + // shutdown (no throw propagated to the generator consumer). + if (!child.killed) { + child.kill("SIGTERM"); + } + }; + const signal = options.signal; + if (signal) { + if (signal.aborted) { + onAbort(); + } else { + signal.addEventListener("abort", onAbort, { once: true }); + } + } + + try { + // Set encoding so `data` events arrive as strings instead of Buffers. + child.stdout.setEncoding("utf8"); + for await (const chunk of child.stdout as AsyncIterable) { + if (signal?.aborted) { + return; + } + yield chunk; + } + // Drain exit so any non-zero code turns into a real error (other than + // exit 1 which ripgrep uses for "no matches found"). 
+ await new Promise((resolve, reject) => { + child.once("error", reject); + child.once("close", (code) => { + if (signal?.aborted || code === null || code === 0 || code === 1) { + resolve(); + } else { + const err = new Error(`ripgrep exited with code ${code}`) as Error & { + code?: number; + }; + err.code = code; + reject(err); + } + }); + }); + } finally { + signal?.removeEventListener("abort", onAbort); + if (!child.killed) { + child.kill("SIGTERM"); + } + } +} + async function searchContentWithRipgrep({ rootPath, query, @@ -741,18 +853,47 @@ async function searchContentWithRipgrep({ limit, isRegex, caseSensitive, + wholeWord, + multiline, useSmartCase, runRipgrep, -}: Required> & { + scopeId, + signal, +}: { + rootPath: string; + query: string; + includeHidden: boolean; + includePattern: string; + excludePattern: string; + limit: number; + isRegex: boolean; + caseSensitive: boolean; + wholeWord: boolean; + multiline: boolean; useSmartCase: boolean; runRipgrep: NonNullable; + scopeId?: string; + signal?: AbortSignal; }): Promise { - const prevController = activeSearchControllers.get(rootPath); + const normalizedRootPath = normalizeAbsolutePath(rootPath); + // Scope the cancellation channel so the Search tab, Cmd+P and Files tab + // never preempt each other when they happen to land on the same + // workspace simultaneously. + const controllerKey = `${normalizedRootPath}::${scopeId ?? 
"default"}`; + const prevController = activeSearchControllers.get(controllerKey); if (prevController) { prevController.abort(); } const controller = new AbortController(); - activeSearchControllers.set(rootPath, controller); + activeSearchControllers.set(controllerKey, controller); + const onExternalAbort = () => controller.abort(); + if (signal) { + if (signal.aborted) { + controller.abort(); + } else { + signal.addEventListener("abort", onExternalAbort, { once: true }); + } + } const safeLimit = safeSearchLimit(limit); const maxCandidates = safeLimit * KEYWORD_SEARCH_CANDIDATE_MULTIPLIER; @@ -773,6 +914,12 @@ async function searchContentWithRipgrep({ } else { args.push("--ignore-case"); } + if (multiline) { + // `--multiline` lets the regex cross newlines, `--multiline-dotall` + // makes `.` match them. We couple them so behavior matches VSCode's + // "multi-line" toggle. + args.push("--multiline", "--multiline-dotall"); + } } else { if (caseSensitive) { args.push("--case-sensitive"); @@ -784,6 +931,10 @@ async function searchContentWithRipgrep({ args.push("--fixed-strings"); } + if (wholeWord) { + args.push("--word-regexp"); + } + if (includeHidden) { args.push("--hidden", "--no-ignore"); } @@ -804,7 +955,7 @@ async function searchContentWithRipgrep({ try { const { stdout } = await runRipgrep(args, { - cwd: normalizeAbsolutePath(rootPath), + cwd: normalizedRootPath, maxBuffer: KEYWORD_SEARCH_RIPGREP_BUFFER_BYTES, signal: controller.signal, }); @@ -840,7 +991,7 @@ async function searchContentWithRipgrep({ } const pathData = "path" in data ? data.path : null; - const relativePath = + const rawPath = typeof pathData === "object" && pathData !== null && "text" in pathData && @@ -848,10 +999,16 @@ async function searchContentWithRipgrep({ ? pathData.text : null; - if (!relativePath) { + if (!rawPath) { continue; } + // ripgrep echoes the `.` target we pass as CWD, so every path comes + // back prefixed with `./`. 
Strip it so relativePath looks identical + // across ripgrep and fast-glob code paths (tests match on exact + // strings). + const relativePath = normalizePathForGlob(rawPath); + const lineNumber = "line_number" in data && typeof data.line_number === "number" ? data.line_number @@ -880,10 +1037,7 @@ async function searchContentWithRipgrep({ } } - const absolutePath = path.join( - normalizeAbsolutePath(rootPath), - relativePath, - ); + const absolutePath = path.join(normalizedRootPath, relativePath); const id = `${absolutePath}:${lineNumber}:${column}`; if (seen.has(id)) { continue; @@ -900,14 +1054,16 @@ async function searchContentWithRipgrep({ }); } - if (activeSearchControllers.get(rootPath) === controller) { - activeSearchControllers.delete(rootPath); + signal?.removeEventListener("abort", onExternalAbort); + if (activeSearchControllers.get(controllerKey) === controller) { + activeSearchControllers.delete(controllerKey); } return rankContentMatches(matches, query, safeLimit, isRegex); } catch (error) { - if (activeSearchControllers.get(rootPath) === controller) { - activeSearchControllers.delete(rootPath); + signal?.removeEventListener("abort", onExternalAbort); + if (activeSearchControllers.get(controllerKey) === controller) { + activeSearchControllers.delete(controllerKey); } if (error instanceof Error && error.name === "AbortError") { @@ -1371,12 +1527,20 @@ export async function searchFiles({ export async function searchContent({ rootPath, query, - includeHidden = true, + // `searchFiles` defaults `includeHidden` to `false`, and a `true` default + // here would be surprising, so this default is flipped to `false` as well. + // Callers such as SearchView already pass `false` explicitly; any consumer + // that relied on the old default must now pass `includeHidden: true`. 
+ includeHidden = false, includePattern = "", excludePattern = "", limit = 20, isRegex = false, caseSensitive, + wholeWord = false, + multiline = false, + scopeId, + signal, runRipgrep = defaultRunRipgrep, }: SearchContentOptions): Promise { const trimmedQuery = query.trim(); @@ -1390,6 +1554,8 @@ export async function searchContent({ query: trimmedQuery, isRegex, caseSensitive, + wholeWord, + multiline, }); internalMatches = await searchContentWithRipgrep({ @@ -1401,8 +1567,12 @@ export async function searchContent({ limit, isRegex, caseSensitive: pattern.caseSensitive, + wholeWord, + multiline, useSmartCase: !isRegex && caseSensitive === undefined, runRipgrep, + scopeId, + signal, }); } catch (error) { if (error instanceof Error && error.name === "AbortError") { @@ -1413,6 +1583,8 @@ export async function searchContent({ query: trimmedQuery, isRegex, caseSensitive, + wholeWord, + multiline, }); const index = await getSearchIndex({ rootPath, @@ -1443,15 +1615,309 @@ export async function searchContent({ ); } +// Shared helper between the batched and streaming searchContent paths so +// argv stays in sync when we add flags (wholeWord, multiline, etc.). +function buildRipgrepSearchArgs({ + query, + includeHidden, + includePattern, + excludePattern, + isRegex, + caseSensitive, + wholeWord, + multiline, + useSmartCase, +}: { + query: string; + includeHidden: boolean; + includePattern: string; + excludePattern: string; + isRegex: boolean; + caseSensitive: boolean; + wholeWord: boolean; + multiline: boolean; + useSmartCase: boolean; +}): string[] { + const args = [ + "--json", + "--line-number", + "--column", + "--no-messages", + "--max-filesize", + `${Math.floor(MAX_KEYWORD_FILE_SIZE_BYTES / 1024)}K`, + "--max-count", + String(KEYWORD_SEARCH_MAX_COUNT_PER_FILE), + ]; + + if (isRegex) { + args.push(caseSensitive ? 
"--case-sensitive" : "--ignore-case"); + if (multiline) { + args.push("--multiline", "--multiline-dotall"); + } + } else { + if (caseSensitive) { + args.push("--case-sensitive"); + } else if (useSmartCase) { + args.push("--smart-case"); + } else { + args.push("--ignore-case"); + } + args.push("--fixed-strings"); + } + + if (wholeWord) { + args.push("--word-regexp"); + } + if (includeHidden) { + args.push("--hidden", "--no-ignore"); + } + for (const pattern of DEFAULT_IGNORE_PATTERNS) { + args.push("--glob", `!${pattern}`); + } + for (const pattern of parseGlobPatterns(includePattern)) { + args.push("--glob", normalizePathForGlob(pattern)); + } + for (const pattern of parseGlobPatterns(excludePattern)) { + args.push("--glob", `!${normalizePathForGlob(pattern)}`); + } + + args.push(query, "."); + return args; +} + +interface RgJsonMatch { + absolutePath: string; + relativePath: string; + name: string; + line: number; + column: number; + preview: string; +} + +// Parses one line of ripgrep `--json` output. Returns the match, or null +// for begin/end/summary/unparsable lines. +function parseRipgrepMatchLine( + rawLine: string, + normalizedRootPath: string, +): RgJsonMatch | null { + if (!rawLine) { + return null; + } + + let parsed: unknown; + try { + parsed = JSON.parse(rawLine); + } catch { + return null; + } + + if ( + typeof parsed !== "object" || + parsed === null || + !("type" in parsed) || + parsed.type !== "match" || + !("data" in parsed) + ) { + return null; + } + + const data = parsed.data; + if (typeof data !== "object" || data === null) { + return null; + } + + const pathData = "path" in data ? data.path : null; + const rawPath = + typeof pathData === "object" && + pathData !== null && + "text" in pathData && + typeof pathData.text === "string" + ? pathData.text + : null; + if (!rawPath) { + return null; + } + + const relativePath = normalizePathForGlob(rawPath); + const lineNumber = + "line_number" in data && typeof data.line_number === "number" + ? 
/**
 * VSCode-style streaming search: yields each match as ripgrep reports it
 * instead of buffering the full result set. Falls back to throwing if the
 * underlying binary is missing — callers expecting a legacy environment
 * without rg should continue to use `searchContent`.
 *
 * Behaviour visible in this function:
 * - A blank (whitespace-only) query ends the stream immediately.
 * - Starting a stream aborts any still-running stream registered under the
 *   same root path and `scopeId` (`"default"` when omitted).
 * - Matches are de-duplicated by `absolutePath:line:column`.
 * - The stream ends after `limit` matches (passed through `safeSearchLimit`),
 *   or as soon as `signal` is aborted.
 *
 * All setup that can throw runs inside the `try`, so the abort listener and
 * the controller registration are always released.
 */
export async function* searchContentStream({
	rootPath,
	query,
	includeHidden = false,
	includePattern = "",
	excludePattern = "",
	limit = MAX_SEARCH_RESULTS,
	isRegex = false,
	caseSensitive,
	wholeWord = false,
	multiline = false,
	scopeId,
	signal,
	spawnRipgrep = defaultSpawnRipgrep,
}: SearchContentStreamOptions): AsyncIterable<{
	absolutePath: string;
	relativePath: string;
	line: number;
	column: number;
	preview: string;
}> {
	const trimmedQuery = query.trim();
	if (!trimmedQuery) {
		return;
	}

	const normalizedRootPath = normalizeAbsolutePath(rootPath);
	const controllerKey = `${normalizedRootPath}::${scopeId ?? "default"}::stream`;

	// A newer stream for the same root/scope supersedes the previous one.
	activeSearchControllers.get(controllerKey)?.abort();
	const controller = new AbortController();
	activeSearchControllers.set(controllerKey, controller);
	const onExternalAbort = () => controller.abort();

	try {
		if (signal?.aborted) {
			controller.abort();
		} else {
			signal?.addEventListener("abort", onExternalAbort, { once: true });
		}
		// Cancelled before we started: do not spawn ripgrep at all.
		if (controller.signal.aborted) {
			return;
		}

		const pattern = compileSearchPattern({
			query: trimmedQuery,
			isRegex,
			caseSensitive,
			wholeWord,
			multiline,
		});
		const args = buildRipgrepSearchArgs({
			query: trimmedQuery,
			includeHidden,
			includePattern,
			excludePattern,
			isRegex,
			caseSensitive: pattern.caseSensitive,
			wholeWord,
			multiline,
			// Smart case applies only to literal queries with no explicit choice.
			useSmartCase: !isRegex && caseSensitive === undefined,
		});

		const safeLimit = safeSearchLimit(limit);
		const seen = new Set<string>();
		let emitted = 0;
		let buffer = "";

		// Parses one JSON line and returns the public match shape, or null when
		// the line is not a match or repeats an already-emitted location.
		const takeMatch = (rawLine: string) => {
			const match = parseRipgrepMatchLine(rawLine, normalizedRootPath);
			if (!match) {
				return null;
			}
			const id = `${match.absolutePath}:${match.line}:${match.column}`;
			if (seen.has(id)) {
				return null;
			}
			seen.add(id);
			emitted += 1;
			return {
				absolutePath: match.absolutePath,
				relativePath: match.relativePath,
				line: match.line,
				column: match.column,
				preview: match.preview,
			};
		};

		for await (const chunk of spawnRipgrep(args, {
			cwd: normalizedRootPath,
			signal: controller.signal,
		})) {
			if (controller.signal.aborted) {
				return;
			}
			// Chunks may split a JSON line anywhere; keep the unterminated tail
			// in `buffer` until the rest of the line arrives.
			const lines = (buffer + chunk).split("\n");
			buffer = lines.pop() ?? "";
			for (const line of lines) {
				const result = takeMatch(line);
				if (!result) {
					continue;
				}
				yield result;
				if (emitted >= safeLimit) {
					controller.abort();
					return;
				}
			}
		}

		// Trailing partial line (no newline at EOF).
		if (buffer && !controller.signal.aborted && emitted < safeLimit) {
			const result = takeMatch(buffer);
			if (result) {
				yield result;
			}
		}
	} finally {
		signal?.removeEventListener("abort", onExternalAbort);
		// Cancel our signal if the consumer stopped iterating early or an error
		// unwound the loop; calling abort() again on an aborted controller is a
		// no-op.
		controller.abort();
		if (activeSearchControllers.get(controllerKey) === controller) {
			activeSearchControllers.delete(controllerKey);
		}
	}
}