diff --git a/i18n/en.json b/i18n/en.json index f0b10d2ac1cc7..27084b059370a 100644 --- a/i18n/en.json +++ b/i18n/en.json @@ -1150,6 +1150,7 @@ "hide_all_people": "Hide all people", "hide_gallery": "Hide gallery", "hide_named_person": "Hide person {name}", + "hide_ocr_boxes": "Hide OCR boxes", "hide_password": "Hide password", "hide_person": "Hide person", "hide_unnamed_people": "Hide unnamed people", @@ -1260,6 +1261,7 @@ "link_to_oauth": "Link to OAuth", "linked_oauth_account": "Linked OAuth account", "list": "List", + "load_ocr_data": "Load OCR data", "loading": "Loading", "loading_search_results_failed": "Loading search results failed", "local": "Local", @@ -1440,6 +1442,7 @@ "no_locked_photos_message": "Photos and videos in the locked folder are hidden and won't show up as you browse or search your library.", "no_name": "No Name", "no_notifications": "No notifications", + "no_ocr_data": "No OCR data available", "no_people_found": "No matching people found", "no_places": "No places", "no_remote_assets_found": "No remote assets found with this checksum", @@ -1465,6 +1468,7 @@ "obtainium_configurator": "Obtainium Configurator", "obtainium_configurator_instructions": "Use Obtainium to install and update the Android app directly from Immich GitHub's release. 
Create an API key and select a variant to create your Obtainium configuration link", "ocr": "OCR", + "ocr_text": "OCR Text", "official_immich_resources": "Official Immich Resources", "offline": "Offline", "offset": "Offset", @@ -1941,6 +1945,7 @@ "show_in_timeline_setting_description": "Show photos and videos from this user in your timeline", "show_keyboard_shortcuts": "Show keyboard shortcuts", "show_metadata": "Show metadata", + "show_ocr_boxes": "Show OCR boxes", "show_or_hide_info": "Show or hide info", "show_password": "Show password", "show_person_options": "Show person options", diff --git a/web/src/lib/components/asset-viewer/detail-panel.svelte b/web/src/lib/components/asset-viewer/detail-panel.svelte index 51c3098356d4d..efa8acb76809c 100644 --- a/web/src/lib/components/asset-viewer/detail-panel.svelte +++ b/web/src/lib/components/asset-viewer/detail-panel.svelte @@ -9,6 +9,7 @@ import { authManager } from '$lib/managers/auth-manager.svelte'; import AssetChangeDateModal from '$lib/modals/AssetChangeDateModal.svelte'; import { isFaceEditMode } from '$lib/stores/face-edit.svelte'; + import { ocrBoxesArray } from '$lib/stores/ocr.store'; import { boundingBoxesArray } from '$lib/stores/people.store'; import { locale } from '$lib/stores/preferences.store'; import { featureFlags } from '$lib/stores/system-config-manager.svelte'; @@ -19,7 +20,7 @@ import { getMetadataSearchQuery } from '$lib/utils/metadata-search'; import { fromISODateTime, fromISODateTimeUTC, toTimelineAsset } from '$lib/utils/timeline-util'; import { getParentPath } from '$lib/utils/tree-utils'; - import { AssetMediaSize, getAssetInfo, type AlbumResponseDto, type AssetResponseDto } from '@immich/sdk'; + import { AssetMediaSize, getAssetInfo, getAssetOcr, type AlbumResponseDto, type AssetResponseDto } from '@immich/sdk'; import { Icon, IconButton, LoadingSpinner, modalManager } from '@immich/ui'; import { mdiCalendar, @@ -31,6 +32,7 @@ mdiInformationOutline, mdiPencil, mdiPlus, + mdiTextBox, } 
from '@mdi/js'; import { DateTime } from 'luxon'; import { t } from 'svelte-i18n'; @@ -51,6 +53,8 @@ let showAssetPath = $state(false); let showEditFaces = $state(false); + let showOcrTexts = $state(false); + let ocrData = $state>>([]); let isOwner = $derived($user?.id === asset.ownerId); let people = $derived(asset.people || []); let unassignedFaces = $derived(asset.unassignedFaces || []); @@ -80,6 +84,8 @@ if (asset.id !== previousId) { showEditFaces = false; previousId = asset.id; + showOcrTexts = false; + ocrData = []; } }); @@ -115,6 +121,39 @@ await modalManager.show(AssetChangeDateModal, { asset: toTimelineAsset(asset), initialDate: dateTime }); };
+
+  /**
+   * Toggle the OCR text list and the matching overlay boxes.
+   * OCR data is requested the first time the list is shown while `ocrData` is empty.
+   */
+  const toggleOcrTexts = async () => {
+    showOcrTexts = !showOcrTexts;
+    if (!showOcrTexts) {
+      // Clear when hidden
+      $ocrBoxesArray = [];
+      return;
+    }
+    if (ocrData.length > 0) {
+      // If data is already loaded, display all boxes
+      $ocrBoxesArray = ocrData;
+      return;
+    }
+    // Remember which asset this request is for: `asset` can change while the request is in flight.
+    const requestedAssetId = asset.id;
+    try {
+      const loaded = await getAssetOcr({ id: requestedAssetId });
+      if (asset.id !== requestedAssetId) {
+        return; // stale response for a previous asset; never attach it to the current one
+      }
+      ocrData = loaded.reverse(); // make the texts appear in the top-down order
+      if (showOcrTexts) {
+        // Only draw the boxes if the list was not hidden again while loading
+        $ocrBoxesArray = ocrData;
+      }
+    } catch (error) {
+      console.error('Failed to load OCR data:', error);
+    }
+  };
@@ -258,6 +297,64 @@
{/if} + {#if !authManager.isSharedLink} +
+
+

{$t('ocr_text')}

+
+ {#if ocrData.length > 0} + + {:else} + + {/if} +
+
+ + {#if showOcrTexts} +
+ {#if ocrData.length > 0} +
+ {#each ocrData as ocr, index (ocr.id)} + + {/each} +
+ {:else} +

{$t('no_ocr_data')}

+ {/if} +
+ {/if} +
+ {/if} +
{#if asset.exifInfo}
diff --git a/web/src/lib/components/asset-viewer/photo-viewer.svelte b/web/src/lib/components/asset-viewer/photo-viewer.svelte index d88609f7bbef1..2eba05a088f8c 100644 --- a/web/src/lib/components/asset-viewer/photo-viewer.svelte +++ b/web/src/lib/components/asset-viewer/photo-viewer.svelte @@ -8,6 +8,7 @@ import type { TimelineAsset } from '$lib/managers/timeline-manager/types'; import { photoViewerImgElement } from '$lib/stores/assets-store.svelte'; import { isFaceEditMode } from '$lib/stores/face-edit.svelte'; + import { ocrBoxesArray } from '$lib/stores/ocr.store'; import { boundingBoxesArray } from '$lib/stores/people.store'; import { alwaysLoadOriginalFile } from '$lib/stores/preferences.store'; import { SlideshowLook, SlideshowState, slideshowLookCssMapping, slideshowStore } from '$lib/stores/slideshow.store'; @@ -15,7 +16,8 @@ import { getAssetOriginalUrl, getAssetThumbnailUrl, handlePromiseError } from '$lib/utils'; import { canCopyImageToClipboard, copyImageToClipboard, isWebCompatibleImage } from '$lib/utils/asset-utils'; import { handleError } from '$lib/utils/handle-error'; - import { getBoundingBox } from '$lib/utils/people-utils'; + import { getOcrBoundingBox } from '$lib/utils/ocr-utils'; + import { getFaceBoundingBox } from '$lib/utils/people-utils'; import { cancelImageUrl } from '$lib/utils/sw-messaging'; import { getAltText } from '$lib/utils/thumbnail-util'; import { toTimelineAsset } from '$lib/utils/timeline-util'; @@ -69,6 +71,7 @@ onDestroy(() => { $boundingBoxesArray = []; + $ocrBoxesArray = []; }); const preload = (targetSize: AssetMediaSize | 'original', preloadAssets?: TimelineAsset[]) => { @@ -201,6 +204,14 @@ let containerWidth = $state(0); let containerHeight = $state(0); + + // Recompute box positions whenever container size changes to handle layout shifts + const ocrDisplayBoxes = $derived.by(() => { + // create explicit dependencies on container dimensions + void containerWidth; + void containerHeight; + return 
getOcrBoundingBox($ocrBoxesArray, $photoZoomState, $photoViewerImgElement); + }); + - {#each getBoundingBox($boundingBoxesArray, $photoZoomState, $photoViewerImgElement) as boundingbox} + {#each getFaceBoundingBox($boundingBoxesArray, $photoZoomState, $photoViewerImgElement) as boundingbox}
{/each} + + + {#each ocrDisplayBoxes as ocrbox} +
+ {/each} + + + {#if $ocrBoxesArray.length === 1} + {@const focusedBox = ocrDisplayBoxes[0]} + {#if focusedBox} + +
+ +
+ +
+ +
+ {/if} + {/if}
{#if isFaceEditMode.value} diff --git a/web/src/lib/stores/ocr.store.ts b/web/src/lib/stores/ocr.store.ts new file mode 100644 index 0000000000000..da20f62156bec --- /dev/null +++ b/web/src/lib/stores/ocr.store.ts @@ -0,0 +1,4 @@ +import type { AssetOcrResponseDto } from '@immich/sdk'; +import { writable } from 'svelte/store'; + +export const ocrBoxesArray = writable([]); diff --git a/web/src/lib/utils/bounding-box-utils.ts b/web/src/lib/utils/bounding-box-utils.ts new file mode 100644 index 0000000000000..4b12d7616de19 --- /dev/null +++ b/web/src/lib/utils/bounding-box-utils.ts @@ -0,0 +1,84 @@ +import type { ZoomImageWheelState } from '@zoom-image/core'; + +const getContainedSize = (img: HTMLImageElement): { width: number; height: number } => { + const ratio = img.naturalWidth / img.naturalHeight; + let width = img.height * ratio; + let height = img.height; + if (width > img.width) { + width = img.width; + height = img.width / ratio; + } + return { width, height }; +}; + +export interface BoundingBox { + top: number; + left: number; + width: number; + height: number; +} + +export interface BoundingBoxCoordinates { + x1: number; + x2: number; + y1: number; + y2: number; + imageWidth: number; + imageHeight: number; +} + +/** + * Calculate on-screen bounding boxes for the displayed image, accounting for object-fit margins, zoom and pan + * @param items Array of items whose x1/x2/y1/y2 are expressed on that item's imageWidth x imageHeight scale + * @param zoom Current zoom state (currentZoom, currentPositionX and currentPositionY are read) + * @param photoViewer The image element; when null an empty array is returned + * @returns One box per item, rounded to whole pixels, each carrying its source item + */ +export const calculateBoundingBoxes = ( + items: T[], + zoom: ZoomImageWheelState, + photoViewer: HTMLImageElement | null, +): (BoundingBox & { item: T })[] => { + const boxes: (BoundingBox & { item: T })[] = []; + + if (photoViewer === null) { + return boxes; + } + + const clientHeight = photoViewer.clientHeight; + const clientWidth = photoViewer.clientWidth; + const { width, height } = getContainedSize(photoViewer); + + for (const item of items) { + // Create the coordinates 
of the box based on the displayed image. + // The coordinates must take into account margins due to the 'object-fit: contain;' css property of the photo-viewer. + const coordinates = { + x1: + (width / item.imageWidth) * zoom.currentZoom * item.x1 + + ((clientWidth - width) / 2) * zoom.currentZoom + + zoom.currentPositionX, + x2: + (width / item.imageWidth) * zoom.currentZoom * item.x2 + + ((clientWidth - width) / 2) * zoom.currentZoom + + zoom.currentPositionX, + y1: + (height / item.imageHeight) * zoom.currentZoom * item.y1 + + ((clientHeight - height) / 2) * zoom.currentZoom + + zoom.currentPositionY, + y2: + (height / item.imageHeight) * zoom.currentZoom * item.y2 + + ((clientHeight - height) / 2) * zoom.currentZoom + + zoom.currentPositionY, + }; + + boxes.push({ + top: Math.round(coordinates.y1), + left: Math.round(coordinates.x1), + width: Math.round(coordinates.x2 - coordinates.x1), + height: Math.round(coordinates.y2 - coordinates.y1), + item, + }); + } + + return boxes; +}; diff --git a/web/src/lib/utils/ocr-utils.ts b/web/src/lib/utils/ocr-utils.ts new file mode 100644 index 0000000000000..157cda00848eb --- /dev/null +++ b/web/src/lib/utils/ocr-utils.ts @@ -0,0 +1,69 @@ +import type { AssetOcrResponseDto } from '@immich/sdk'; +import type { ZoomImageWheelState } from '@zoom-image/core'; +import { calculateBoundingBoxes, type BoundingBoxCoordinates } from './bounding-box-utils'; + +export interface OcrBoundingBox { + top: number; + left: number; + width: number; + height: number; + text: string; + boxScore: number; + textScore: number; +} + +/** + * Convert OCR data to pixel-space bounding box coordinates + * OCR coordinates are normalized (0-1) and have 4 corners, so they are scaled by the image size and reduced to their axis-aligned min/max rectangle + */ +const ocrToCoordinates = (ocr: AssetOcrResponseDto, imageWidth: number, imageHeight: number): BoundingBoxCoordinates => { + // OCR box has 4 corners: (x1,y1), (x2,y2), (x3,y3), (x4,y4) + // For simplicity, we'll create a bounding rectangle from min/max 
coordinates + const x1 = ocr.x1 * imageWidth; + const x2 = ocr.x2 * imageWidth; + const x3 = ocr.x3 * imageWidth; + const x4 = ocr.x4 * imageWidth; + const y1 = ocr.y1 * imageHeight; + const y2 = ocr.y2 * imageHeight; + const y3 = ocr.y3 * imageHeight; + const y4 = ocr.y4 * imageHeight; + + return { + x1: Math.min(x1, x2, x3, x4), + x2: Math.max(x1, x2, x3, x4), + y1: Math.min(y1, y2, y3, y4), + y2: Math.max(y1, y2, y3, y4), + imageWidth, + imageHeight, + }; +}; + +export const getOcrBoundingBox = ( + ocrData: AssetOcrResponseDto[], + zoom: ZoomImageWheelState, + photoViewer: HTMLImageElement | null, +): OcrBoundingBox[] => { + if (photoViewer === null) { + return []; + } + + const imageWidth = photoViewer.naturalWidth; + const imageHeight = photoViewer.naturalHeight; + + const normalizedOcrData = ocrData.map((ocr) => ({ + ...ocrToCoordinates(ocr, imageWidth, imageHeight), + ocr, + })); + + const boxes = calculateBoundingBoxes(normalizedOcrData, zoom, photoViewer); + + return boxes.map((box) => ({ + top: box.top, + left: box.left, + width: box.width, + height: box.height, + text: box.item.ocr.text, + boxScore: box.item.ocr.boxScore, + textScore: box.item.ocr.textScore, + })); +}; diff --git a/web/src/lib/utils/people-utils.ts b/web/src/lib/utils/people-utils.ts index 5fb03842b8aca..22a61b0ba7896 100644 --- a/web/src/lib/utils/people-utils.ts +++ b/web/src/lib/utils/people-utils.ts @@ -2,74 +2,32 @@ import type { Faces } from '$lib/stores/people.store'; import { getAssetThumbnailUrl } from '$lib/utils'; import { AssetTypeEnum, type AssetFaceResponseDto } from '@immich/sdk'; import type { ZoomImageWheelState } from '@zoom-image/core'; - -const getContainedSize = (img: HTMLImageElement): { width: number; height: number } => { - const ratio = img.naturalWidth / img.naturalHeight; - let width = img.height * ratio; - let height = img.height; - if (width > img.width) { - width = img.width; - height = img.width / ratio; - } - return { width, height }; -}; - -export 
interface boundingBox { - top: number; - left: number; - width: number; - height: number; -} - -export const getBoundingBox = ( +import { calculateBoundingBoxes, type BoundingBox, type BoundingBoxCoordinates } from './bounding-box-utils'; + +/** + * Map a face's boundingBox* fields onto the shared BoundingBoxCoordinates shape (values are copied as-is, no scaling) + */ +const faceToCoordinates = (face: Faces): BoundingBoxCoordinates => ({ + x1: face.boundingBoxX1, + x2: face.boundingBoxX2, + y1: face.boundingBoxY1, + y2: face.boundingBoxY2, + imageWidth: face.imageWidth, + imageHeight: face.imageHeight, +}); + +export const getFaceBoundingBox = ( faces: Faces[], zoom: ZoomImageWheelState, photoViewer: HTMLImageElement | null, -): boundingBox[] => { - const boxes: boundingBox[] = []; - +): BoundingBox[] => { if (photoViewer === null) { - return boxes; + return []; } - const clientHeight = photoViewer.clientHeight; - const clientWidth = photoViewer.clientWidth; - - const { width, height } = getContainedSize(photoViewer); - for (const face of faces) { - /* - * - * Create the coordinates of the box based on the displayed image. - * The coordinates must take into account margins due to the 'object-fit: contain;' css property of the photo-viewer. 
- * - */ - const coordinates = { - x1: - (width / face.imageWidth) * zoom.currentZoom * face.boundingBoxX1 + - ((clientWidth - width) / 2) * zoom.currentZoom + - zoom.currentPositionX, - x2: - (width / face.imageWidth) * zoom.currentZoom * face.boundingBoxX2 + - ((clientWidth - width) / 2) * zoom.currentZoom + - zoom.currentPositionX, - y1: - (height / face.imageHeight) * zoom.currentZoom * face.boundingBoxY1 + - ((clientHeight - height) / 2) * zoom.currentZoom + - zoom.currentPositionY, - y2: - (height / face.imageHeight) * zoom.currentZoom * face.boundingBoxY2 + - ((clientHeight - height) / 2) * zoom.currentZoom + - zoom.currentPositionY, - }; + const normalizedFaces = faces.map(faceToCoordinates); - boxes.push({ - top: Math.round(coordinates.y1), - left: Math.round(coordinates.x1), - width: Math.round(coordinates.x2 - coordinates.x1), - height: Math.round(coordinates.y2 - coordinates.y1), - }); - } - return boxes; + return calculateBoundingBoxes(normalizedFaces, zoom, photoViewer); }; export const zoomImageToBase64 = async (