Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions i18n/en.json
Original file line number Diff line number Diff line change
Expand Up @@ -1150,6 +1150,7 @@
"hide_all_people": "Hide all people",
"hide_gallery": "Hide gallery",
"hide_named_person": "Hide person {name}",
"hide_ocr_boxes": "Hide OCR boxes",
"hide_password": "Hide password",
"hide_person": "Hide person",
"hide_unnamed_people": "Hide unnamed people",
Expand Down Expand Up @@ -1260,6 +1261,7 @@
"link_to_oauth": "Link to OAuth",
"linked_oauth_account": "Linked OAuth account",
"list": "List",
"load_ocr_data": "Load OCR data",
"loading": "Loading",
"loading_search_results_failed": "Loading search results failed",
"local": "Local",
Expand Down Expand Up @@ -1440,6 +1442,7 @@
"no_locked_photos_message": "Photos and videos in the locked folder are hidden and won't show up as you browse or search your library.",
"no_name": "No Name",
"no_notifications": "No notifications",
"no_ocr_data": "No OCR data available",
"no_people_found": "No matching people found",
"no_places": "No places",
"no_remote_assets_found": "No remote assets found with this checksum",
Expand All @@ -1465,6 +1468,7 @@
"obtainium_configurator": "Obtainium Configurator",
"obtainium_configurator_instructions": "Use Obtainium to install and update the Android app directly from Immich GitHub's release. Create an API key and select a variant to create your Obtainium configuration link",
"ocr": "OCR",
"ocr_text": "OCR Text",
"official_immich_resources": "Official Immich Resources",
"offline": "Offline",
"offset": "Offset",
Expand Down Expand Up @@ -1941,6 +1945,7 @@
"show_in_timeline_setting_description": "Show photos and videos from this user in your timeline",
"show_keyboard_shortcuts": "Show keyboard shortcuts",
"show_metadata": "Show metadata",
"show_ocr_boxes": "Show OCR boxes",
"show_or_hide_info": "Show or hide info",
"show_password": "Show password",
"show_person_options": "Show person options",
Expand Down
85 changes: 84 additions & 1 deletion web/src/lib/components/asset-viewer/detail-panel.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import { authManager } from '$lib/managers/auth-manager.svelte';
import AssetChangeDateModal from '$lib/modals/AssetChangeDateModal.svelte';
import { isFaceEditMode } from '$lib/stores/face-edit.svelte';
import { ocrBoxesArray } from '$lib/stores/ocr.store';
import { boundingBoxesArray } from '$lib/stores/people.store';
import { locale } from '$lib/stores/preferences.store';
import { featureFlags } from '$lib/stores/system-config-manager.svelte';
Expand All @@ -19,7 +20,7 @@
import { getMetadataSearchQuery } from '$lib/utils/metadata-search';
import { fromISODateTime, fromISODateTimeUTC, toTimelineAsset } from '$lib/utils/timeline-util';
import { getParentPath } from '$lib/utils/tree-utils';
import { AssetMediaSize, getAssetInfo, type AlbumResponseDto, type AssetResponseDto } from '@immich/sdk';
import { AssetMediaSize, getAssetInfo, getAssetOcr, type AlbumResponseDto, type AssetResponseDto } from '@immich/sdk';
import { Icon, IconButton, LoadingSpinner, modalManager } from '@immich/ui';
import {
mdiCalendar,
Expand All @@ -31,6 +32,7 @@
mdiInformationOutline,
mdiPencil,
mdiPlus,
mdiTextBox,
} from '@mdi/js';
import { DateTime } from 'luxon';
import { t } from 'svelte-i18n';
Expand All @@ -51,6 +53,8 @@

let showAssetPath = $state(false);
let showEditFaces = $state(false);
let showOcrTexts = $state(false);
let ocrData = $state<Awaited<ReturnType<typeof getAssetOcr>>>([]);
let isOwner = $derived($user?.id === asset.ownerId);
let people = $derived(asset.people || []);
let unassignedFaces = $derived(asset.unassignedFaces || []);
Expand Down Expand Up @@ -80,6 +84,8 @@
if (asset.id !== previousId) {
showEditFaces = false;
previousId = asset.id;
showOcrTexts = false;
ocrData = [];
}
});

Expand Down Expand Up @@ -115,6 +121,25 @@

await modalManager.show(AssetChangeDateModal, { asset: toTimelineAsset(asset), initialDate: dateTime });
};

/**
 * Toggle the OCR text list and the OCR overlay boxes for the current asset.
 *
 * - Hiding clears `$ocrBoxesArray`.
 * - Showing publishes the cached `ocrData`, fetching it first via `getAssetOcr`
 *   when nothing has been loaded yet for this asset.
 *
 * The fetch is asynchronous, so the result is only applied when it still
 * belongs to the asset being displayed, and boxes are only published when the
 * panel is still meant to be visible once the request settles.
 */
const toggleOcrTexts = async () => {
  showOcrTexts = !showOcrTexts;

  if (!showOcrTexts) {
    // Clear when hidden
    $ocrBoxesArray = [];
    return;
  }

  if (ocrData.length === 0) {
    // Remember which asset the request was issued for: the user may navigate
    // to another asset before the response arrives.
    const requestedAssetId = asset.id;
    try {
      const loaded = await getAssetOcr({ id: requestedAssetId });
      if (asset.id !== requestedAssetId) {
        // Stale response for a previous asset; never show it on the current one.
        return;
      }
      // Reverse a copy (not the response itself) so the texts appear in top-down order.
      ocrData = [...loaded].reverse();
    } catch (error) {
      console.error('Failed to load OCR data:', error);
      return;
    }
  }

  // The panel may have been toggled off again while the request was in flight.
  if (showOcrTexts) {
    $ocrBoxesArray = ocrData;
  }
};
</script>

<section class="relative p-2">
Expand Down Expand Up @@ -258,6 +283,64 @@
</section>
{/if}

{#if !authManager.isSharedLink}
<section class="px-4 pt-4 text-sm">
<div class="flex h-10 w-full items-center justify-between">
<h2 class="uppercase">{$t('ocr_text')}</h2>
<div class="flex gap-2 items-center">
{#if ocrData.length > 0}
<IconButton
aria-label={showOcrTexts ? $t('hide_ocr_boxes') : $t('show_ocr_boxes')}
icon={showOcrTexts ? mdiEyeOff : mdiEye}
size="medium"
shape="round"
color="secondary"
variant="ghost"
onclick={toggleOcrTexts}
/>
{:else}
<IconButton
aria-label={$t('load_ocr_data')}
icon={mdiTextBox}
size="medium"
shape="round"
color="secondary"
variant="ghost"
onclick={toggleOcrTexts}
/>
{/if}
</div>
</div>

{#if showOcrTexts}
<div class="mt-2" transition:slide={{ duration: 250 }}>
{#if ocrData.length > 0}
<div class="flex flex-col">
{#each ocrData as ocr (ocr.id)}
  <!--
    Focusing or hovering an entry narrows the overlay to that single box.
    Leaving it (blur or mouseleave) restores the full set, so the overlay stays
    consistent with the "show OCR boxes" toggle for keyboard and mouse users alike.
  -->
  <button
    type="button"
    class="text-start p-2 rounded hover:bg-gray-100 dark:hover:bg-gray-800 transition-colors select-text"
    onfocus={() => ($ocrBoxesArray = [ocr])}
    onblur={() => ($ocrBoxesArray = ocrData)}
    onmouseover={() => ($ocrBoxesArray = [ocr])}
    onmouseleave={() => ($ocrBoxesArray = ocrData)}
  >
    <div class="flex items-start gap-2">
      <div class="flex-1 min-w-0">
        <p class="break-words text-sm">{ocr.text}</p>
      </div>
    </div>
  </button>
{/each}
</div>
{:else}
<p class="text-sm text-gray-500 dark:text-gray-400">{$t('no_ocr_data')}</p>
{/if}
</div>
{/if}
</section>
{/if}

<div class="px-4 py-4">
{#if asset.exifInfo}
<div class="flex h-10 w-full items-center justify-between text-sm">
Expand Down
56 changes: 54 additions & 2 deletions web/src/lib/components/asset-viewer/photo-viewer.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,16 @@
import type { TimelineAsset } from '$lib/managers/timeline-manager/types';
import { photoViewerImgElement } from '$lib/stores/assets-store.svelte';
import { isFaceEditMode } from '$lib/stores/face-edit.svelte';
import { ocrBoxesArray } from '$lib/stores/ocr.store';
import { boundingBoxesArray } from '$lib/stores/people.store';
import { alwaysLoadOriginalFile } from '$lib/stores/preferences.store';
import { SlideshowLook, SlideshowState, slideshowLookCssMapping, slideshowStore } from '$lib/stores/slideshow.store';
import { photoZoomState } from '$lib/stores/zoom-image.store';
import { getAssetOriginalUrl, getAssetThumbnailUrl, handlePromiseError } from '$lib/utils';
import { canCopyImageToClipboard, copyImageToClipboard, isWebCompatibleImage } from '$lib/utils/asset-utils';
import { handleError } from '$lib/utils/handle-error';
import { getBoundingBox } from '$lib/utils/people-utils';
import { getOcrBoundingBox } from '$lib/utils/ocr-utils';
import { getFaceBoundingBox } from '$lib/utils/people-utils';
import { cancelImageUrl } from '$lib/utils/sw-messaging';
import { getAltText } from '$lib/utils/thumbnail-util';
import { toTimelineAsset } from '$lib/utils/timeline-util';
Expand Down Expand Up @@ -69,6 +71,7 @@

onDestroy(() => {
$boundingBoxesArray = [];
$ocrBoxesArray = [];
});

const preload = (targetSize: AssetMediaSize | 'original', preloadAssets?: TimelineAsset[]) => {
Expand Down Expand Up @@ -201,6 +204,14 @@

let containerWidth = $state(0);
let containerHeight = $state(0);

// Display-space OCR boxes for the entries currently in `$ocrBoxesArray`.
// Recomputed whenever the container size changes, to handle layout shifts,
// in addition to changes of the stores passed to `getOcrBoundingBox`.
const ocrDisplayBoxes = $derived.by(() => {
// Read the container dimensions only to register them as dependencies of this
// derived value; the values themselves are not used here.
// NOTE(review): presumably needed because the helper measures the image element
// directly, which is not reactive on its own — confirm against ocr-utils.
void containerWidth;
void containerHeight;
return getOcrBoundingBox($ocrBoxesArray, $photoZoomState, $photoViewerImgElement);
});
</script>

<svelte:document
Expand Down Expand Up @@ -254,13 +265,54 @@
: slideshowLookCssMapping[$slideshowLook]}"
draggable="false"
/>
<!-- Face bounding boxes -->
<!-- eslint-disable-next-line svelte/require-each-key -->
{#each getBoundingBox($boundingBoxesArray, $photoZoomState, $photoViewerImgElement) as boundingbox}
{#each getFaceBoundingBox($boundingBoxesArray, $photoZoomState, $photoViewerImgElement) as boundingbox}
<div
class="absolute border-solid border-white border-3 rounded-lg"
style="top: {boundingbox.top}px; left: {boundingbox.left}px; height: {boundingbox.height}px; width: {boundingbox.width}px;"
></div>
{/each}

<!-- OCR text bounding boxes -->
{#each ocrDisplayBoxes as ocrbox}
<div
class="absolute border-solid border-yellow-400 border-2 rounded"
style="top: {ocrbox.top}px; left: {ocrbox.left}px; height: {ocrbox.height}px; width: {ocrbox.width}px;"
title="{ocrbox.text} (box: {(ocrbox.boxScore * 100).toFixed(1)}%, text: {(ocrbox.textScore * 100).toFixed(
1,
)}%)"
></div>
{/each}

<!-- OCR overlay mask for focused boxes -->
{#if $ocrBoxesArray.length === 1}
{@const focusedBox = ocrDisplayBoxes[0]}
{#if focusedBox}
<!-- Top mask -->
<div
class="absolute inset-x-0 top-0 pointer-events-none bg-black/60"
style="height: {focusedBox.top}px;"
></div>
<!-- Bottom mask -->
<div
class="absolute inset-x-0 bottom-0 pointer-events-none bg-black/60"
style="height: {containerHeight - focusedBox.top - focusedBox.height}px;"
></div>
<!-- Left mask -->
<div
class="absolute left-0 pointer-events-none bg-black/60"
style="top: {focusedBox.top}px; width: {focusedBox.left}px; height: {focusedBox.height}px;"
></div>
<!-- Right mask -->
<div
class="absolute right-0 pointer-events-none bg-black/60"
style="top: {focusedBox.top}px; width: {containerWidth -
focusedBox.left -
focusedBox.width}px; height: {focusedBox.height}px;"
></div>
{/if}
{/if}
</div>

{#if isFaceEditMode.value}
Expand Down
4 changes: 4 additions & 0 deletions web/src/lib/stores/ocr.store.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
import type { AssetOcrResponseDto } from '@immich/sdk';
import { writable } from 'svelte/store';

/**
 * OCR entries whose bounding boxes should currently be drawn over the photo.
 *
 * Written by the asset detail panel (all entries, a single focused/hovered
 * entry, or an empty array when hidden) and read by the photo viewer, which
 * also resets it to an empty array when it is destroyed.
 */
export const ocrBoxesArray = writable<AssetOcrResponseDto[]>([]);
84 changes: 84 additions & 0 deletions web/src/lib/utils/bounding-box-utils.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
import type { ZoomImageWheelState } from '@zoom-image/core';

/**
 * Size of the picture as it is actually drawn inside the image element when
 * it is letterboxed to fit (aspect ratio preserved).
 *
 * The picture is first fitted to the element's full height; if that would be
 * wider than the element, it is fitted to the element's full width instead.
 *
 * @param img Image element providing natural and layout dimensions
 * @returns Width and height of the drawn picture
 */
const getContainedSize = (img: HTMLImageElement): { width: number; height: number } => {
  const aspectRatio = img.naturalWidth / img.naturalHeight;
  const widthAtFullHeight = img.height * aspectRatio;

  if (widthAtFullHeight > img.width) {
    // Too wide for the element: the width is the limiting dimension.
    return { width: img.width, height: img.width / aspectRatio };
  }

  return { width: widthAtFullHeight, height: img.height };
};

/**
 * A rectangle in display space, as produced by `calculateBoundingBoxes`
 * (which rounds every value to an integer).
 */
export interface BoundingBox {
// Offset of the rectangle's top edge.
top: number;
// Offset of the rectangle's left edge.
left: number;
// Horizontal extent of the rectangle.
width: number;
// Vertical extent of the rectangle.
height: number;
}

/**
 * A rectangle given by two corner points, expressed in the coordinate space of
 * a reference image of size `imageWidth` x `imageHeight`.
 * `calculateBoundingBoxes` maps (`x1`, `y1`) to the top-left corner of the
 * resulting box and uses (`x2`, `y2`) to derive its width and height.
 */
export interface BoundingBoxCoordinates {
// Horizontal coordinate of the first corner (becomes `left`).
x1: number;
// Horizontal coordinate of the opposite corner.
x2: number;
// Vertical coordinate of the first corner (becomes `top`).
y1: number;
// Vertical coordinate of the opposite corner.
y2: number;
// Width of the reference image the x coordinates are relative to.
imageWidth: number;
// Height of the reference image the y coordinates are relative to.
imageHeight: number;
}

/**
 * Project image-space bounding boxes onto the displayed photo, taking the
 * current zoom level and pan offset into account.
 *
 * @param items Items carrying corner coordinates relative to their reference image size
 * @param zoom Current zoom state (zoom factor and pan position)
 * @param photoViewer The image element showing the photo, or `null` when there is none
 * @returns One rounded display box per item, each keeping a reference to its source item;
 *          an empty array when `photoViewer` is `null`
 */
export const calculateBoundingBoxes = <T extends BoundingBoxCoordinates>(
  items: T[],
  zoom: ZoomImageWheelState,
  photoViewer: HTMLImageElement | null,
): (BoundingBox & { item: T })[] => {
  if (photoViewer === null) {
    return [];
  }

  const elementHeight = photoViewer.clientHeight;
  const elementWidth = photoViewer.clientWidth;
  const rendered = getContainedSize(photoViewer);

  // Map one coordinate from image space to display space along a single axis.
  // The picture is letterboxed inside the element ('object-fit: contain;'), so
  // half of the unused space along the axis is added as a margin before the
  // pan offset is applied.
  const toDisplay = (
    value: number,
    imageExtent: number,
    renderedExtent: number,
    elementExtent: number,
    pan: number,
  ): number =>
    (renderedExtent / imageExtent) * zoom.currentZoom * value +
    ((elementExtent - renderedExtent) / 2) * zoom.currentZoom +
    pan;

  return items.map((item) => {
    const left = toDisplay(item.x1, item.imageWidth, rendered.width, elementWidth, zoom.currentPositionX);
    const right = toDisplay(item.x2, item.imageWidth, rendered.width, elementWidth, zoom.currentPositionX);
    const top = toDisplay(item.y1, item.imageHeight, rendered.height, elementHeight, zoom.currentPositionY);
    const bottom = toDisplay(item.y2, item.imageHeight, rendered.height, elementHeight, zoom.currentPositionY);

    return {
      top: Math.round(top),
      left: Math.round(left),
      width: Math.round(right - left),
      height: Math.round(bottom - top),
      item,
    };
  });
};
Loading
Loading