Compare commits

...

1 Commits

Author SHA1 Message Date
Mees Frensel
0e627ba004 feat(web): show ocr text boxes in panoramas 2026-01-30 15:54:25 +01:00
4 changed files with 170 additions and 83 deletions

View File

@@ -424,7 +424,6 @@
// Derived flag for offering the OCR button: every condition below must hold
// (no slideshow running, the asset is an image, the editor is not shown, and ocrManager reports OCR data).
// NOTE(review): the hunk header says this region shrinks from 7 to 6 lines, so one condition shown here is the
// pre-change side of the diff — presumably the EQUIRECTANGULAR exclusion, since the commit enables OCR boxes in
// panoramas. Confirm against the actual file before relying on it.
const showOcrButton = $derived(
$slideshowState === SlideshowState.None &&
asset.type === AssetTypeEnum.Image &&
!(asset.exifInfo?.projectionType === 'EQUIRECTANGULAR') &&
!assetViewerManager.isShowEditor &&
ocrManager.hasOcrData,
);

View File

@@ -1,6 +1,6 @@
<script lang="ts">
import type { OcrBox } from '$lib/utils/ocr-utils';
import { calculateBoundingBoxDimensions } from '$lib/utils/ocr-utils';
import { calculateBoundingBoxMatrix } from '$lib/utils/ocr-utils';
type Props = {
ocrBox: OcrBox;
@@ -8,28 +8,19 @@
let { ocrBox }: Props = $props();
const dimensions = $derived(calculateBoundingBoxDimensions(ocrBox.points));
const dimensions = $derived(calculateBoundingBoxMatrix(ocrBox.points));
const transform = $derived(
`translate(${dimensions.minX}px, ${dimensions.minY}px) rotate(${dimensions.rotation}deg) skew(${dimensions.skewX}deg, ${dimensions.skewY}deg)`,
);
const transformOrigin = $derived(
`${dimensions.centerX - dimensions.minX}px ${dimensions.centerY - dimensions.minY}px`,
const transform = $derived(`matrix3d(${dimensions.matrix.join(',')})`);
// Fits almost all strings within the box, depends on font family
const fontSize = $derived(
`max(var(--text-sm), min(var(--text-6xl), ${(1.4 * dimensions.width) / ocrBox.text.length}px))`,
);
</script>
<div class="absolute group left-0 top-0 pointer-events-none">
<!-- Bounding box with CSS transforms -->
<div class="absolute left-0 top-0">
<div
class="absolute border-2 border-blue-500 bg-blue-500/10 cursor-pointer pointer-events-auto transition-all group-hover:bg-blue-500/30 group-hover:border-blue-600 group-hover:border-[3px]"
style="width: {dimensions.width}px; height: {dimensions.height}px; transform: {transform}; transform-origin: {transformOrigin};"
></div>
<!-- Text overlay - always rendered but invisible, allows text selection and copy -->
<div
class="absolute flex items-center justify-center text-transparent text-sm px-2 py-1 pointer-events-auto cursor-text whitespace-pre-wrap wrap-break-word select-text group-hover:text-white group-hover:bg-black/75 group-hover:z-10"
style="width: {dimensions.width}px; height: {dimensions.height}px; transform: {transform}; transform-origin: {transformOrigin};"
class="absolute flex items-center justify-center text-transparent text-sm border-2 border-blue-500 bg-blue-500/10 px-2 py-1 pointer-events-auto cursor-text whitespace-pre-wrap wrap-break-word select-text transition-all hover:text-white hover:bg-black/60 hover:border-blue-600 hover:border-3"
style="font-size: {fontSize}; width: {dimensions.width}px; height: {dimensions.height}px; transform: {transform}; transform-origin: 0 0;"
>
{ocrBox.text}
</div>

View File

@@ -2,8 +2,10 @@
import { shortcuts } from '$lib/actions/shortcut';
import AssetViewerEvents from '$lib/components/AssetViewerEvents.svelte';
import { assetViewerManager } from '$lib/managers/asset-viewer-manager.svelte';
import { ocrManager, type OcrBoundingBox } from '$lib/stores/ocr.svelte';
import { boundingBoxesArray, type Faces } from '$lib/stores/people.store';
import { alwaysLoadOriginalFile } from '$lib/stores/preferences.store';
import { calculateBoundingBoxMatrix, getOcrBoundingBoxesAtSize, type Point } from '$lib/utils/ocr-utils';
import {
EquirectangularAdapter,
Viewer,
@@ -27,6 +29,17 @@
strokeLinejoin: 'round',
};
// SVG styling passed as `svgStyle` for each OCR polygon marker.
// Adapted as well as possible from classlist 'border-2 border-blue-500 bg-blue-500/10 hover:border-blue-600 hover:border-3'
// (the hover variants have no counterpart here).
const OCR_BOX_SVG_STYLE = {
fill: 'var(--color-blue-500)',
fillOpacity: '0.1',
stroke: 'var(--color-blue-500)',
strokeWidth: '2px',
};
// Class list put on the <div> that is used as a marker's tooltip content; that div shows the recognized text
// and is sized and transformed inline by the code that builds the tooltip.
const OCR_TOOLTIP_HTML_CLASS =
'flex items-center justify-center text-white bg-black/50 cursor-text pointer-events-auto whitespace-pre-wrap wrap-break-word select-text';
type Props = {
panorama: string | { source: string };
originalPanorama?: string | { source: string };
@@ -96,6 +109,59 @@
}
});
// Rebuild the OCR markers from scratch (updateOnly is left at its default of false) using the current
// overlay flag and OCR data. Presumably both ocrManager properties are reactive state, so this effect
// re-runs when either changes — confirm in the ocr store.
$effect(() => {
updateOcrBoxes(ocrManager.showOverlay, ocrManager.data);
});
/** Replacement entities for the characters that are significant in HTML text and attribute values. */
const OCR_HTML_ESCAPES: Record<string, string> = {
  '&': '&amp;',
  '<': '&lt;',
  '>': '&gt;',
  '"': '&quot;',
  "'": '&#39;',
};

/** Escapes recognized text so the tooltip shows it literally instead of parsing it as markup. */
const escapeOcrText = (text: string): string => text.replace(/[&<>"']/g, (char) => OCR_HTML_ESCAPES[char] ?? char);

/**
 * Adds or refreshes one polygon marker (with a text tooltip) per OCR box on the panorama.
 *
 * Does nothing until the viewer, its texture data and the markers plugin are all available.
 * When the overlay is hidden, all markers are removed.
 *
 * @param showOverlay - Whether OCR boxes should currently be visible.
 * @param ocrData - OCR results, scaled here to the cropped panorama size.
 * @param updateOnly - Use updateOnly=true on zoom, pan, or resize: existing markers are updated in place
 *   (only polygon and tooltip content) instead of being cleared and re-added.
 */
const updateOcrBoxes = (showOverlay: boolean, ocrData: OcrBoundingBox[], updateOnly = false) => {
  if (!viewer || !viewer.state.textureData || !viewer.getPlugin(MarkersPlugin)) {
    return;
  }
  const markersPlugin = viewer.getPlugin<MarkersPlugin>(MarkersPlugin);
  if (!showOverlay) {
    markersPlugin.clearMarkers();
    return;
  }
  if (!updateOnly) {
    markersPlugin.clearMarkers();
  }

  const { croppedWidth, croppedHeight } = viewer.state.textureData.panoData;
  const boxes = getOcrBoundingBoxesAtSize(ocrData, { width: croppedWidth, height: croppedHeight });

  for (const [index, box] of boxes.entries()) {
    const id = `box_${index}`;
    // The marker polygon stays in texture pixels; only the tooltip needs on-screen coordinates.
    const polygonPixels = box.points.map((p): [number, number] => [p.x, p.y]);
    const viewerPoints = box.points.map((p) => texturePointToViewerPoint(viewer, p));
    const { matrix, width, height } = calculateBoundingBoxMatrix(viewerPoints);

    // Fits almost all strings within the box, depends on font family.
    // Clamp the divisor so an empty string cannot produce an "Infinitypx" font size.
    const fontSize = (1.4 * width) / Math.max(box.text.length, 1);
    const transform = `matrix3d(${matrix.join(',')})`;
    // The tooltip content is an HTML string and OCR text comes from arbitrary images, so escape it.
    const content = `<div class="${OCR_TOOLTIP_HTML_CLASS}" style="font-size: ${fontSize}px; width: ${width}px; height: ${height}px; transform: ${transform}; transform-origin: 0 0;">${escapeOcrText(box.text)}</div>`;

    if (updateOnly) {
      markersPlugin.updateMarker({
        id,
        polygonPixels,
        tooltip: { content },
      });
    } else {
      markersPlugin.addMarker({
        id,
        polygonPixels,
        svgStyle: OCR_BOX_SVG_STYLE,
        tooltip: { content, trigger: 'click' },
      });
    }
  }
};
/**
 * Maps a texture coordinate to viewer coordinates by going through spherical coordinates,
 * using the viewer's dataHelper for both conversions.
 */
const texturePointToViewerPoint = (viewer: Viewer, point: Point) => {
  const { dataHelper } = viewer;
  const { x: textureX, y: textureY } = point;
  return dataHelper.sphericalCoordsToViewerCoords(dataHelper.textureCoordsToSphericalCoords({ textureX, textureY }));
};
/** Animates the viewer to zoom 50 when assetViewerManager.zoom is above 1, otherwise to 83.3, at speed 250. */
const onZoom = () => {
  if (viewer == null) {
    return;
  }
  let targetZoom = 83.3;
  if (assetViewerManager.zoom > 1) {
    targetZoom = 50;
  }
  viewer.animate({ zoom: targetZoom, speed: 250 });
};
@@ -160,7 +226,20 @@
viewer.addEventListener(events.ZoomUpdatedEvent.type, zoomHandler, { passive: true });
}
return () => viewer.removeEventListener(events.ZoomUpdatedEvent.type, zoomHandler);
const onReadyHandler = () => updateOcrBoxes(ocrManager.showOverlay, ocrManager.data, false);
const updateHandler = () => updateOcrBoxes(ocrManager.showOverlay, ocrManager.data, true);
viewer.addEventListener(events.ReadyEvent.type, onReadyHandler);
viewer.addEventListener(events.PositionUpdatedEvent.type, updateHandler);
viewer.addEventListener(events.SizeUpdatedEvent.type, updateHandler);
viewer.addEventListener(events.ZoomUpdatedEvent.type, updateHandler, { passive: true });
return () => {
viewer.removeEventListener(events.ReadyEvent.type, onReadyHandler);
viewer.removeEventListener(events.PositionUpdatedEvent.type, updateHandler);
viewer.removeEventListener(events.SizeUpdatedEvent.type, updateHandler);
viewer.removeEventListener(events.ZoomUpdatedEvent.type, updateHandler);
viewer.removeEventListener(events.ZoomUpdatedEvent.type, zoomHandler);
};
});
onDestroy(() => {
@@ -176,3 +255,25 @@
<svelte:document use:shortcuts={[{ shortcut: { key: 'z' }, onShortcut: onZoom, preventDefault: true }]} />
<div class="h-full w-full mb-0" bind:this={container}></div>
<style>
/* Reset the default tooltip styling */
/* The tooltip container is pinned to the top-left corner and collapsed to zero size. The OCR text div placed
   inside it carries its own matrix3d transform with transform-origin 0 0, computed from viewer coordinates, so
   it presumably has to start from that corner — confirm against the plugin's tooltip layout.
   NOTE(review): !important is presumably needed to override positions the plugin sets inline — confirm. */
:global(.psv-tooltip) {
top: 0 !important;
left: 0 !important;
background: none;
box-shadow: none;
width: 0;
height: 0;
}
/* Strip padding and text shadow from the content wrapper so the OCR text div is not offset or restyled. */
:global(.psv-tooltip-content) {
font: var(--font-normal);
padding: 0;
text-shadow: none;
}
/* Hide the tooltip's arrow element. */
:global(.psv-tooltip-arrow) {
display: none;
}
</style>

View File

@@ -12,70 +12,58 @@ const getContainedSize = (img: HTMLImageElement): { width: number; height: numbe
return { width, height };
};
/** A pair of x/y coordinates; the coordinate space (texture, viewer, image pixels) is defined by the caller. */
export type Point = {
x: number;
y: number;
};
export interface OcrBox {
id: string;
points: { x: number; y: number }[];
points: Point[];
text: string;
confidence: number;
}
export interface BoundingBoxDimensions {
minX: number;
maxX: number;
minY: number;
maxY: number;
width: number;
height: number;
centerX: number;
centerY: number;
rotation: number;
skewX: number;
skewY: number;
}
/**
* Calculate bounding box dimensions and properties from OCR points
* Calculate bounding box transform from OCR points. Result matrix can be used as input for css matrix3d.
* @param points - Array of 4 corner points of the bounding box
* @returns Dimensions, rotation, and skew values for the bounding box
* @returns 4x4 matrix to transform the div with text onto the polygon defined by the corner points, and size to set on the source div.
*/
export const calculateBoundingBoxDimensions = (points: { x: number; y: number }[]): BoundingBoxDimensions => {
export const calculateBoundingBoxMatrix = (points: Point[]): { matrix: number[]; width: number; height: number } => {
const [topLeft, topRight, bottomRight, bottomLeft] = points;
const minX = Math.min(...points.map(({ x }) => x));
const maxX = Math.max(...points.map(({ x }) => x));
const minY = Math.min(...points.map(({ y }) => y));
const maxY = Math.max(...points.map(({ y }) => y));
const width = maxX - minX;
const height = maxY - minY;
const centerX = (minX + maxX) / 2;
const centerY = (minY + maxY) / 2;
// Calculate rotation angle from the bottom edge (bottomLeft to bottomRight)
const rotation = Math.atan2(bottomRight.y - bottomLeft.y, bottomRight.x - bottomLeft.x) * (180 / Math.PI);
// Approximate width and height to prevent text distortion as much as possible
const distance = (p1: Point, p2: Point) => Math.hypot(p2.x - p1.x, p2.y - p1.y);
const width = Math.max(distance(topLeft, topRight), distance(bottomLeft, bottomRight));
const height = Math.max(distance(topLeft, bottomLeft), distance(topRight, bottomRight));
// Calculate skew angles to handle perspective distortion
// SkewX: compare left and right edges
const leftEdgeAngle = Math.atan2(bottomLeft.y - topLeft.y, bottomLeft.x - topLeft.x);
const rightEdgeAngle = Math.atan2(bottomRight.y - topRight.y, bottomRight.x - topRight.x);
const skewX = (rightEdgeAngle - leftEdgeAngle) * (180 / Math.PI);
const dx1 = topRight.x - bottomRight.x;
const dx2 = bottomLeft.x - bottomRight.x;
const dx3 = topLeft.x - topRight.x + bottomRight.x - bottomLeft.x;
// SkewY: compare top and bottom edges
const topEdgeAngle = Math.atan2(topRight.y - topLeft.y, topRight.x - topLeft.x);
const bottomEdgeAngle = Math.atan2(bottomRight.y - bottomLeft.y, bottomRight.x - bottomLeft.x);
const skewY = (bottomEdgeAngle - topEdgeAngle) * (180 / Math.PI);
const dy1 = topRight.y - bottomRight.y;
const dy2 = bottomLeft.y - bottomRight.y;
const dy3 = topLeft.y - topRight.y + bottomRight.y - bottomLeft.y;
return {
minX,
maxX,
minY,
maxY,
width,
height,
centerX,
centerY,
rotation,
skewX,
skewY,
};
const det = dx1 * dy2 - dx2 * dy1;
const a13 = (dx3 * dy2 - dx2 * dy3) / det;
const a23 = (dx1 * dy3 - dx3 * dy1) / det;
const a11 = (1 + a13) * topRight.x - topLeft.x;
const a21 = (1 + a23) * bottomLeft.x - topLeft.x;
const a12 = (1 + a13) * topRight.y - topLeft.y;
const a22 = (1 + a23) * bottomLeft.y - topLeft.y;
// prettier-ignore
const matrix = [
a11 / width, a12 / width, 0, a13 / width,
a21 / height, a22 / height, 0, a23 / height,
0, 0, 1, 0,
topLeft.x, topLeft.y, 0, 1,
];
return { matrix, width, height };
};
/**
@@ -87,18 +75,32 @@ export const getOcrBoundingBoxes = (
zoom: ZoomImageWheelState,
photoViewer: HTMLImageElement | null,
): OcrBox[] => {
const boxes: OcrBox[] = [];
if (photoViewer === null || !photoViewer.naturalWidth || !photoViewer.naturalHeight) {
return boxes;
return [];
}
const clientHeight = photoViewer.clientHeight;
const clientWidth = photoViewer.clientWidth;
const { width, height } = getContainedSize(photoViewer);
const imageWidth = photoViewer.naturalWidth;
const imageHeight = photoViewer.naturalHeight;
const offset = {
x: ((clientWidth - width) / 2) * zoom.currentZoom + zoom.currentPositionX,
y: ((clientHeight - height) / 2) * zoom.currentZoom + zoom.currentPositionY,
};
return getOcrBoundingBoxesAtSize(
ocrData,
{ width: width * zoom.currentZoom, height: height * zoom.currentZoom },
offset,
);
};
export const getOcrBoundingBoxesAtSize = (
ocrData: OcrBoundingBox[],
targetSize: { width: number; height: number },
offset?: Point,
) => {
const boxes: OcrBox[] = [];
for (const ocr of ocrData) {
// Convert normalized coordinates (0-1) to actual pixel positions
@@ -109,14 +111,8 @@ export const getOcrBoundingBoxes = (
{ x: ocr.x3, y: ocr.y3 },
{ x: ocr.x4, y: ocr.y4 },
].map((point) => ({
x:
(width / imageWidth) * zoom.currentZoom * point.x * imageWidth +
((clientWidth - width) / 2) * zoom.currentZoom +
zoom.currentPositionX,
y:
(height / imageHeight) * zoom.currentZoom * point.y * imageHeight +
((clientHeight - height) / 2) * zoom.currentZoom +
zoom.currentPositionY,
x: targetSize.width * point.x + (offset?.x ?? 0),
y: targetSize.height * point.y + (offset?.y ?? 0),
}));
boxes.push({