mirror of
https://github.com/immich-app/immich.git
synced 2026-01-30 16:54:48 -08:00
Compare commits
1 Commits
feat/share
...
feat/pano-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
0e627ba004 |
@@ -424,7 +424,6 @@
|
||||
const showOcrButton = $derived(
|
||||
$slideshowState === SlideshowState.None &&
|
||||
asset.type === AssetTypeEnum.Image &&
|
||||
!(asset.exifInfo?.projectionType === 'EQUIRECTANGULAR') &&
|
||||
!assetViewerManager.isShowEditor &&
|
||||
ocrManager.hasOcrData,
|
||||
);
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
<script lang="ts">
|
||||
import type { OcrBox } from '$lib/utils/ocr-utils';
|
||||
import { calculateBoundingBoxDimensions } from '$lib/utils/ocr-utils';
|
||||
import { calculateBoundingBoxMatrix } from '$lib/utils/ocr-utils';
|
||||
|
||||
type Props = {
|
||||
ocrBox: OcrBox;
|
||||
@@ -8,28 +8,19 @@
|
||||
|
||||
let { ocrBox }: Props = $props();
|
||||
|
||||
const dimensions = $derived(calculateBoundingBoxDimensions(ocrBox.points));
|
||||
const dimensions = $derived(calculateBoundingBoxMatrix(ocrBox.points));
|
||||
|
||||
const transform = $derived(
|
||||
`translate(${dimensions.minX}px, ${dimensions.minY}px) rotate(${dimensions.rotation}deg) skew(${dimensions.skewX}deg, ${dimensions.skewY}deg)`,
|
||||
);
|
||||
|
||||
const transformOrigin = $derived(
|
||||
`${dimensions.centerX - dimensions.minX}px ${dimensions.centerY - dimensions.minY}px`,
|
||||
const transform = $derived(`matrix3d(${dimensions.matrix.join(',')})`);
|
||||
// Fits almost all strings within the box, depends on font family
|
||||
const fontSize = $derived(
|
||||
`max(var(--text-sm), min(var(--text-6xl), ${(1.4 * dimensions.width) / ocrBox.text.length}px))`,
|
||||
);
|
||||
</script>
|
||||
|
||||
<div class="absolute group left-0 top-0 pointer-events-none">
|
||||
<!-- Bounding box with CSS transforms -->
|
||||
<div class="absolute left-0 top-0">
|
||||
<div
|
||||
class="absolute border-2 border-blue-500 bg-blue-500/10 cursor-pointer pointer-events-auto transition-all group-hover:bg-blue-500/30 group-hover:border-blue-600 group-hover:border-[3px]"
|
||||
style="width: {dimensions.width}px; height: {dimensions.height}px; transform: {transform}; transform-origin: {transformOrigin};"
|
||||
></div>
|
||||
|
||||
<!-- Text overlay - always rendered but invisible, allows text selection and copy -->
|
||||
<div
|
||||
class="absolute flex items-center justify-center text-transparent text-sm px-2 py-1 pointer-events-auto cursor-text whitespace-pre-wrap wrap-break-word select-text group-hover:text-white group-hover:bg-black/75 group-hover:z-10"
|
||||
style="width: {dimensions.width}px; height: {dimensions.height}px; transform: {transform}; transform-origin: {transformOrigin};"
|
||||
class="absolute flex items-center justify-center text-transparent text-sm border-2 border-blue-500 bg-blue-500/10 px-2 py-1 pointer-events-auto cursor-text whitespace-pre-wrap wrap-break-word select-text transition-all hover:text-white hover:bg-black/60 hover:border-blue-600 hover:border-3"
|
||||
style="font-size: {fontSize}; width: {dimensions.width}px; height: {dimensions.height}px; transform: {transform}; transform-origin: 0 0;"
|
||||
>
|
||||
{ocrBox.text}
|
||||
</div>
|
||||
|
||||
@@ -2,8 +2,10 @@
|
||||
import { shortcuts } from '$lib/actions/shortcut';
|
||||
import AssetViewerEvents from '$lib/components/AssetViewerEvents.svelte';
|
||||
import { assetViewerManager } from '$lib/managers/asset-viewer-manager.svelte';
|
||||
import { ocrManager, type OcrBoundingBox } from '$lib/stores/ocr.svelte';
|
||||
import { boundingBoxesArray, type Faces } from '$lib/stores/people.store';
|
||||
import { alwaysLoadOriginalFile } from '$lib/stores/preferences.store';
|
||||
import { calculateBoundingBoxMatrix, getOcrBoundingBoxesAtSize, type Point } from '$lib/utils/ocr-utils';
|
||||
import {
|
||||
EquirectangularAdapter,
|
||||
Viewer,
|
||||
@@ -27,6 +29,17 @@
|
||||
strokeLinejoin: 'round',
|
||||
};
|
||||
|
||||
// Adapted as well as possible from classlist 'border-2 border-blue-500 bg-blue-500/10 hover:border-blue-600 hover:border-3'
|
||||
const OCR_BOX_SVG_STYLE = {
|
||||
fill: 'var(--color-blue-500)',
|
||||
fillOpacity: '0.1',
|
||||
stroke: 'var(--color-blue-500)',
|
||||
strokeWidth: '2px',
|
||||
};
|
||||
|
||||
const OCR_TOOLTIP_HTML_CLASS =
|
||||
'flex items-center justify-center text-white bg-black/50 cursor-text pointer-events-auto whitespace-pre-wrap wrap-break-word select-text';
|
||||
|
||||
type Props = {
|
||||
panorama: string | { source: string };
|
||||
originalPanorama?: string | { source: string };
|
||||
@@ -96,6 +109,59 @@
|
||||
}
|
||||
});
|
||||
|
||||
$effect(() => {
|
||||
updateOcrBoxes(ocrManager.showOverlay, ocrManager.data);
|
||||
});
|
||||
|
||||
/**
 * Adds or refreshes one polygon marker (with an HTML text tooltip) per OCR box on the panorama viewer.
 *
 * Does nothing until the viewer, its texture data and the markers plugin are all available.
 * When the overlay is hidden, every marker is cleared and nothing is drawn.
 *
 * @param showOverlay - Whether the OCR boxes should be displayed.
 * @param ocrData - OCR results to draw; they are scaled to the cropped panorama size.
 * @param updateOnly - When true, the existing `box_<index>` markers are updated in place instead of
 *   being cleared and re-added. Use updateOnly=true on zoom, pan, or resize.
 */
const updateOcrBoxes = (showOverlay: boolean, ocrData: OcrBoundingBox[], updateOnly = false) => {
  if (!viewer || !viewer.state.textureData || !viewer.getPlugin(MarkersPlugin)) {
    return;
  }
  const markersPlugin = viewer.getPlugin<MarkersPlugin>(MarkersPlugin);
  if (!showOverlay) {
    markersPlugin.clearMarkers();
    return;
  }
  if (!updateOnly) {
    markersPlugin.clearMarkers();
  }

  // The tooltip content is an HTML string and the OCR text comes from image content,
  // so it must be escaped before being embedded to avoid injecting markup.
  const escapeHtml = (text: string) =>
    text
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&#39;');

  const boxes = getOcrBoundingBoxesAtSize(ocrData, {
    width: viewer.state.textureData.panoData.croppedWidth,
    height: viewer.state.textureData.panoData.croppedHeight,
  });

  for (const [index, box] of boxes.entries()) {
    // The tooltip is positioned in viewer coordinates, while the polygon marker stays in texture coordinates.
    const points = box.points.map((p) => texturePointToViewerPoint(viewer, p));
    const { matrix, width, height } = calculateBoundingBoxMatrix(points);

    // Fits almost all strings within the box, depends on font family.
    // Math.max guards against a division by zero for an empty OCR text.
    const fontSize = (1.4 * width) / Math.max(box.text.length, 1);
    const transform = `matrix3d(${matrix.join(',')})`;
    const content = `<div class="${OCR_TOOLTIP_HTML_CLASS}" style="font-size: ${fontSize}px; width: ${width}px; height: ${height}px; transform: ${transform}; transform-origin: 0 0;">${escapeHtml(box.text)}</div>`;

    if (updateOnly) {
      markersPlugin.updateMarker({
        id: `box_${index}`,
        polygonPixels: box.points.map((b) => [b.x, b.y]),
        tooltip: { content },
      });
    } else {
      markersPlugin.addMarker({
        id: `box_${index}`,
        polygonPixels: box.points.map((b) => [b.x, b.y]),
        svgStyle: OCR_BOX_SVG_STYLE,
        tooltip: { content, trigger: 'click' },
      });
    }
  }
};
|
||||
|
||||
/**
 * Converts a point given in panorama texture coordinates to viewer coordinates,
 * going through the viewer's spherical coordinate system.
 */
const texturePointToViewerPoint = (viewer: Viewer, point: Point) => {
  const { dataHelper } = viewer;
  const sphericalPosition = dataHelper.textureCoordsToSphericalCoords({
    textureX: point.x,
    textureY: point.y,
  });
  return dataHelper.sphericalCoordsToViewerCoords(sphericalPosition);
};
|
||||
|
||||
/** Animates the viewer to zoom level 50 when the asset viewer zoom is above 1, otherwise to 83.3. */
const onZoom = () => {
  if (!viewer) {
    return;
  }
  const targetZoom = assetViewerManager.zoom > 1 ? 50 : 83.3;
  viewer.animate({ zoom: targetZoom, speed: 250 });
};
|
||||
@@ -160,7 +226,20 @@
|
||||
viewer.addEventListener(events.ZoomUpdatedEvent.type, zoomHandler, { passive: true });
|
||||
}
|
||||
|
||||
return () => viewer.removeEventListener(events.ZoomUpdatedEvent.type, zoomHandler);
|
||||
const onReadyHandler = () => updateOcrBoxes(ocrManager.showOverlay, ocrManager.data, false);
|
||||
const updateHandler = () => updateOcrBoxes(ocrManager.showOverlay, ocrManager.data, true);
|
||||
viewer.addEventListener(events.ReadyEvent.type, onReadyHandler);
|
||||
viewer.addEventListener(events.PositionUpdatedEvent.type, updateHandler);
|
||||
viewer.addEventListener(events.SizeUpdatedEvent.type, updateHandler);
|
||||
viewer.addEventListener(events.ZoomUpdatedEvent.type, updateHandler, { passive: true });
|
||||
|
||||
return () => {
|
||||
viewer.removeEventListener(events.ReadyEvent.type, onReadyHandler);
|
||||
viewer.removeEventListener(events.PositionUpdatedEvent.type, updateHandler);
|
||||
viewer.removeEventListener(events.SizeUpdatedEvent.type, updateHandler);
|
||||
viewer.removeEventListener(events.ZoomUpdatedEvent.type, updateHandler);
|
||||
viewer.removeEventListener(events.ZoomUpdatedEvent.type, zoomHandler);
|
||||
};
|
||||
});
|
||||
|
||||
onDestroy(() => {
|
||||
@@ -176,3 +255,25 @@
|
||||
|
||||
<svelte:document use:shortcuts={[{ shortcut: { key: 'z' }, onShortcut: onZoom, preventDefault: true }]} />
|
||||
<div class="h-full w-full mb-0" bind:this={container}></div>
|
||||
|
||||
<style>
|
||||
/* Reset the default tooltip styling */
|
||||
:global(.psv-tooltip) {
|
||||
top: 0 !important;
|
||||
left: 0 !important;
|
||||
background: none;
|
||||
box-shadow: none;
|
||||
width: 0;
|
||||
height: 0;
|
||||
}
|
||||
|
||||
:global(.psv-tooltip-content) {
|
||||
font: var(--font-normal);
|
||||
padding: 0;
|
||||
text-shadow: none;
|
||||
}
|
||||
|
||||
:global(.psv-tooltip-arrow) {
|
||||
display: none;
|
||||
}
|
||||
</style>
|
||||
|
||||
@@ -12,70 +12,58 @@ const getContainedSize = (img: HTMLImageElement): { width: number; height: numbe
|
||||
return { width, height };
|
||||
};
|
||||
|
||||
export type Point = {
|
||||
x: number;
|
||||
y: number;
|
||||
};
|
||||
|
||||
export interface OcrBox {
|
||||
id: string;
|
||||
points: { x: number; y: number }[];
|
||||
points: Point[];
|
||||
text: string;
|
||||
confidence: number;
|
||||
}
|
||||
|
||||
export interface BoundingBoxDimensions {
|
||||
minX: number;
|
||||
maxX: number;
|
||||
minY: number;
|
||||
maxY: number;
|
||||
width: number;
|
||||
height: number;
|
||||
centerX: number;
|
||||
centerY: number;
|
||||
rotation: number;
|
||||
skewX: number;
|
||||
skewY: number;
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate bounding box dimensions and properties from OCR points
|
||||
* Calculate bounding box transform from OCR points. Result matrix can be used as input for css matrix3d.
|
||||
* @param points - Array of 4 corner points of the bounding box
|
||||
* @returns Dimensions, rotation, and skew values for the bounding box
|
||||
* @returns 4x4 matrix to transform the div with text onto the polygon defined by the corner points, and size to set on the source div.
|
||||
*/
|
||||
export const calculateBoundingBoxDimensions = (points: { x: number; y: number }[]): BoundingBoxDimensions => {
|
||||
export const calculateBoundingBoxMatrix = (points: Point[]): { matrix: number[]; width: number; height: number } => {
|
||||
const [topLeft, topRight, bottomRight, bottomLeft] = points;
|
||||
const minX = Math.min(...points.map(({ x }) => x));
|
||||
const maxX = Math.max(...points.map(({ x }) => x));
|
||||
const minY = Math.min(...points.map(({ y }) => y));
|
||||
const maxY = Math.max(...points.map(({ y }) => y));
|
||||
const width = maxX - minX;
|
||||
const height = maxY - minY;
|
||||
const centerX = (minX + maxX) / 2;
|
||||
const centerY = (minY + maxY) / 2;
|
||||
|
||||
// Calculate rotation angle from the bottom edge (bottomLeft to bottomRight)
|
||||
const rotation = Math.atan2(bottomRight.y - bottomLeft.y, bottomRight.x - bottomLeft.x) * (180 / Math.PI);
|
||||
// Approximate width and height to prevent text distortion as much as possible
|
||||
const distance = (p1: Point, p2: Point) => Math.hypot(p2.x - p1.x, p2.y - p1.y);
|
||||
const width = Math.max(distance(topLeft, topRight), distance(bottomLeft, bottomRight));
|
||||
const height = Math.max(distance(topLeft, bottomLeft), distance(topRight, bottomRight));
|
||||
|
||||
// Calculate skew angles to handle perspective distortion
|
||||
// SkewX: compare left and right edges
|
||||
const leftEdgeAngle = Math.atan2(bottomLeft.y - topLeft.y, bottomLeft.x - topLeft.x);
|
||||
const rightEdgeAngle = Math.atan2(bottomRight.y - topRight.y, bottomRight.x - topRight.x);
|
||||
const skewX = (rightEdgeAngle - leftEdgeAngle) * (180 / Math.PI);
|
||||
const dx1 = topRight.x - bottomRight.x;
|
||||
const dx2 = bottomLeft.x - bottomRight.x;
|
||||
const dx3 = topLeft.x - topRight.x + bottomRight.x - bottomLeft.x;
|
||||
|
||||
// SkewY: compare top and bottom edges
|
||||
const topEdgeAngle = Math.atan2(topRight.y - topLeft.y, topRight.x - topLeft.x);
|
||||
const bottomEdgeAngle = Math.atan2(bottomRight.y - bottomLeft.y, bottomRight.x - bottomLeft.x);
|
||||
const skewY = (bottomEdgeAngle - topEdgeAngle) * (180 / Math.PI);
|
||||
const dy1 = topRight.y - bottomRight.y;
|
||||
const dy2 = bottomLeft.y - bottomRight.y;
|
||||
const dy3 = topLeft.y - topRight.y + bottomRight.y - bottomLeft.y;
|
||||
|
||||
return {
|
||||
minX,
|
||||
maxX,
|
||||
minY,
|
||||
maxY,
|
||||
width,
|
||||
height,
|
||||
centerX,
|
||||
centerY,
|
||||
rotation,
|
||||
skewX,
|
||||
skewY,
|
||||
};
|
||||
const det = dx1 * dy2 - dx2 * dy1;
|
||||
const a13 = (dx3 * dy2 - dx2 * dy3) / det;
|
||||
const a23 = (dx1 * dy3 - dx3 * dy1) / det;
|
||||
|
||||
const a11 = (1 + a13) * topRight.x - topLeft.x;
|
||||
const a21 = (1 + a23) * bottomLeft.x - topLeft.x;
|
||||
|
||||
const a12 = (1 + a13) * topRight.y - topLeft.y;
|
||||
const a22 = (1 + a23) * bottomLeft.y - topLeft.y;
|
||||
|
||||
// prettier-ignore
|
||||
const matrix = [
|
||||
a11 / width, a12 / width, 0, a13 / width,
|
||||
a21 / height, a22 / height, 0, a23 / height,
|
||||
0, 0, 1, 0,
|
||||
topLeft.x, topLeft.y, 0, 1,
|
||||
];
|
||||
|
||||
return { matrix, width, height };
|
||||
};
|
||||
|
||||
/**
|
||||
@@ -87,18 +75,32 @@ export const getOcrBoundingBoxes = (
|
||||
zoom: ZoomImageWheelState,
|
||||
photoViewer: HTMLImageElement | null,
|
||||
): OcrBox[] => {
|
||||
const boxes: OcrBox[] = [];
|
||||
|
||||
if (photoViewer === null || !photoViewer.naturalWidth || !photoViewer.naturalHeight) {
|
||||
return boxes;
|
||||
return [];
|
||||
}
|
||||
|
||||
const clientHeight = photoViewer.clientHeight;
|
||||
const clientWidth = photoViewer.clientWidth;
|
||||
const { width, height } = getContainedSize(photoViewer);
|
||||
|
||||
const imageWidth = photoViewer.naturalWidth;
|
||||
const imageHeight = photoViewer.naturalHeight;
|
||||
const offset = {
|
||||
x: ((clientWidth - width) / 2) * zoom.currentZoom + zoom.currentPositionX,
|
||||
y: ((clientHeight - height) / 2) * zoom.currentZoom + zoom.currentPositionY,
|
||||
};
|
||||
|
||||
return getOcrBoundingBoxesAtSize(
|
||||
ocrData,
|
||||
{ width: width * zoom.currentZoom, height: height * zoom.currentZoom },
|
||||
offset,
|
||||
);
|
||||
};
|
||||
|
||||
export const getOcrBoundingBoxesAtSize = (
|
||||
ocrData: OcrBoundingBox[],
|
||||
targetSize: { width: number; height: number },
|
||||
offset?: Point,
|
||||
) => {
|
||||
const boxes: OcrBox[] = [];
|
||||
|
||||
for (const ocr of ocrData) {
|
||||
// Convert normalized coordinates (0-1) to actual pixel positions
|
||||
@@ -109,14 +111,8 @@ export const getOcrBoundingBoxes = (
|
||||
{ x: ocr.x3, y: ocr.y3 },
|
||||
{ x: ocr.x4, y: ocr.y4 },
|
||||
].map((point) => ({
|
||||
x:
|
||||
(width / imageWidth) * zoom.currentZoom * point.x * imageWidth +
|
||||
((clientWidth - width) / 2) * zoom.currentZoom +
|
||||
zoom.currentPositionX,
|
||||
y:
|
||||
(height / imageHeight) * zoom.currentZoom * point.y * imageHeight +
|
||||
((clientHeight - height) / 2) * zoom.currentZoom +
|
||||
zoom.currentPositionY,
|
||||
x: targetSize.width * point.x + (offset?.x ?? 0),
|
||||
y: targetSize.height * point.y + (offset?.y ?? 0),
|
||||
}));
|
||||
|
||||
boxes.push({
|
||||
|
||||
Reference in New Issue
Block a user