diff --git a/web/src/lib/components/asset-viewer/ocr-bounding-box.svelte b/web/src/lib/components/asset-viewer/ocr-bounding-box.svelte
index 6f6caad0fc..ff8cd58186 100644
--- a/web/src/lib/components/asset-viewer/ocr-bounding-box.svelte
+++ b/web/src/lib/components/asset-viewer/ocr-bounding-box.svelte
@@ -1,6 +1,6 @@
{ocrBox.text}
diff --git a/web/src/lib/components/asset-viewer/photo-viewer.svelte b/web/src/lib/components/asset-viewer/photo-viewer.svelte
index 55c765ce22..299db0e2ba 100644
--- a/web/src/lib/components/asset-viewer/photo-viewer.svelte
+++ b/web/src/lib/components/asset-viewer/photo-viewer.svelte
@@ -14,7 +14,7 @@
import { SlideshowLook, SlideshowState, slideshowStore } from '$lib/stores/slideshow.store';
import { handlePromiseError } from '$lib/utils';
import { canCopyImageToClipboard, copyImageToClipboard } from '$lib/utils/asset-utils';
- import { getNaturalSize, scaleToFit, type ContentMetrics } from '$lib/utils/container-utils';
+ import { type ContentMetrics, getNaturalSize, scaleToFit } from '$lib/utils/container-utils';
import { handleError } from '$lib/utils/handle-error';
import { getOcrBoundingBoxes } from '$lib/utils/ocr-utils';
import { getBoundingBox } from '$lib/utils/people-utils';
@@ -73,12 +73,17 @@
}
const natural = getNaturalSize(assetViewerManager.imgRef);
- const scaled = scaleToFit(natural, container);
+ const client = { width: containerWidth, height: containerHeight };
+ const scaled = scaleToFit(natural, client);
+ const offsetX = (client.width - scaled.width) / 2;
+ const offsetY = (client.height - scaled.height) / 2;
+ const { currentZoom, currentPositionX, currentPositionY } = assetViewerManager.zoomState;
+
return {
- contentWidth: scaled.width,
- contentHeight: scaled.height,
- offsetX: 0,
- offsetY: 0,
+ contentWidth: scaled.width * currentZoom,
+ contentHeight: scaled.height * currentZoom,
+ offsetX: offsetX * currentZoom + currentPositionX,
+ offsetY: offsetY * currentZoom + currentPositionY,
};
});
diff --git a/web/src/lib/utils/ocr-utils.ts b/web/src/lib/utils/ocr-utils.ts
index 3da36cf57a..0f5006141c 100644
--- a/web/src/lib/utils/ocr-utils.ts
+++ b/web/src/lib/utils/ocr-utils.ts
@@ -1,18 +1,33 @@
import type { OcrBoundingBox } from '$lib/stores/ocr.svelte';
import type { ContentMetrics } from '$lib/utils/container-utils';
+import { clamp } from 'lodash-es';
export type Point = {
x: number;
y: number;
};
+const distance = (p1: Point, p2: Point) => Math.hypot(p2.x - p1.x, p2.y - p1.y); // Euclidean distance between p1 and p2; module-level so calculateBoundingBoxMatrix and getOcrBoundingBoxes share it
+
export interface OcrBox {
id: string;
points: Point[];
text: string;
confidence: number;
+ isVertical: boolean; // result of isVerticalText(): box is at least VERTICAL_ASPECT_RATIO times taller than wide and its text contains CJK
}
+const CJK_PATTERN =
+ /[\u3000-\u303F\u3040-\u309F\u30A0-\u30FF\u3400-\u4DBF\u4E00-\u9FFF\uF900-\uFAFF\uAC00-\uD7AF\uFF00-\uFFEF]/; // BMP-only ranges: CJK punctuation, kana, ideographs, Hangul syllables, half/full-width forms; no `u` flag, so astral ideographs never match
+// Minimum height / width ratio at which a box containing CJK text is treated as vertical.
+const VERTICAL_ASPECT_RATIO = 1.5;
+
+const containsCjk = (text: string): boolean => CJK_PATTERN.test(text); // true if any UTF-16 code unit of text is in CJK_PATTERN; the regex has no g/y flag, so test() keeps no state between calls
+
+const isVerticalText = (width: number, height: number, text: string): boolean => { // Heuristic: vertical when the box is tall enough relative to its width AND the text contains CJK
+ return height / width >= VERTICAL_ASPECT_RATIO && containsCjk(text); // width 0 gives Infinity (passes the ratio test); 0 / 0 gives NaN, which compares false
+};
+
/**
* Calculate bounding box transform from OCR points. Result matrix can be used as input for css matrix3d.
* @param points - Array of 4 corner points of the bounding box
@@ -21,8 +36,6 @@ export interface OcrBox {
export const calculateBoundingBoxMatrix = (points: Point[]): { matrix: number[]; width: number; height: number } => {
const [topLeft, topRight, bottomRight, bottomLeft] = points;
- // Approximate width and height to prevent text distortion as much as possible
- const distance = (p1: Point, p2: Point) => Math.hypot(p2.x - p1.x, p2.y - p1.y);
const width = Math.max(distance(topLeft, topRight), distance(bottomLeft, bottomRight));
const height = Math.max(distance(topLeft, bottomLeft), distance(topRight, bottomRight));
@@ -55,6 +68,71 @@ export const calculateBoundingBoxMatrix = (points: Point[]): { matrix: number[];
return { matrix, width, height };
};
+const HORIZONTAL_PADDING = 16; // subtracted from the box width in calculateFittedFontSize; presumably mirrors the overlay's CSS padding — TODO confirm
+const VERTICAL_PADDING = 8; // subtracted from the box height in calculateFittedFontSize; presumably mirrors the overlay's CSS padding — TODO confirm
+const REFERENCE_FONT_SIZE = 100; // font size in px at which text is measured on the canvas before scaling to the box
+const MIN_FONT_SIZE = 8; // lower clamp applied to every fitted font size
+const MAX_FONT_SIZE = 96; // upper clamp applied to every fitted font size
+const FALLBACK_FONT = `${REFERENCE_FONT_SIZE}px sans-serif`; // canvas font used by getReferenceFont when --font-sans yields no value
+
+let sharedCanvasContext: CanvasRenderingContext2D | null = null; // set on the first successful getCanvasContext() call and returned by later calls
+let resolvedFont: string | undefined; // cache for getReferenceFont(); undefined until the first call resolves it
+
+const getCanvasContext = (): CanvasRenderingContext2D | null => { // Returns the cached 2D canvas context, creating it on first use; null when a 2D context cannot be obtained
+ if (sharedCanvasContext !== null) {
+ return sharedCanvasContext;
+ }
+ const canvas = document.createElement('canvas'); // not attached to the DOM by this function; requires a browser `document`
+ const context = canvas.getContext('2d');
+ if (!context) {
+ return null; // the failure is not cached, so the next call tries again with a new canvas
+ }
+ sharedCanvasContext = context;
+ return sharedCanvasContext;
+};
+
+const getReferenceFont = (): string => { // Returns the canvas font string used for measuring: REFERENCE_FONT_SIZE px in the --font-sans family, else FALLBACK_FONT
+ if (resolvedFont !== undefined) {
+ return resolvedFont;
+ }
+ const fontFamily = globalThis.getComputedStyle?.(document.documentElement).getPropertyValue('--font-sans').trim(); // reads the custom property from the root element; the whole chain is undefined when getComputedStyle is missing
+ resolvedFont = fontFamily ? `${REFERENCE_FONT_SIZE}px ${fontFamily}` : FALLBACK_FONT; // undefined or empty family falls back; the result is cached for good, so later changes to --font-sans are not picked up
+ return resolvedFont;
+};
+
+export const calculateFittedFontSize = (
+ text: string,
+ boxWidth: number,
+ boxHeight: number,
+ isVertical: boolean,
+): number => { // Returns a font size, clamped to [MIN_FONT_SIZE, MAX_FONT_SIZE], chosen so `text` fits the box once padding is removed
+ const availableWidth = boxWidth - HORIZONTAL_PADDING; // may be <= 0 for boxes smaller than the padding; negative sizes are raised to MIN_FONT_SIZE by clamp
+ const availableHeight = boxHeight - VERTICAL_PADDING;
+ // Vertical text: share the available height evenly across text.length and never exceed the available width.
+ if (isVertical) {
+ const fontSize = Math.min(availableWidth, availableHeight / text.length); // NOTE(review): text.length counts UTF-16 code units, so characters outside the BMP count twice
+ return clamp(fontSize, MIN_FONT_SIZE, MAX_FONT_SIZE);
+ }
+ // Horizontal text: measure at REFERENCE_FONT_SIZE on a canvas, then scale the reference size to the box.
+ const context = getCanvasContext();
+ if (!context) {
+ return clamp((1.4 * availableWidth) / text.length, MIN_FONT_SIZE, MAX_FONT_SIZE); // no 2D context: estimate from the code-unit count; the derivation of the 1.4 factor is not shown here
+ }
+
+ // Unsupported in Safari iOS <16.6; falls back to default canvas font, giving less accurate but functional sizing
+ // eslint-disable-next-line tscompat/tscompat
+ context.font = getReferenceFont();
+
+ const metrics = context.measureText(text); // measured with the font assigned above
+ const measuredWidth = metrics.width;
+ const measuredHeight = metrics.actualBoundingBoxAscent + metrics.actualBoundingBoxDescent; // extent of the rendered glyphs' bounding box, not the font's line height
+ // Scale the reference size by the ratio of available to measured extent (assumes metrics grow linearly with font size).
+ const scaleFromWidth = (availableWidth / measuredWidth) * REFERENCE_FONT_SIZE;
+ const scaleFromHeight = (availableHeight / measuredHeight) * REFERENCE_FONT_SIZE;
+ // NOTE(review): empty or whitespace-only text can measure 0 in width or height, giving Infinity or NaN here; confirm how clamp treats NaN.
+ return clamp(Math.min(scaleFromWidth, scaleFromHeight), MIN_FONT_SIZE, MAX_FONT_SIZE); // the tighter of the two dimensions wins
+};
+
export const getOcrBoundingBoxes = (ocrData: OcrBoundingBox[], metrics: ContentMetrics): OcrBox[] => {
const boxes: OcrBox[] = [];
for (const ocr of ocrData) {
@@ -68,13 +146,26 @@ export const getOcrBoundingBoxes = (ocrData: OcrBoundingBox[], metrics: ContentM
y: point.y * metrics.contentHeight + metrics.offsetY,
}));
+ const boxWidth = Math.max(distance(points[0], points[1]), distance(points[3], points[2])); // longer of the 0-1 and 3-2 edges; top/bottom if points follow calculateBoundingBoxMatrix's [topLeft, topRight, bottomRight, bottomLeft] order — TODO confirm
+ const boxHeight = Math.max(distance(points[0], points[3]), distance(points[1], points[2])); // longer of the 0-3 and 1-2 edges; left/right under the same assumed point order
+
boxes.push({
id: ocr.id,
points,
text: ocr.text,
confidence: ocr.textScore,
+ isVertical: isVerticalText(boxWidth, boxHeight, ocr.text), // decided from the box's size after scaling by metrics
});
}
+ const rowThreshold = metrics.contentHeight * 0.02; // boxes whose first-point y differs by less than 2% of the content height count as the same row
+ boxes.sort((a, b) => { // reading order: by first-point y, then by first-point x within a row
+ const yDifference = a.points[0].y - b.points[0].y;
+ if (Math.abs(yDifference) < rowThreshold) { // NOTE(review): "same row" is not transitive, so this comparator is not a consistent ordering and the result may depend on input order
+ return a.points[0].x - b.points[0].x;
+ }
+ return yDifference;
+ });
+
return boxes;
};