From ec041538d1779881c871d5c7ff66bc3f0169802a Mon Sep 17 00:00:00 2001 From: midzelis Date: Tue, 3 Mar 2026 19:42:24 +0000 Subject: [PATCH] feat(web): improve OCR overlay text fitting, reactivity, and accessibility - Precise font sizing using canvas measureText instead of character-count heuristic - Fix overlay repositioning on viewport resize by computing metrics from reactive state instead of DOM reads - Fix animation delay on resize by using transition-colors instead of transition-all - Add keyboard accessibility: OCR boxes are focusable via Tab with reading-order sort - Show text on focus (same styling as hover) with proper ARIA attributes --- .../asset-viewer/ocr-bounding-box.svelte | 29 +++- .../asset-viewer/photo-viewer.svelte | 3 +- web/src/lib/utils/ocr-utils.ts | 125 +++++++++++++++++- 3 files changed, 149 insertions(+), 8 deletions(-) diff --git a/web/src/lib/components/asset-viewer/ocr-bounding-box.svelte b/web/src/lib/components/asset-viewer/ocr-bounding-box.svelte index 6f6caad0fc..d5551b9cc5 100644 --- a/web/src/lib/components/asset-viewer/ocr-bounding-box.svelte +++ b/web/src/lib/components/asset-viewer/ocr-bounding-box.svelte @@ -1,6 +1,6 @@
{ocrBox.text}
diff --git a/web/src/lib/components/asset-viewer/photo-viewer.svelte b/web/src/lib/components/asset-viewer/photo-viewer.svelte index 55c765ce22..4a6a02cb4a 100644 --- a/web/src/lib/components/asset-viewer/photo-viewer.svelte +++ b/web/src/lib/components/asset-viewer/photo-viewer.svelte @@ -73,7 +73,8 @@ } const natural = getNaturalSize(assetViewerManager.imgRef); - const scaled = scaleToFit(natural, container); + const scaled = scaleToFit(natural, { width: containerWidth, height: containerHeight }); + return { contentWidth: scaled.width, contentHeight: scaled.height, diff --git a/web/src/lib/utils/ocr-utils.ts b/web/src/lib/utils/ocr-utils.ts index 3da36cf57a..c483eb9551 100644 --- a/web/src/lib/utils/ocr-utils.ts +++ b/web/src/lib/utils/ocr-utils.ts @@ -1,18 +1,38 @@ import type { OcrBoundingBox } from '$lib/stores/ocr.svelte'; import type { ContentMetrics } from '$lib/utils/container-utils'; +import { clamp } from 'lodash-es'; export type Point = { x: number; y: number; }; +const distance = (p1: Point, p2: Point) => Math.hypot(p2.x - p1.x, p2.y - p1.y); + +export type VerticalMode = 'none' | 'cjk' | 'rotated'; + export interface OcrBox { id: string; points: Point[]; text: string; confidence: number; + verticalMode: VerticalMode; } +const CJK_PATTERN = + /[\u3000-\u303F\u3040-\u309F\u30A0-\u30FF\u3400-\u4DBF\u4E00-\u9FFF\uF900-\uFAFF\uAC00-\uD7AF\uFF00-\uFFEF]/; + +const VERTICAL_ASPECT_RATIO = 1.5; + +const containsCjk = (text: string): boolean => CJK_PATTERN.test(text); + +const getVerticalMode = (width: number, height: number, text: string): VerticalMode => { + if (height / width < VERTICAL_ASPECT_RATIO) { + return 'none'; + } + return containsCjk(text) ? 'cjk' : 'rotated'; +}; + /** * Calculate bounding box transform from OCR points. Result matrix can be used as input for css matrix3d. * @param points - Array of 4 corner points of the bounding box @@ -21,8 +41,6 @@ export interface OcrBox { export const calculateBoundingBoxMatrix = (points: Point[]): { matrix: number[]; width: number; height: number } => { const [topLeft, topRight, bottomRight, bottomLeft] = points; - // Approximate width and height to prevent text distortion as much as possible - const distance = (p1: Point, p2: Point) => Math.hypot(p2.x - p1.x, p2.y - p1.y); const width = Math.max(distance(topLeft, topRight), distance(bottomLeft, bottomRight)); const height = Math.max(distance(topLeft, bottomLeft), distance(topRight, bottomRight)); @@ -55,6 +73,96 @@ export const calculateBoundingBoxMatrix = (points: Point[]): { matrix: number[]; return { matrix, width, height }; }; +const BORDER_SIZE = 4; +const HORIZONTAL_PADDING = 16 + BORDER_SIZE; +const VERTICAL_PADDING = 8 + BORDER_SIZE; +const REFERENCE_FONT_SIZE = 100; +const MIN_FONT_SIZE = 8; +const MAX_FONT_SIZE = 96; +const FALLBACK_FONT = `${REFERENCE_FONT_SIZE}px sans-serif`; + +let sharedCanvasContext: CanvasRenderingContext2D | null = null; +let resolvedFont: string | undefined; + +const getCanvasContext = (): CanvasRenderingContext2D | null => { + if (sharedCanvasContext !== null) { + return sharedCanvasContext; + } + const canvas = document.createElement('canvas'); + const context = canvas.getContext('2d'); + if (!context) { + return null; + } + sharedCanvasContext = context; + return sharedCanvasContext; +}; + +const getReferenceFont = (): string => { + if (resolvedFont !== undefined) { + return resolvedFont; + } + const fontFamily = globalThis.getComputedStyle?.(document.documentElement).getPropertyValue('--font-sans').trim(); + resolvedFont = fontFamily ? `${REFERENCE_FONT_SIZE}px ${fontFamily}` : FALLBACK_FONT; + return resolvedFont; +}; + +export const calculateFittedFontSize = ( + text: string, + boxWidth: number, + boxHeight: number, + verticalMode: VerticalMode, +): number => { + const isVertical = verticalMode === 'cjk' || verticalMode === 'rotated'; + const availableWidth = boxWidth - (isVertical ? VERTICAL_PADDING : HORIZONTAL_PADDING); + const availableHeight = boxHeight - (isVertical ? HORIZONTAL_PADDING : VERTICAL_PADDING); + + const context = getCanvasContext(); + + if (verticalMode === 'cjk') { + if (!context) { + const fontSize = Math.min(availableWidth, availableHeight / text.length); + return clamp(fontSize, MIN_FONT_SIZE, MAX_FONT_SIZE); + } + + // eslint-disable-next-line tscompat/tscompat + context.font = getReferenceFont(); + + let maxCharWidth = 0; + let totalCharHeight = 0; + for (const character of text) { + const metrics = context.measureText(character); + const charWidth = metrics.width; + const charHeight = metrics.actualBoundingBoxAscent + metrics.actualBoundingBoxDescent; + maxCharWidth = Math.max(maxCharWidth, charWidth); + totalCharHeight += Math.max(charWidth, charHeight); + } + + const scaleFromWidth = (availableWidth / maxCharWidth) * REFERENCE_FONT_SIZE; + const scaleFromHeight = (availableHeight / totalCharHeight) * REFERENCE_FONT_SIZE; + return clamp(Math.min(scaleFromWidth, scaleFromHeight), MIN_FONT_SIZE, MAX_FONT_SIZE); + } + + const fitWidth = verticalMode === 'rotated' ? availableHeight : availableWidth; + const fitHeight = verticalMode === 'rotated' ? availableWidth : availableHeight; + + if (!context) { + return clamp((1.4 * fitWidth) / text.length, MIN_FONT_SIZE, MAX_FONT_SIZE); + } + + // Unsupported in Safari iOS <16.6; falls back to default canvas font, giving less accurate but functional sizing + // eslint-disable-next-line tscompat/tscompat + context.font = getReferenceFont(); + + const metrics = context.measureText(text); + const measuredWidth = metrics.width; + const measuredHeight = metrics.actualBoundingBoxAscent + metrics.actualBoundingBoxDescent; + + const scaleFromWidth = (fitWidth / measuredWidth) * REFERENCE_FONT_SIZE; + const scaleFromHeight = (fitHeight / measuredHeight) * REFERENCE_FONT_SIZE; + + return clamp(Math.min(scaleFromWidth, scaleFromHeight), MIN_FONT_SIZE, MAX_FONT_SIZE); +}; + export const getOcrBoundingBoxes = (ocrData: OcrBoundingBox[], metrics: ContentMetrics): OcrBox[] => { const boxes: OcrBox[] = []; for (const ocr of ocrData) { @@ -68,13 +176,26 @@ export const getOcrBoundingBoxes = (ocrData: OcrBoundingBox[], metrics: ContentM y: point.y * metrics.contentHeight + metrics.offsetY, })); + const boxWidth = Math.max(distance(points[0], points[1]), distance(points[3], points[2])); + const boxHeight = Math.max(distance(points[0], points[3]), distance(points[1], points[2])); + boxes.push({ id: ocr.id, points, text: ocr.text, confidence: ocr.textScore, + verticalMode: getVerticalMode(boxWidth, boxHeight, ocr.text), }); } + const rowThreshold = metrics.contentHeight * 0.02; + boxes.sort((a, b) => { + const yDifference = a.points[0].y - b.points[0].y; + if (Math.abs(yDifference) < rowThreshold) { + return a.points[0].x - b.points[0].x; + } + return yDifference; + }); + return boxes; };