mirror of
https://github.com/immich-app/immich.git
synced 2026-03-12 21:42:54 -07:00
feat(web): improve OCR overlay text fitting, reactivity, and accessibility
- Precise font sizing using canvas measureText instead of a character-count heuristic
- Fix overlay repositioning on viewport resize by computing metrics from reactive state instead of DOM reads
- Fix animation delay on resize by using transition-colors instead of transition-all
- Add keyboard accessibility: OCR boxes are focusable via Tab, sorted in reading order
- Show text on focus (same styling as hover) with proper ARIA attributes
This commit is contained in:
@@ -1,6 +1,6 @@
|
|||||||
<script lang="ts">
|
<script lang="ts">
|
||||||
import type { OcrBox } from '$lib/utils/ocr-utils';
|
import type { OcrBox } from '$lib/utils/ocr-utils';
|
||||||
import { calculateBoundingBoxMatrix } from '$lib/utils/ocr-utils';
|
import { calculateBoundingBoxMatrix, calculateFittedFontSize } from '$lib/utils/ocr-utils';
|
||||||
|
|
||||||
type Props = {
|
type Props = {
|
||||||
ocrBox: OcrBox;
|
ocrBox: OcrBox;
|
||||||
@@ -11,16 +11,35 @@
|
|||||||
const dimensions = $derived(calculateBoundingBoxMatrix(ocrBox.points));
|
const dimensions = $derived(calculateBoundingBoxMatrix(ocrBox.points));
|
||||||
|
|
||||||
const transform = $derived(`matrix3d(${dimensions.matrix.join(',')})`);
|
const transform = $derived(`matrix3d(${dimensions.matrix.join(',')})`);
|
||||||
// Fits almost all strings within the box, depends on font family
|
|
||||||
const fontSize = $derived(
|
const fontSize = $derived(
|
||||||
`max(var(--text-sm), min(var(--text-6xl), ${(1.4 * dimensions.width) / ocrBox.text.length}px))`,
|
calculateFittedFontSize(ocrBox.text, dimensions.width, dimensions.height, ocrBox.verticalMode) + 'px',
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// Extra inline CSS appended to the box style for vertically laid out text.
// Each non-empty value starts with a space because it is concatenated
// directly after the preceding `transform-origin: 0 0;` declaration.
const verticalStyle = $derived.by(() => {
  const mode = ocrBox.verticalMode;
  if (mode === 'cjk') {
    return ' writing-mode: vertical-rl;';
  }
  if (mode === 'rotated') {
    return ' writing-mode: vertical-rl; text-orientation: sideways;';
  }
  // Any other mode (i.e. 'none') needs no additional declarations.
  return '';
});
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
<div class="absolute left-0 top-0">
|
<div class="absolute left-0 top-0">
|
||||||
<div
|
<div
|
||||||
class="absolute flex items-center justify-center text-transparent text-sm border-2 border-blue-500 bg-blue-500/10 px-2 py-1 pointer-events-auto cursor-text whitespace-pre-wrap wrap-break-word select-text transition-all hover:text-white hover:bg-black/60 hover:border-blue-600 hover:border-3"
|
class="absolute flex items-center justify-center text-transparent border-2 border-blue-500 bg-blue-500/10 pointer-events-auto cursor-text select-text transition-colors hover:z-1 hover:text-white hover:bg-black/60 hover:border-blue-600 hover:border-3 focus:z-1 focus:text-white focus:bg-black/60 focus:border-blue-600 focus:border-3 focus:outline-none {ocrBox.verticalMode ===
|
||||||
style="font-size: {fontSize}; width: {dimensions.width}px; height: {dimensions.height}px; transform: {transform}; transform-origin: 0 0;"
|
'none'
|
||||||
|
? 'px-2 py-1 whitespace-nowrap'
|
||||||
|
: 'px-1 py-2'}"
|
||||||
|
style="font-size: {fontSize}; width: {dimensions.width}px; height: {dimensions.height}px; transform: {transform}; transform-origin: 0 0;{verticalStyle}"
|
||||||
|
tabindex="0"
|
||||||
|
role="button"
|
||||||
|
aria-label={ocrBox.text}
|
||||||
>
|
>
|
||||||
{ocrBox.text}
|
{ocrBox.text}
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
@@ -73,7 +73,8 @@
|
|||||||
}
|
}
|
||||||
|
|
||||||
const natural = getNaturalSize(assetViewerManager.imgRef);
|
const natural = getNaturalSize(assetViewerManager.imgRef);
|
||||||
const scaled = scaleToFit(natural, container);
|
const scaled = scaleToFit(natural, { width: containerWidth, height: containerHeight });
|
||||||
|
|
||||||
return {
|
return {
|
||||||
contentWidth: scaled.width,
|
contentWidth: scaled.width,
|
||||||
contentHeight: scaled.height,
|
contentHeight: scaled.height,
|
||||||
|
|||||||
@@ -1,18 +1,38 @@
|
|||||||
import type { OcrBoundingBox } from '$lib/stores/ocr.svelte';
|
import type { OcrBoundingBox } from '$lib/stores/ocr.svelte';
|
||||||
import type { ContentMetrics } from '$lib/utils/container-utils';
|
import type { ContentMetrics } from '$lib/utils/container-utils';
|
||||||
|
import { clamp } from 'lodash-es';
|
||||||
|
|
||||||
export type Point = {
|
export type Point = {
|
||||||
x: number;
|
x: number;
|
||||||
y: number;
|
y: number;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/** Straight-line (Euclidean) distance between two points. */
const distance = (p1: Point, p2: Point) => {
  const deltaX = p2.x - p1.x;
  const deltaY = p2.y - p1.y;
  return Math.hypot(deltaX, deltaY);
};
|
||||||
|
|
||||||
|
/**
 * Layout mode of an OCR box's text.
 * - `'none'`: the box is not tall enough to be treated as vertical.
 * - `'cjk'`: a tall box whose text contains CJK characters.
 * - `'rotated'`: a tall box without CJK characters.
 */
export type VerticalMode = 'none' | 'cjk' | 'rotated';
|
||||||
|
|
||||||
export interface OcrBox {
|
export interface OcrBox {
|
||||||
id: string;
|
id: string;
|
||||||
points: Point[];
|
points: Point[];
|
||||||
text: string;
|
text: string;
|
||||||
confidence: number;
|
confidence: number;
|
||||||
|
verticalMode: VerticalMode;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Matches a single character from the listed Basic Multilingual Plane ranges
// (CJK punctuation, kana, ideographs, Hangul syllables, half/full-width forms).
const CJK_PATTERN =
  /[\u3000-\u303F\u3040-\u309F\u30A0-\u30FF\u3400-\u4DBF\u4E00-\u9FFF\uF900-\uFAFF\uAC00-\uD7AF\uFF00-\uFFEF]/;

// A box counts as vertical once its height is at least this many times its width.
const VERTICAL_ASPECT_RATIO = 1.5;

/** Whether `text` contains at least one character matched by CJK_PATTERN. */
const containsCjk = (text: string): boolean => text.search(CJK_PATTERN) !== -1;

/**
 * Classifies a box as horizontal ('none') or vertical ('cjk' / 'rotated').
 *
 * @param width - Box width
 * @param height - Box height
 * @param text - Recognized text, used to pick between the two vertical modes
 */
const getVerticalMode = (width: number, height: number, text: string): VerticalMode => {
  const aspectRatio = height / width;
  if (aspectRatio < VERTICAL_ASPECT_RATIO) {
    return 'none';
  }
  if (containsCjk(text)) {
    return 'cjk';
  }
  return 'rotated';
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Calculate bounding box transform from OCR points. Result matrix can be used as input for css matrix3d.
|
* Calculate bounding box transform from OCR points. Result matrix can be used as input for css matrix3d.
|
||||||
* @param points - Array of 4 corner points of the bounding box
|
* @param points - Array of 4 corner points of the bounding box
|
||||||
@@ -21,8 +41,6 @@ export interface OcrBox {
|
|||||||
export const calculateBoundingBoxMatrix = (points: Point[]): { matrix: number[]; width: number; height: number } => {
|
export const calculateBoundingBoxMatrix = (points: Point[]): { matrix: number[]; width: number; height: number } => {
|
||||||
const [topLeft, topRight, bottomRight, bottomLeft] = points;
|
const [topLeft, topRight, bottomRight, bottomLeft] = points;
|
||||||
|
|
||||||
// Approximate width and height to prevent text distortion as much as possible
|
|
||||||
const distance = (p1: Point, p2: Point) => Math.hypot(p2.x - p1.x, p2.y - p1.y);
|
|
||||||
const width = Math.max(distance(topLeft, topRight), distance(bottomLeft, bottomRight));
|
const width = Math.max(distance(topLeft, topRight), distance(bottomLeft, bottomRight));
|
||||||
const height = Math.max(distance(topLeft, bottomLeft), distance(topRight, bottomRight));
|
const height = Math.max(distance(topLeft, bottomLeft), distance(topRight, bottomRight));
|
||||||
|
|
||||||
@@ -55,6 +73,96 @@ export const calculateBoundingBoxMatrix = (points: Point[]): { matrix: number[];
|
|||||||
return { matrix, width, height };
|
return { matrix, width, height };
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Total border thickness along one axis, in px.
// NOTE(review): presumably 2 x the 2px `border-2` border of the overlay box; confirm against the component.
const BORDER_SIZE = 4;
|
||||||
|
// Space subtracted from the box along the padded-by-16px axis (padding + border).
// NOTE(review): 16 looks like `px-2` on both sides at a 16px root font size; confirm.
const HORIZONTAL_PADDING = 16 + BORDER_SIZE;
|
||||||
|
// Space subtracted from the box along the padded-by-8px axis (padding + border).
// NOTE(review): 8 looks like `py-1` on both sides at a 16px root font size; confirm.
const VERTICAL_PADDING = 8 + BORDER_SIZE;
|
||||||
|
// Font size (px) at which text is measured; fitted sizes are scaled from this value.
const REFERENCE_FONT_SIZE = 100;
|
||||||
|
// Lower bound (px) applied to every fitted font size.
const MIN_FONT_SIZE = 8;
|
||||||
|
// Upper bound (px) applied to every fitted font size.
const MAX_FONT_SIZE = 96;
|
||||||
|
// Canvas font string used when the `--font-sans` custom property resolves to nothing.
const FALLBACK_FONT = `${REFERENCE_FONT_SIZE}px sans-serif`;
|
||||||
|
|
||||||
|
// Lazily created 2D context, reused for every text measurement.
let sharedCanvasContext: CanvasRenderingContext2D | null = null;
// Cached canvas font string; stays undefined until it is first resolved.
let resolvedFont: string | undefined;

/**
 * Returns the shared 2D canvas context used for text measurement, creating it on first use.
 *
 * @returns The cached context, or `null` when no DOM is available (for example during
 * server-side rendering or in Node-based tests) or when a 2D context cannot be obtained.
 * A `null` result is not cached, so a later call retries.
 */
const getCanvasContext = (): CanvasRenderingContext2D | null => {
  if (sharedCanvasContext !== null) {
    return sharedCanvasContext;
  }

  // Without a DOM, `document` is an undeclared global and reading it throws a
  // ReferenceError. Report "unavailable" instead so callers can use their fallback.
  if (typeof document === 'undefined') {
    return null;
  }

  sharedCanvasContext = document.createElement('canvas').getContext('2d');
  return sharedCanvasContext;
};
|
||||||
|
|
||||||
|
/**
 * Resolves the canvas font string used for text measurement and caches it.
 *
 * The font family is read from the `--font-sans` custom property on the root
 * element; when that is empty or computed styles are unavailable, the generic
 * fallback font is used instead.
 *
 * @returns A canvas `font` value at the reference font size
 */
const getReferenceFont = (): string => {
  if (resolvedFont === undefined) {
    const rootStyle = globalThis.getComputedStyle?.(document.documentElement);
    const family = rootStyle?.getPropertyValue('--font-sans').trim();
    resolvedFont = family ? `${REFERENCE_FONT_SIZE}px ${family}` : FALLBACK_FONT;
  }
  return resolvedFont;
};
|
||||||
|
|
||||||
|
/**
 * Computes the font size, in px, at which `text` fits inside an OCR box.
 *
 * The text is measured on a canvas at the reference font size and the result is
 * scaled to the space left after subtracting padding and border. When no canvas
 * context is available, a character-count heuristic is used instead.
 *
 * @param text - Recognized text to fit
 * @param boxWidth - Box width in px
 * @param boxHeight - Box height in px
 * @param verticalMode - Layout mode of the box
 * @returns A finite font size between MIN_FONT_SIZE and MAX_FONT_SIZE
 */
export const calculateFittedFontSize = (
  text: string,
  boxWidth: number,
  boxHeight: number,
  verticalMode: VerticalMode,
): number => {
  const isVertical = verticalMode === 'cjk' || verticalMode === 'rotated';
  // Vertical boxes swap which axis gets the larger padding.
  const availableWidth = boxWidth - (isVertical ? VERTICAL_PADDING : HORIZONTAL_PADDING);
  const availableHeight = boxHeight - (isVertical ? HORIZONTAL_PADDING : VERTICAL_PADDING);

  // A 0 / 0 division (no space left and nothing to measure, e.g. empty text) yields NaN,
  // which clamp passes through unchanged and would render as `NaNpx`. Use the minimum instead.
  const toFontSize = (size: number): number =>
    Number.isNaN(size) ? MIN_FONT_SIZE : clamp(size, MIN_FONT_SIZE, MAX_FONT_SIZE);

  const context = getCanvasContext();

  if (verticalMode === 'cjk') {
    if (!context) {
      // Count code points rather than UTF-16 code units, matching the per-character
      // iteration of the canvas path below.
      const characterCount = [...text].length;
      return toFontSize(Math.min(availableWidth, availableHeight / characterCount));
    }

    // eslint-disable-next-line tscompat/tscompat
    context.font = getReferenceFont();

    // Characters are stacked vertically: the widest one limits the width,
    // and the sum of their extents limits the height.
    let maxCharWidth = 0;
    let totalCharHeight = 0;
    for (const character of text) {
      const metrics = context.measureText(character);
      const charWidth = metrics.width;
      const charHeight = metrics.actualBoundingBoxAscent + metrics.actualBoundingBoxDescent;
      maxCharWidth = Math.max(maxCharWidth, charWidth);
      totalCharHeight += Math.max(charWidth, charHeight);
    }

    const scaleFromWidth = (availableWidth / maxCharWidth) * REFERENCE_FONT_SIZE;
    const scaleFromHeight = (availableHeight / totalCharHeight) * REFERENCE_FONT_SIZE;
    return toFontSize(Math.min(scaleFromWidth, scaleFromHeight));
  }

  // Rotated text runs along the box height, so the axes are swapped for fitting.
  const fitWidth = verticalMode === 'rotated' ? availableHeight : availableWidth;
  const fitHeight = verticalMode === 'rotated' ? availableWidth : availableHeight;

  if (!context) {
    return toFontSize((1.4 * fitWidth) / text.length);
  }

  // Unsupported in Safari iOS <16.6; falls back to default canvas font, giving less accurate but functional sizing
  // eslint-disable-next-line tscompat/tscompat
  context.font = getReferenceFont();

  const metrics = context.measureText(text);
  const measuredWidth = metrics.width;
  const measuredHeight = metrics.actualBoundingBoxAscent + metrics.actualBoundingBoxDescent;

  const scaleFromWidth = (fitWidth / measuredWidth) * REFERENCE_FONT_SIZE;
  const scaleFromHeight = (fitHeight / measuredHeight) * REFERENCE_FONT_SIZE;

  return toFontSize(Math.min(scaleFromWidth, scaleFromHeight));
};
|
||||||
|
|
||||||
export const getOcrBoundingBoxes = (ocrData: OcrBoundingBox[], metrics: ContentMetrics): OcrBox[] => {
|
export const getOcrBoundingBoxes = (ocrData: OcrBoundingBox[], metrics: ContentMetrics): OcrBox[] => {
|
||||||
const boxes: OcrBox[] = [];
|
const boxes: OcrBox[] = [];
|
||||||
for (const ocr of ocrData) {
|
for (const ocr of ocrData) {
|
||||||
@@ -68,13 +176,26 @@ export const getOcrBoundingBoxes = (ocrData: OcrBoundingBox[], metrics: ContentM
|
|||||||
y: point.y * metrics.contentHeight + metrics.offsetY,
|
y: point.y * metrics.contentHeight + metrics.offsetY,
|
||||||
}));
|
}));
|
||||||
|
|
||||||
|
const boxWidth = Math.max(distance(points[0], points[1]), distance(points[3], points[2]));
|
||||||
|
const boxHeight = Math.max(distance(points[0], points[3]), distance(points[1], points[2]));
|
||||||
|
|
||||||
boxes.push({
|
boxes.push({
|
||||||
id: ocr.id,
|
id: ocr.id,
|
||||||
points,
|
points,
|
||||||
text: ocr.text,
|
text: ocr.text,
|
||||||
confidence: ocr.textScore,
|
confidence: ocr.textScore,
|
||||||
|
verticalMode: getVerticalMode(boxWidth, boxHeight, ocr.text),
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const rowThreshold = metrics.contentHeight * 0.02;
|
||||||
|
boxes.sort((a, b) => {
|
||||||
|
const yDifference = a.points[0].y - b.points[0].y;
|
||||||
|
if (Math.abs(yDifference) < rowThreshold) {
|
||||||
|
return a.points[0].x - b.points[0].x;
|
||||||
|
}
|
||||||
|
return yDifference;
|
||||||
|
});
|
||||||
|
|
||||||
return boxes;
|
return boxes;
|
||||||
};
|
};
|
||||||
|
|||||||
Reference in New Issue
Block a user