Compare commits

...

2 Commits

Author SHA1 Message Date
Padhai_Kaneer df383c1ead fix(server): face region coordinates parsing (#29333) 2026-06-29 17:05:20 +02:00
Santo Shakil af2efda310 fix(mobile): apply exif orientation to android raw photos (#29337) 2026-06-29 10:59:09 -04:00
7 changed files with 249 additions and 21 deletions
+1
View File
@@ -7,6 +7,7 @@ project(native_buffer LANGUAGES C)
add_library(native_buffer SHARED
src/main/cpp/native_buffer.c
src/main/cpp/native_image.c
)
target_link_libraries(native_buffer jnigraphics)
@@ -0,0 +1,109 @@
#include <jni.h>
#include <stdlib.h>
#include <stdint.h>
#include <android/bitmap.h>
// Cache-friendly block size for the tiled rotation (in pixels). 32x32 uint32 = 4KB, fits L1.
#define TILE 32
// EXIF orientation values (androidx.exifinterface.media.ExifInterface.ORIENTATION_*).
enum {
ORIENTATION_FLIP_HORIZONTAL = 2,
ORIENTATION_ROTATE_180 = 3,
ORIENTATION_FLIP_VERTICAL = 4,
ORIENTATION_TRANSPOSE = 5,
ORIENTATION_ROTATE_90 = 6,
ORIENTATION_TRANSVERSE = 7,
ORIENTATION_ROTATE_270 = 8,
};
// The orientations that swap width and height. Must stay in sync with affine_for's dim usage.
static int swaps_dims(int o) {
return o == ORIENTATION_ROTATE_90 || o == ORIENTATION_ROTATE_270 ||
o == ORIENTATION_TRANSPOSE || o == ORIENTATION_TRANSVERSE;
}
// A source pixel (sx, sy) maps to destination index base + sx*stepX + sy*stepY, where dw is the
// destination width. This affine form covers all 8 EXIF orientations and matches the pixel layout
// of Bitmap.createBitmap(src, matrixForExifOrientation(o)). int64_t so it stays correct on
// armeabi-v7a (32-bit long) regardless of how large MAX_RAW_DECODE_PIXELS grows.
static void affine_for(int o, int sw, int sh, int dw, int64_t *base, int64_t *stepX, int64_t *stepY) {
switch (o) {
case ORIENTATION_ROTATE_90: *base = sh - 1; *stepX = dw; *stepY = -1; break;
case ORIENTATION_ROTATE_270: *base = (int64_t) (sw - 1) * dw; *stepX = -dw; *stepY = 1; break;
case ORIENTATION_ROTATE_180: *base = (int64_t) (sh - 1) * dw + (sw - 1); *stepX = -1; *stepY = -dw; break;
case ORIENTATION_FLIP_HORIZONTAL: *base = sw - 1; *stepX = -1; *stepY = dw; break;
case ORIENTATION_FLIP_VERTICAL: *base = (int64_t) (sh - 1) * dw; *stepX = 1; *stepY = -dw; break;
case ORIENTATION_TRANSPOSE: *base = 0; *stepX = dw; *stepY = 1; break;
case ORIENTATION_TRANSVERSE: *base = (int64_t) (sw - 1) * dw + (sh - 1); *stepX = -dw; *stepY = -1; break;
default: *base = 0; *stepX = 1; *stepY = dw; break;
}
}
// Copy each source pixel (whole uint32, so channel order/premult is irrelevant) to its rotated
// destination, walking TILE x TILE blocks so the scattered writes of a 90/270 transpose stay
// cache-resident. dst is densely packed (rowBytes == dw*4, no padding), which the affine math relies on.
static void rotate_tiled(const uint8_t *src, int srcStride, uint32_t *dst,
int sw, int sh, int64_t base, int64_t stepX, int64_t stepY) {
for (int ty = 0; ty < sh; ty += TILE) {
int yEnd = ty + TILE < sh ? ty + TILE : sh;
for (int tx = 0; tx < sw; tx += TILE) {
int xEnd = tx + TILE < sw ? tx + TILE : sw;
for (int sy = ty; sy < yEnd; sy++) {
const uint32_t *srcRow = (const uint32_t *) (src + (size_t) sy * srcStride);
int64_t idx = base + (int64_t) sy * stepY + (int64_t) tx * stepX;
for (int sx = tx; sx < xEnd; sx++) {
dst[idx] = srcRow[sx];
idx += stepX;
}
}
}
}
}
// Rotates an RGBA_8888 bitmap to the given EXIF orientation into a freshly malloc'd buffer (free it
// via NativeBuffer.free). Fills outInfo with {width, height, rowBytes} and returns the buffer
// address, or 0 if the bitmap can't be handled (e.g. a non-8888 format) so the caller can fall back.
JNIEXPORT jlong JNICALL
Java_app_alextran_immich_NativeImage_rotate(
JNIEnv *env, jclass clazz, jobject bitmap, jint orientation, jintArray outInfo) {
AndroidBitmapInfo info;
if (AndroidBitmap_getInfo(env, bitmap, &info) != ANDROID_BITMAP_RESULT_SUCCESS) {
return 0;
}
if (info.format != ANDROID_BITMAP_FORMAT_RGBA_8888) {
return 0;
}
int sw = (int) info.width;
int sh = (int) info.height;
int dw = swaps_dims(orientation) ? sh : sw;
int dh = swaps_dims(orientation) ? sw : sh;
uint32_t *dst = (uint32_t *) malloc((size_t) dw * dh * 4);
if (dst == NULL) {
return 0;
}
void *srcPixels = NULL;
if (AndroidBitmap_lockPixels(env, bitmap, &srcPixels) != ANDROID_BITMAP_RESULT_SUCCESS) {
free(dst);
return 0;
}
int64_t base, stepX, stepY;
affine_for(orientation, sw, sh, dw, &base, &stepX, &stepY);
rotate_tiled((const uint8_t *) srcPixels, (int) info.stride, dst, sw, sh, base, stepX, stepY);
AndroidBitmap_unlockPixels(env, bitmap);
jint dims[3] = {dw, dh, dw * 4};
(*env)->SetIntArrayRegion(env, outInfo, 0, 3, dims);
// Keep ownership in C until the buffer is safely handed back: if outInfo was somehow too small,
// SetIntArrayRegion left a pending exception and Kotlin will never receive (or free) dst.
if ((*env)->ExceptionCheck(env)) {
free(dst);
return 0;
}
return (jlong) dst;
}
@@ -0,0 +1,19 @@
package app.alextran.immich
import android.graphics.Bitmap
object NativeImage {
init {
// rotate() is compiled into the native_buffer shared lib (which already links jnigraphics).
System.loadLibrary("native_buffer")
}
/**
* Rotates an RGBA_8888 [bitmap] to the given EXIF [orientation], writing the result into a freshly
* malloc'd native buffer. Returns the buffer address (free it with [NativeBuffer.free]) and fills
* [outInfo] with {width, height, rowBytes}. Returns 0 when the bitmap can't be handled (e.g. a
* non-8888 config) so the caller can fall back.
*/
@JvmStatic
external fun rotate(bitmap: Bitmap, orientation: Int, outInfo: IntArray): Long
}
@@ -12,7 +12,9 @@ import android.provider.MediaStore.Images
import android.provider.MediaStore.Video
import android.util.Size
import androidx.annotation.RequiresApi
import androidx.exifinterface.media.ExifInterface
import app.alextran.immich.NativeBuffer
import app.alextran.immich.NativeImage
import kotlin.math.*
import java.io.IOException
import java.util.concurrent.Executors
@@ -181,35 +183,88 @@ class LocalImagesImpl(context: Context) : LocalImageApi {
val id = assetId.toLong()
signal.throwIfCanceled()
val bitmap = if (isVideo) {
decodeVideoThumbnail(id, size, signal)
} else {
decodeImage(id, size, signal)
}
try {
signal.throwIfCanceled()
val res = bitmap.toNativeBuffer()
signal.throwIfCanceled()
val res = if (isVideo) {
decodeVideoThumbnail(id, size, signal).toNativeBuffer()
} else {
val (bitmap, orientation) = decodeImage(id, size, signal)
signal.throwIfCanceled()
if (orientation == ExifInterface.ORIENTATION_NORMAL || orientation == ExifInterface.ORIENTATION_UNDEFINED) {
bitmap.toNativeBuffer()
} else {
rotateToNativeBuffer(bitmap, orientation, signal)
}
}
// Don't re-check cancellation here: res owns a malloc'd buffer, and bailing to CANCELLED would
// orphan it. Deliver it; Dart frees the buffer itself if the request was cancelled meanwhile.
callback(Result.success(res))
} catch (e: Exception) {
callback(if (e is OperationCanceledException) CANCELLED else Result.failure(e))
}
}
private fun decodeImage(id: Long, size: Size, signal: CancellationSignal): Bitmap {
// Returns the decoded bitmap plus the EXIF orientation that still needs applying. Only Q+ raw
// decodes come back unrotated (ImageDecoder / loadThumbnail skip EXIF for raw like DNG); every
// other path already orients itself, so it reports ORIENTATION_NORMAL.
private fun decodeImage(id: Long, size: Size, signal: CancellationSignal): Pair<Bitmap, Int> {
signal.throwIfCanceled()
val uri = ContentUris.withAppendedId(Images.Media.EXTERNAL_CONTENT_URI, id)
val handleRaw = Build.VERSION.SDK_INT >= Build.VERSION_CODES.Q && isRawMime(uri)
val orientation = if (handleRaw) rawOrientation(uri) else ExifInterface.ORIENTATION_NORMAL
if (size.width <= 0 || size.height <= 0 || size.width > 768 || size.height > 768) {
return decodeSource(uri, size, signal)
// A "load original" request is unsized -> a full-res decode (a sized > 768 just samples to target).
return decodeSource(uri, size, signal) to orientation
}
return if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.Q) {
val bitmap = if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.Q) {
resolver.loadThumbnail(uri, size, signal)
} else {
signal.setOnCancelListener { Images.Thumbnails.cancelThumbnailRequest(resolver, id) }
Images.Thumbnails.getThumbnail(resolver, id, Images.Thumbnails.MINI_KIND, OPTIONS)
}
return bitmap to orientation
}
private fun isRawMime(uri: Uri): Boolean {
val mime = resolver.getType(uri) ?: return false
return mime.startsWith("image/x-") || mime == "image/dng"
}
private fun rawOrientation(uri: Uri): Int {
return resolver.openInputStream(uri)?.use {
ExifInterface(it).getAttributeInt(ExifInterface.TAG_ORIENTATION, ExifInterface.ORIENTATION_NORMAL)
} ?: ExifInterface.ORIENTATION_NORMAL
}
// ImageDecoder / loadThumbnail skip EXIF orientation for raw (e.g. DNG) on Q+, so the decoded
// bitmap comes back unrotated. Rotate it into the output buffer in native code (one pass, no
// intermediate rotated bitmap).
private fun rotateToNativeBuffer(bitmap: Bitmap, orientation: Int, signal: CancellationSignal): Map<String, Long> {
signal.throwIfCanceled()
// Force ARGB_8888: the native rotate needs a lockable 8888 buffer, and toNativeBuffer() below
// allocates width*height*4 (an F16/HDR decode would otherwise under-allocate). No-op for the
// common already-8888 case.
val src = if (bitmap.config != Bitmap.Config.ARGB_8888) {
val converted = bitmap.copy(Bitmap.Config.ARGB_8888, false)
bitmap.recycle()
converted ?: throw IOException("could not convert bitmap to ARGB_8888")
} else {
bitmap
}
try {
val info = IntArray(3)
val pointer = NativeImage.rotate(src, orientation, info)
if (pointer == 0L) throw IOException("native rotate failed for orientation $orientation")
return mapOf(
"pointer" to pointer,
"width" to info[0].toLong(),
"height" to info[1].toLong(),
"rowBytes" to info[2].toLong()
)
} finally {
if (!src.isRecycled) src.recycle()
}
}
private fun decodeVideoThumbnail(id: Long, target: Size, signal: CancellationSignal): Bitmap {
@@ -53,10 +53,10 @@ export interface ImmichTags extends Omit<Tags, TagsWithWrongTypes> {
RegionList: {
Area: {
// (X,Y) // center of the rectangle
X: number;
Y: number;
W: number;
H: number;
X: number | string;
Y: number | string;
W: number | string;
H: number | string;
Unit: string;
};
Rotation?: number;
+33 -1
View File
@@ -39,7 +39,10 @@ const forSidecarJob = (
};
};
const makeFaceTags = (face: Partial<{ Name: string }> = {}, orientation?: ImmichTags['Orientation']) => ({
const makeFaceTags = (
face: Partial<{ Name: string }> = {},
orientation?: ImmichTags['Orientation'],
): Partial<ImmichTags> => ({
Orientation: orientation,
RegionInfo: {
AppliedToDimensions: { W: 1000, H: 100, Unit: 'pixel' },
@@ -1371,6 +1374,35 @@ describe(MetadataService.name, () => {
expect(mocks.person.updateAll).not.toHaveBeenCalled();
});
it('should handle string coordinates in face region bounding box calculation by limiting to 16 decimal places', async () => {
const asset = AssetFactory.create();
const person = PersonFactory.create();
mocks.assetJob.getForMetadataExtraction.mockResolvedValue(getForMetadataExtraction(asset));
mocks.systemMetadata.get.mockResolvedValue({ metadata: { faces: { import: true } } });
const faceTags = makeFaceTags({ Name: person.name });
// Simulating EXIF returning a string with >16 decimal places
faceTags.RegionInfo!.RegionList[0].Area.X = '0.48564814814814824';
faceTags.RegionInfo!.RegionList[0].Area.W = '0.2';
mockReadTags(faceTags);
mocks.person.getDistinctNames.mockResolvedValue([]);
mocks.person.createAll.mockResolvedValue([person.id]);
mocks.person.update.mockResolvedValue(person);
await sut.handleMetadataExtraction({ id: asset.id });
expect(mocks.person.refreshFaces).toHaveBeenCalledWith(
[
expect.objectContaining({
boundingBoxX1: Math.floor((0.485_648_148_148_148_2 - 0.2 / 2) * 1000),
}),
],
[],
);
});
it('should apply metadata face tags creating new people', async () => {
const asset = AssetFactory.create();
const person = PersonFactory.create();
+16 -4
View File
@@ -854,6 +854,13 @@ export class MetadataService extends BaseService {
// update area coordinates and dimensions in RegionList assuming "normalized" unit as per MWG guidelines
const adjustedRegionList = regionInfo.RegionList.map((region) => {
let { X, Y, W, H } = region.Area;
// EXIF floats with >16 decimals are serialized as strings. Ensure they are numbers.
X = Number(X);
Y = Number(Y);
W = Number(W);
H = Number(H);
switch (orientation) {
case ExifOrientation.MirrorHorizontal: {
X = 1 - X;
@@ -926,16 +933,21 @@ export class MetadataService extends BaseService {
const loweredName = region.Name.toLowerCase();
const personId = existingNameMap.get(loweredName) || this.cryptoRepository.randomUUID();
const X = Number(region.Area.X);
const Y = Number(region.Area.Y);
const W = Number(region.Area.W);
const H = Number(region.Area.H);
const face = {
id: this.cryptoRepository.randomUUID(),
personId,
assetId: asset.id,
imageWidth,
imageHeight,
boundingBoxX1: Math.floor((region.Area.X - region.Area.W / 2) * imageWidth),
boundingBoxY1: Math.floor((region.Area.Y - region.Area.H / 2) * imageHeight),
boundingBoxX2: Math.floor((region.Area.X + region.Area.W / 2) * imageWidth),
boundingBoxY2: Math.floor((region.Area.Y + region.Area.H / 2) * imageHeight),
boundingBoxX1: Math.floor((X - W / 2) * imageWidth),
boundingBoxY1: Math.floor((Y - H / 2) * imageHeight),
boundingBoxX2: Math.floor((X + W / 2) * imageWidth),
boundingBoxY2: Math.floor((Y + H / 2) * imageHeight),
sourceType: SourceType.Exif,
};