Compare commits

...

105 Commits

Author SHA1 Message Date
mertalev
338f8dc233 update docs 2025-03-15 00:12:07 -04:00
mertalev
f2a98ab523 organize imports 2025-03-15 00:11:21 -04:00
mertalev
9e689e835e add rk3568 2025-03-15 00:07:00 -04:00
mertalev
76eb285204 set log level 2025-03-14 23:37:39 -04:00
mertalev
bc9bec8673 formatting 2025-03-14 17:01:23 -04:00
mertalev
a9508fc5c8 clarify throughput vs latency 2025-03-14 16:49:40 -04:00
mertalev
678cb8938a comparison with arm nn in docs 2025-03-14 16:42:01 -04:00
mertalev
1785d0916b update tests 2025-03-14 16:07:35 -04:00
mertalev
fedbecc9f2 fix 2025-03-14 15:46:47 -04:00
mertalev
7fc47600f5 remove unused import 2025-03-14 15:35:14 -04:00
mertalev
4db85a8954 more linting 2025-03-14 15:30:11 -04:00
mertalev
bd9374e4a9 linting 2025-03-14 15:04:46 -04:00
mertalev
0f1a551842 fix retry 2025-03-14 14:29:07 -04:00
mertalev
323bcde733 add rk3576 2025-03-13 18:11:35 -04:00
mertalev
4b57fb5b37 add siglip2 models 2025-03-13 18:10:00 -04:00
mertalev
b959ae2570 handle facial recognition models 2025-03-13 17:53:41 -04:00
mertalev
e3d041e3c2 remove import 2025-03-13 17:34:07 -04:00
mertalev
2ba2c597e5 no flash attention for now 2025-03-13 17:32:19 -04:00
mertalev
9958ac9ec9 upload to hf 2025-03-13 17:31:41 -04:00
mertalev
c57c562166 export cli 2025-03-12 22:50:29 -04:00
mertalev
ce2a41826e quoted type 2025-03-11 19:20:14 -04:00
mertalev
ec0fa4d52b refactor 2025-03-11 18:43:25 -04:00
mertalev
f5e44f12e1 Merge remote-tracking branch 'origin/main' into rknn-toolkit-lite2 2025-03-11 12:42:25 -04:00
yoni13
6e08f1f371 dont stuck on core_0 on rk3588 but untested 2025-01-25 19:14:34 +08:00
Yoni Yang
3ed5d3dbac Merge branch 'main' into rknn-toolkit-lite2 2025-01-24 16:49:10 +08:00
Yoni Yang
f9387d8478 Merge branch 'main' into rknn-toolkit-lite2 2025-01-24 12:20:29 +08:00
Yoni Yang
4bb98471f4 Merge branch 'main' into rknn-toolkit-lite2 2025-01-23 23:22:24 +08:00
Yoni Yang
794da29411 Merge branch 'main' into rknn-toolkit-lite2 2025-01-22 11:43:59 +08:00
yoni13
dd52c2d68c Update permission 2025-01-19 23:19:44 +08:00
Yoni Yang
59e4b6598e Merge branch 'main' into rknn-toolkit-lite2 2025-01-19 21:50:47 +08:00
yoni13
20ba9f97e9 update mapping 2025-01-19 13:15:38 +00:00
yoni13
ac4ce3ea9c add input outputs 2025-01-19 12:10:01 +00:00
yoni13
2b967ca358 raise NotImplementedError for now 2025-01-19 12:02:08 +08:00
yoni13
1653cd9cd7 update supported SOCs 2025-01-18 17:38:44 +08:00
yoni13
32f3707e52 fix types and ignored pattern 2025-01-18 17:17:48 +08:00
yoni13
58f1cc92d7 prettier happy 2025-01-18 17:03:49 +08:00
yoni13
d2b7e10f55 shellcheck happy 2025-01-18 16:56:15 +08:00
yoni13
b3ae5d34cc fix typo in tests 2025-01-18 16:54:18 +08:00
Yoni Yang
4e42fbc091 Merge branch 'main' into rknn-toolkit-lite2 2025-01-18 16:45:38 +08:00
yoni13
9926045d5e add a simple script to notify user if some op is not supported 2025-01-18 16:32:08 +08:00
yoni13
d7381ab5c1 refactor ignore_patterns 2025-01-18 04:48:11 +00:00
yoni13
87a46dcc5e remove unnecessary print 2025-01-18 11:30:49 +08:00
yoni13
be76857ae6 make these functions snake case. 2025-01-18 11:03:54 +08:00
yoni13
f5de3de163 fix typo and add a propper var name 2025-01-18 10:46:57 +08:00
yoni13
3634ae1f5b fix granularity 2025-01-18 09:58:39 +08:00
yoni13
05675921be remove unrequired devices 2025-01-17 20:09:50 +08:00
yoni13
f067212491 tpe 2025-01-17 19:56:23 +08:00
yoni13
bc48b67379 switch to sha256 2025-01-17 19:42:23 +08:00
yoni13
26d5fb0ac6 add checksum for libnnrt.so 2025-01-17 19:39:26 +08:00
yoni13
f32d991131 changes some cases 2025-01-17 19:25:01 +08:00
yoni13
9882b83cd4 Should FIx the quote that made mypy unhappy 2025-01-15 00:35:43 +08:00
yoni13
01eb09526e trying to fix pytest 2025-01-15 00:28:48 +08:00
yoni13
b5a4ed5160 this duplicated? 2025-01-14 19:22:28 +08:00
yoni13
0f03f77e8e remove non implemented tests 2025-01-14 19:02:16 +08:00
yoni13
c21ce40d9c switch to Runtime error instead of exit() 2025-01-14 18:50:21 +08:00
Yoni Yang
cb01a11f19 Merge branch 'main' into rknn-toolkit-lite2 2025-01-14 18:44:31 +08:00
yoni13
5244ed6d4d black app export 2025-01-14 18:40:28 +08:00
yoni13
8b80d034cb fixed some bugs 2025-01-14 10:38:45 +00:00
yoni13
4b0f93cf6a add test,founds bugs, fix it tomorrow 2025-01-14 01:08:44 +08:00
yoni13
6c4e6cb96f reformat 2025-01-13 18:37:25 +08:00
yoni13
b6cc2054c5 ignore rknn model if not using it 2025-01-13 18:37:01 +08:00
Yoni Yang
f328104e84 Merge branch 'main' into rknn-toolkit-lite2 2025-01-13 18:33:05 +08:00
yoni13
2f7e44aa63 typing be happy. 2025-01-13 18:24:12 +08:00
yoni13
ebdfe1b7b6 Load model by SOC name 2025-01-13 17:08:16 +08:00
yoni13
daf886088a Add export script 2025-01-13 05:44:22 +00:00
yoni13
4c7ac1438b only load knnx model when required 2025-01-12 19:11:16 +00:00
Yoni Yang
8965a9fb16 Merge branch 'main' into rknn-toolkit-lite2 2025-01-13 01:41:55 +08:00
yoni13
7ae4b7129d format be happy 2025-01-13 01:40:20 +08:00
yoni13
1775397a84 Sort them by alphablet 2025-01-13 01:38:14 +08:00
yoni13
68fccad462 Fix docs. 2025-01-13 01:11:32 +08:00
yoni13
bb67a9db6e fix formatting 2025-01-13 00:50:49 +08:00
yoni13
c109e28686 DOCS 2025-01-13 00:44:22 +08:00
yoni13
e6ff21b345 set default thread num to 2, not everyone has 8 gigs of ram 2025-01-12 16:05:18 +08:00
yoni13
c665fd2625 Fix Please do not set this parameter on other platforms. 2025-01-12 01:29:24 +08:00
yoni13
c72cf61ed0 support core_mask for specfic socs 2025-01-12 01:24:24 +08:00
yoni13
19ee48f6f0 fix path 2025-01-12 01:09:36 +08:00
yoni13
efaf70eb9d Set running threads from env 2025-01-12 01:02:16 +08:00
yoni13
665718b09e add rknn to src 2025-01-11 21:11:30 +08:00
yoni13
807111e3b5 Should Fix No module named 'rknn' 2025-01-11 20:41:38 +08:00
yoni13
815ed1ae66 Install onnxruntime 2025-01-11 20:33:48 +08:00
yoni13
416211916d Check if NPU drivers is loaded or not. 2025-01-11 17:59:16 +08:00
yoni13
23d0ea0e7b ruff 2025-01-11 16:28:26 +08:00
yoni13
d5e453a773 ruff format 2025-01-11 16:26:17 +08:00
yoni13
7f2af6f819 Fix typo: rknnlite.api 2025-01-11 16:19:51 +08:00
yoni13
f4671f4886 Indentation issue 2025-01-11 16:16:26 +08:00
yoni13
7aaf3aa57b Remove unused imports. 2025-01-11 16:03:33 +08:00
yoni13
506ca0d3a4 Dockerfile for rknn 2025-01-11 15:47:24 +08:00
yoni13
d5ef821b24 Set group RKNN to optional 2025-01-11 15:30:05 +08:00
yoni13
d10147f478 Handling Import and file not found Error for non-arm devices. 2025-01-11 15:19:53 +08:00
Yoni Yang
66004e3b83 Merge branch 'immich-app:main' into rknn-toolkit-lite2 2025-01-11 10:45:55 +08:00
yoni13
c20d110257 support for rknn.rknnpool.is_available 2025-01-11 10:39:45 +08:00
yoni13
a2722e16e7 Revert my changes to dockerfiles 2025-01-11 10:13:03 +08:00
yoni13
4d704e9f73 fix inf,-inf with 2 concurrency 2025-01-10 14:04:18 +00:00
Yoni Yang
9bc3e5b2e2 Update rknn.py 2025-01-10 20:20:21 +08:00
Yoni Yang
8608b9c6c5 Merge branch 'immich-app:main' into rknn-toolkit-lite2 2025-01-09 18:40:40 +08:00
yoni13
a94fad543b all infrencing works with 1 max job concurrency 2025-01-09 10:38:40 +00:00
Yoni Yang
082c426e34 Merge branch 'immich-app:main' into rknn-toolkit-lite2 2024-12-25 20:24:17 +08:00
Yoni Yang
da152bd284 Merge branch 'immich-app:main' into rknn-toolkit-lite2 2024-12-13 13:46:24 +08:00
Yoni Yang
257cc6c963 Init commit for using rknn, RecognitionFormDataLoadTest doesnt work 2024-12-04 14:32:46 +00:00
Yoni Yang
4140e93aea Merge branch 'immich-app:main' into rknn-toolkit-lite2 2024-12-04 13:15:14 +08:00
Yoni Yang
b6c4b37237 Merge branch 'immich-app:main' into rknn-toolkit2 2024-12-03 06:00:43 -08:00
Yoni Yang
bc849e2e9f ViT-B-32__openai/textual/ Runs with emulator now. 2024-12-01 16:42:53 +00:00
Yoni Yang
7fddf282cf lowercase 2024-11-30 14:24:38 +00:00
Yoni Yang
6ffc227330 test 2024-11-29 07:43:59 +00:00
Yoni Yang
8ef3e49f74 untested 2024-11-29 07:42:09 +00:00
43 changed files with 2417 additions and 4726 deletions

View File

@@ -49,7 +49,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
suffix: ["", "-cuda", "-openvino", "-armnn"]
suffix: ["", "-cuda", "-openvino", "-armnn","-rknn"]
steps:
- name: Login to GitHub Container Registry
uses: docker/login-action@v3
@@ -129,6 +129,9 @@ jobs:
runner: ubuntu-24.04-arm
device: armnn
suffix: -armnn
- platforms: linux/arm64
device: rknn
suffix: -rknn
steps:
- name: Prepare
@@ -454,4 +457,4 @@ jobs:
run: exit 1
- name: All jobs passed or skipped
if: ${{ !(contains(needs.*.result, 'failure')) }}
run: echo "All jobs passed or skipped" && echo "${{ toJSON(needs.*.result) }}"
run: echo "All jobs passed or skipped" && echo "${{ toJSON(needs.*.result) }}"

View File

@@ -95,12 +95,12 @@ services:
image: immich-machine-learning-dev:latest
# extends:
# file: hwaccel.ml.yml
# service: cpu # set to one of [armnn, cuda, openvino, openvino-wsl] for accelerated inference
# service: cpu # set to one of [armnn, cuda, openvino, openvino-wsl, rknn] for accelerated inference
build:
context: ../machine-learning
dockerfile: Dockerfile
args:
- DEVICE=cpu # set to one of [armnn, cuda, openvino, openvino-wsl] for accelerated inference
- DEVICE=cpu # set to one of [armnn, cuda, openvino, openvino-wsl, rknn] for accelerated inference
ports:
- 3003:3003
volumes:

View File

@@ -38,12 +38,12 @@ services:
image: immich-machine-learning:latest
# extends:
# file: hwaccel.ml.yml
# service: cpu # set to one of [armnn, cuda, openvino, openvino-wsl] for accelerated inference
# service: cpu # set to one of [armnn, cuda, openvino, openvino-wsl, rknn] for accelerated inference
build:
context: ../machine-learning
dockerfile: Dockerfile
args:
- DEVICE=cpu # set to one of [armnn, cuda, openvino, openvino-wsl] for accelerated inference
- DEVICE=cpu # set to one of [armnn, cuda, openvino, openvino-wsl, rknn] for accelerated inference
ports:
- 3003:3003
volumes:
@@ -77,22 +77,12 @@ services:
- 5432:5432
healthcheck:
test: >-
pg_isready --dbname="$${POSTGRES_DB}" --username="$${POSTGRES_USER}" || exit 1;
Chksum="$$(psql --dbname="$${POSTGRES_DB}" --username="$${POSTGRES_USER}" --tuples-only --no-align
--command='SELECT COALESCE(SUM(checksum_failures), 0) FROM pg_stat_database')";
echo "checksum failure count is $$Chksum";
[ "$$Chksum" = '0' ] || exit 1
pg_isready --dbname="$${POSTGRES_DB}" --username="$${POSTGRES_USER}" || exit 1; Chksum="$$(psql --dbname="$${POSTGRES_DB}" --username="$${POSTGRES_USER}" --tuples-only --no-align --command='SELECT COALESCE(SUM(checksum_failures), 0) FROM pg_stat_database')"; echo "checksum failure count is $$Chksum"; [ "$$Chksum" = '0' ] || exit 1
interval: 5m
start_interval: 30s
start_period: 5m
command: >-
postgres
-c shared_preload_libraries=vectors.so
-c 'search_path="$$user", public, vectors'
-c logging_collector=on
-c max_wal_size=2GB
-c shared_buffers=512MB
-c wal_compression=on
postgres -c shared_preload_libraries=vectors.so -c 'search_path="$$user", public, vectors' -c logging_collector=on -c max_wal_size=2GB -c shared_buffers=512MB -c wal_compression=on
restart: always
# set IMMICH_TELEMETRY_INCLUDE=all in .env to enable metrics
@@ -109,7 +99,7 @@ services:
# add data source for http://immich-prometheus:9090 to get started
immich-grafana:
container_name: immich_grafana
command: ['./run.sh', '-disable-reporting']
command: [ './run.sh', '-disable-reporting' ]
ports:
- 3000:3000
image: grafana/grafana:11.5.2-ubuntu@sha256:8b5858c447e06fd7a89006b562ba7bba7c4d5813600c7982374c41852adefaeb

View File

@@ -33,12 +33,12 @@ services:
immich-machine-learning:
container_name: immich_machine_learning
# For hardware acceleration, add one of -[armnn, cuda, openvino] to the image tag.
# For hardware acceleration, add one of -[armnn, cuda, openvino, rknn] to the image tag.
# Example tag: ${IMMICH_VERSION:-release}-cuda
image: ghcr.io/immich-app/immich-machine-learning:${IMMICH_VERSION:-release}
# extends: # uncomment this section for hardware acceleration - see https://immich.app/docs/features/ml-hardware-acceleration
# file: hwaccel.ml.yml
# service: cpu # set to one of [armnn, cuda, openvino, openvino-wsl] for accelerated inference - use the `-wsl` version for WSL2 where applicable
# service: cpu # set to one of [armnn, cuda, openvino, openvino-wsl, rknn] for accelerated inference - use the `-wsl` version for WSL2 where applicable
volumes:
- model-cache:/cache
env_file:
@@ -67,22 +67,12 @@ services:
- ${DB_DATA_LOCATION}:/var/lib/postgresql/data
healthcheck:
test: >-
pg_isready --dbname="$${POSTGRES_DB}" --username="$${POSTGRES_USER}" || exit 1;
Chksum="$$(psql --dbname="$${POSTGRES_DB}" --username="$${POSTGRES_USER}" --tuples-only --no-align
--command='SELECT COALESCE(SUM(checksum_failures), 0) FROM pg_stat_database')";
echo "checksum failure count is $$Chksum";
[ "$$Chksum" = '0' ] || exit 1
pg_isready --dbname="$${POSTGRES_DB}" --username="$${POSTGRES_USER}" || exit 1; Chksum="$$(psql --dbname="$${POSTGRES_DB}" --username="$${POSTGRES_USER}" --tuples-only --no-align --command='SELECT COALESCE(SUM(checksum_failures), 0) FROM pg_stat_database')"; echo "checksum failure count is $$Chksum"; [ "$$Chksum" = '0' ] || exit 1
interval: 5m
start_interval: 30s
start_period: 5m
command: >-
postgres
-c shared_preload_libraries=vectors.so
-c 'search_path="$$user", public, vectors'
-c logging_collector=on
-c max_wal_size=2GB
-c shared_buffers=512MB
-c wal_compression=on
postgres -c shared_preload_libraries=vectors.so -c 'search_path="$$user", public, vectors' -c logging_collector=on -c max_wal_size=2GB -c shared_buffers=512MB -c wal_compression=on
restart: always
volumes:

View File

@@ -13,6 +13,13 @@ services:
volumes:
- /lib/firmware/mali_csffw.bin:/lib/firmware/mali_csffw.bin:ro # Mali firmware for your chipset (not always required depending on the driver)
- /usr/lib/libmali.so:/usr/lib/libmali.so:ro # Mali driver for your chipset (always required)
rknn:
security_opt:
- systempaths=unconfined
- apparmor=unconfined
devices:
- /dev/dri:/dev/dri
cpu: {}

View File

@@ -12,6 +12,7 @@ You do not need to redo any machine learning jobs after enabling hardware accele
- ARM NN (Mali)
- CUDA (NVIDIA GPUs with [compute capability](https://developer.nvidia.com/cuda-gpus) 5.2 or higher)
- OpenVINO (Intel GPUs such as Iris Xe and Arc)
- RKNN (Rockchip)
## Limitations
@@ -19,6 +20,7 @@ You do not need to redo any machine learning jobs after enabling hardware accele
- Only Linux and Windows (through WSL2) servers are supported.
- ARM NN is only supported on devices with Mali GPUs. Other Arm devices are not supported.
- Some models may not be compatible with certain backends. CUDA is the most reliable.
- Search latency isn't improved by ARM NN due to model compatibility issues preventing its use. However, smart search jobs do make use of ARM NN.
## Prerequisites
@@ -33,6 +35,7 @@ You do not need to redo any machine learning jobs after enabling hardware accele
- The `hwaccel.ml.yml` file assumes the path to it is `/usr/lib/libmali.so`, so update accordingly if it is elsewhere
- The `hwaccel.ml.yml` file assumes an additional file `/lib/firmware/mali_csffw.bin`, so update accordingly if your device's driver does not require this file
- Optional: Configure your `.env` file, see [environment variables](/docs/install/environment-variables) for ARM NN specific settings
- In particular, the `MACHINE_LEARNING_ANN_FP16_TURBO` can significantly improve performance at the cost of very slightly lower accuracy
#### CUDA
@@ -47,6 +50,16 @@ You do not need to redo any machine learning jobs after enabling hardware accele
- Ensure the server's kernel version is new enough to use the device for hardware acceleration.
- Expect higher RAM usage when using OpenVINO compared to CPU processing.
#### RKNN
- You must have a supported Rockchip SoC: only RK3566, RK3568, RK3576 and RK3588 are supported at this moment.
- Make sure you have the appropriate Linux kernel driver installed.
- This is usually pre-installed on the device vendor's Linux images
- RKNPU driver V0.9.8 or later must be available on the host server
- You may confirm this by running `cat /sys/kernel/debug/rknpu/version` to check the version
- Optional: Configure your `.env` file, see [environment variables](/docs/install/environment-variables) for RKNN specific settings
- In particular, setting `MACHINE_LEARNING_RKNN_THREADS` to 2 or 3 can _dramatically_ improve performance for RK3576 and RK3588 compared to the default of 1, at the expense of multiplying the amount of RAM each model uses by that amount.
## Setup
1. If you do not already have it, download the latest [`hwaccel.ml.yml`][hw-file] file and ensure it's in the same folder as the `docker-compose.yml`.
@@ -127,3 +140,12 @@ Note that you should increase job concurrencies to increase overall utilization
- If you encounter an error when a model is running, try a different model to see if the issue is model-specific.
- You may want to increase concurrency past the default for higher utilization. However, keep in mind that this will also increase VRAM consumption.
- Larger models benefit more from hardware acceleration, if you have the VRAM for them.
- Compared to ARM NN, RKNPU has:
- Wider model support (including for search, which ARM NN does not accelerate)
- Less heat generation
- Very slightly lower accuracy (RKNPU always uses FP16, while ARM NN by default uses higher precision FP32 unless `MACHINE_LEARNING_ANN_FP16_TURBO` is enabled)
- Varying speed (tested on RK3588):
- If `MACHINE_LEARNING_RKNN_THREADS` is at the default of 1, RKNPU will have substantially lower throughput for ML jobs than ARM NN in most cases, but similar latency (such as when searching)
- If `MACHINE_LEARNING_RKNN_THREADS` is set to 3, it will be somewhat faster than ARM NN at FP32, but somewhat slower than ARM NN if `MACHINE_LEARNING_ANN_FP16_TURBO` is enabled
- When other tasks also use the GPU (like transcoding), RKNPU has a significant advantage over ARM NN as it uses the otherwise idle NPU instead of competing for GPU usage
- Lower RAM usage if `MACHINE_LEARNING_RKNN_THREADS` is at the default of 1, but significantly higher if greater than 1 (which is necessary for it to fully utilize the NPU and hence be comparable in speed to ARM NN)

View File

@@ -170,6 +170,8 @@ Redis (Sentinel) URL example JSON before encoding:
| `MACHINE_LEARNING_MAX_BATCH_SIZE__FACIAL_RECOGNITION` | Set the maximum number of faces that will be processed at once by the facial recognition model | None (`1` if using OpenVINO) | machine learning |
| `MACHINE_LEARNING_PING_TIMEOUT` | How long (ms) to wait for a PING response when checking if an ML server is available | `2000` | server |
| `MACHINE_LEARNING_AVAILABILITY_BACKOFF_TIME` | How long to ignore ML servers that are offline before trying again | `30000` | server |
| `MACHINE_LEARNING_RKNN` | Enable RKNN hardware acceleration if supported | `True` | machine learning |
| `MACHINE_LEARNING_RKNN_THREADS` | How many RKNN runtime threads to spin up for inference. | `1` | machine learning |
\*1: It is recommended to begin with this parameter when changing the concurrency levels of the machine learning service and then tune the other ones.

View File

@@ -1,5 +1,24 @@
*.zip
*.onnx
*.rknn
*.npy
*_attr__value
*.weight
*.bias
onnx__*
*in_proj_bias
*.proj
*.latent
*.pos_embed
vocab.txt
export/immich_model_exporter/models/**/README.md
tokenizer.json
tokenizer_config.json
special_tokens_map.json
preprocess_cfg.json
config.json
merges.txt
vocab.json
upload/
venv/
__pycache__/

View File

@@ -15,6 +15,8 @@ RUN mkdir /opt/armnn && \
cd /opt/ann && \
sh build.sh
FROM builder-cpu AS builder-rknn
FROM builder-${DEVICE} AS builder
ARG DEVICE
@@ -76,6 +78,10 @@ COPY --from=builder-armnn \
/opt/ann/build.sh \
/opt/armnn/
FROM prod-cpu AS prod-rknn
ADD --checksum=sha256:73993ed4b440460825f21611731564503cc1d5a0c123746477da6cd574f34885 https://github.com/airockchip/rknn-toolkit2/raw/refs/tags/v2.3.0/rknpu2/runtime/Linux/librknn_api/aarch64/librknnrt.so /usr/lib/
FROM prod-${DEVICE} AS prod
ARG DEVICE
@@ -121,4 +127,4 @@ ENV IMMICH_SOURCE_URL=https://github.com/immich-app/immich/commit/${BUILD_SOURCE
ENTRYPOINT ["tini", "--"]
CMD ["./start.sh"]
HEALTHCHECK CMD python3 healthcheck.py
HEALTHCHECK CMD python3 healthcheck.py

View File

@@ -64,6 +64,8 @@ class Settings(BaseSettings):
ann: bool = True
ann_fp16_turbo: bool = False
ann_tuning_level: int = 2
rknn: bool = True
rknn_threads: int = 1
preload: PreloadModelData | None = None
max_batch_size: MaxBatchSize | None = None

View File

@@ -136,6 +136,12 @@ def ann_session() -> Iterator[mock.Mock]:
yield mocked
@pytest.fixture(scope="function")
def rknn_session() -> Iterator[mock.Mock]:
    """Patch `app.sessions.rknn.RknnPoolExecutor` for the duration of a test and yield the mock."""
    with mock.patch("app.sessions.rknn.RknnPoolExecutor") as mocked:
        yield mocked
@pytest.fixture(scope="function")
def rmtree() -> Iterator[mock.Mock]:
with mock.patch("app.models.base.rmtree", autospec=True) as mocked:

View File

@@ -226,9 +226,9 @@ async def load(model: InferenceModel) -> InferenceModel:
except FileNotFoundError as e:
if model.model_format == ModelFormat.ONNX:
raise e
log.exception(e)
log.warning(
f"{model.model_format.upper()} is available, but model '{model.model_name}' does not support it."
f"{model.model_format.upper()} is available, but model '{model.model_name}' does not support it.",
exc_info=e,
)
model.model_format = ModelFormat.ONNX
model.load()

View File

@@ -8,6 +8,7 @@ from typing import Any, ClassVar
from huggingface_hub import snapshot_download
import ann.ann
import app.sessions.rknn as rknn
from app.sessions.ort import OrtSession
from ..config import clean_name, log, settings
@@ -66,12 +67,17 @@ class InferenceModel(ABC):
pass
def _download(self) -> None:
ignore_patterns = [] if self.model_format == ModelFormat.ARMNN else ["*.armnn"]
ignored_patterns: dict[ModelFormat, list[str]] = {
ModelFormat.ONNX: ["*.armnn", "*.rknn"],
ModelFormat.ARMNN: ["*.rknn"],
ModelFormat.RKNN: ["*.armnn"],
}
snapshot_download(
f"immich-app/{clean_name(self.model_name)}",
cache_dir=self.cache_dir,
local_dir=self.cache_dir,
ignore_patterns=ignore_patterns,
ignore_patterns=ignored_patterns.get(self.model_format, []),
)
def _load(self) -> ModelSession:
@@ -108,17 +114,25 @@ class InferenceModel(ABC):
session: ModelSession = AnnSession(model_path)
case ".onnx":
session = OrtSession(model_path)
case ".rknn":
session = rknn.RknnSession(model_path)
case _:
raise ValueError(f"Unsupported model file type: {model_path.suffix}")
return session
def model_path_for_format(self, model_format: ModelFormat) -> Path:
model_path_prefix = rknn.model_prefix if model_format == ModelFormat.RKNN else None
if model_path_prefix:
return self.model_dir / model_path_prefix / f"model.{model_format}"
return self.model_dir / f"model.{model_format}"
@property
def model_dir(self) -> Path:
return self.cache_dir / self.model_type.value
@property
def model_path(self) -> Path:
return self.model_dir / f"model.{self.model_format}"
return self.model_path_for_format(self.model_format)
@property
def model_task(self) -> ModelTask:
@@ -155,4 +169,9 @@ class InferenceModel(ABC):
@property
def _model_format_default(self) -> ModelFormat:
return ModelFormat.ARMNN if ann.ann.is_available and settings.ann else ModelFormat.ONNX
if rknn.is_available:
return ModelFormat.RKNN
elif ann.ann.is_available and settings.ann:
return ModelFormat.ARMNN
else:
return ModelFormat.ONNX

View File

@@ -44,6 +44,18 @@ _OPENCLIP_MODELS = {
"nllb-clip-base-siglip__v1",
"nllb-clip-large-siglip__mrl",
"nllb-clip-large-siglip__v1",
"ViT-B-16-SigLIP2__webli",
"ViT-B-32-SigLIP2-256__webli",
"ViT-L-16-SigLIP2-256__webli",
"ViT-L-16-SigLIP2-384__webli",
"ViT-L-16-SigLIP2-512__webli",
"ViT-SO400M-14-SigLIP2-378__webli",
"ViT-SO400M-14-SigLIP2__webli",
"ViT-SO400M-16-SigLIP2-256__webli",
"ViT-SO400M-16-SigLIP2-384__webli",
"ViT-SO400M-16-SigLIP2-512__webli",
"ViT-gopt-16-SigLIP2-256__webli",
"ViT-gopt-16-SigLIP2-384__webli",
}
@@ -65,6 +77,9 @@ _INSIGHTFACE_MODELS = {
SUPPORTED_PROVIDERS = ["CUDAExecutionProvider", "OpenVINOExecutionProvider", "CPUExecutionProvider"]
RKNN_SUPPORTED_SOCS = ["rk3566", "rk3568", "rk3576", "rk3588"]
RKNN_COREMASK_SUPPORTED_SOCS = ["rk3576", "rk3588"]
def get_model_source(model_name: str) -> ModelSource | None:
cleaned_name = clean_name(model_name)

View File

@@ -31,7 +31,7 @@ class FaceRecognizer(InferenceModel):
self._add_batch_axis(self.model_path)
session = self._make_session(self.model_path)
self.model = ArcFaceONNX(
self.model_path.with_suffix(".onnx").as_posix(),
self.model_path_for_format(ModelFormat.ONNX).as_posix(),
session=session,
)
return session

View File

@@ -35,6 +35,7 @@ class ModelType(StrEnum):
class ModelFormat(StrEnum):
ARMNN = "armnn"
ONNX = "onnx"
RKNN = "rknn"
class ModelSource(StrEnum):

View File

@@ -0,0 +1,76 @@
from __future__ import annotations
from pathlib import Path
from typing import Any, NamedTuple
import numpy as np
from numpy.typing import NDArray
from app.config import log, settings
from app.schemas import SessionNode
from .rknnpool import RknnPoolExecutor, is_available, soc_name
# RKNN is used only when the runtime pool reports itself available AND the `rknn` setting is enabled.
is_available = is_available and settings.rknn
# Relative directory `rknpu/<soc name>`, or None when RKNN is unavailable or no SoC name is known.
model_prefix = Path("rknpu") / soc_name if is_available and soc_name is not None else None
def run_inference(rknn_lite: Any, input: list[NDArray[np.float32]]) -> list[NDArray[np.float32]]:
    """Run one inference on `rknn_lite`, passing `input` with the "nchw" data format.

    Returns whatever the runtime's `inference` call returns.
    """
    results: list[NDArray[np.float32]] = rknn_lite.inference(
        inputs=input,
        data_format="nchw",
    )
    return results
# Tensor names and shapes reported by RknnSession.get_inputs() / get_outputs(),
# keyed by the session's model type ("detection" or "recognition").
input_output_mapping: dict[str, dict[str, Any]] = {
    "detection": {
        "input": {"norm_tensor:0": (1, 3, 640, 640)},
        "output": {
            "norm_tensor:1": (12800, 1),
            "norm_tensor:2": (3200, 1),
            "norm_tensor:3": (800, 1),
            "norm_tensor:4": (12800, 4),
            "norm_tensor:5": (3200, 4),
            "norm_tensor:6": (800, 4),
            "norm_tensor:7": (12800, 10),
            "norm_tensor:8": (3200, 10),
            "norm_tensor:9": (800, 10),
        },
    },
    "recognition": {"input": {"norm_tensor:0": (1, 3, 112, 112)}, "output": {"norm_tensor:1": (1, 512)}},
}
class RknnSession:
    """Model session that runs inference through an `RknnPoolExecutor`."""

    def __init__(self, model_path: Path) -> None:
        """Load the model at `model_path` into a pool sized by `settings.rknn_threads`."""
        # Any path without a "detection" component is treated as a recognition model.
        is_detection = "detection" in model_path.parts
        self.model_type = "detection" if is_detection else "recognition"
        self.tpe = settings.rknn_threads

        log.info(f"Loading RKNN model from {model_path} with {self.tpe} threads.")
        self.rknnpool = RknnPoolExecutor(
            model_path=model_path.as_posix(),
            tpes=self.tpe,
            func=run_inference,
        )
        log.info(f"Loaded RKNN model from {model_path} with {self.tpe} threads.")

    def _nodes(self, direction: str) -> list[SessionNode]:
        """Build the node list for "input" or "output" from the static mapping."""
        tensors = input_output_mapping[self.model_type][direction]
        return [RknnNode(name=name, shape=shape) for name, shape in tensors.items()]

    def get_inputs(self) -> list[SessionNode]:
        """Return the input nodes declared for this session's model type."""
        return self._nodes("input")

    def get_outputs(self) -> list[SessionNode]:
        """Return the output nodes declared for this session's model type."""
        return self._nodes("output")

    def run(
        self,
        output_names: list[str] | None,
        input_feed: dict[str, NDArray[np.float32]] | dict[str, NDArray[np.int32]],
        run_options: Any = None,
    ) -> list[NDArray[np.float32]]:
        """Submit the values of `input_feed` to the pool and return the next result.

        `output_names` and `run_options` are accepted but not used.

        Raises:
            RuntimeError: if the pool returns no result.
        """
        # NOTE(review): the result is fetched from the pool's shared queue; confirm
        # that concurrent callers cannot receive each other's results.
        input_data: list[NDArray[np.float32]] = []
        for value in input_feed.values():
            input_data.append(np.ascontiguousarray(value))

        self.rknnpool.put(input_data)
        outputs = self.rknnpool.get()
        if outputs is not None:
            return outputs
        raise RuntimeError("RKNN inference failed!")
class RknnNode(NamedTuple):
    """Name and shape of a single model input or output tensor."""

    name: str | None
    shape: tuple[int, ...]
__all__ = ["RknnSession", "RknnNode", "is_available", "soc_name", "model_prefix"]

View File

@@ -0,0 +1,91 @@
# This code is from leafqycc/rknn-multi-threaded
# Following Apache License 2.0
import logging
from concurrent.futures import Future, ThreadPoolExecutor
from pathlib import Path
from queue import Queue
from typing import Callable
import numpy as np
from numpy.typing import NDArray
from app.config import log
from app.models.constants import RKNN_COREMASK_SUPPORTED_SOCS, RKNN_SUPPORTED_SOCS
def get_soc(device_tree_path: Path | str) -> str | None:
    """Return the first supported SoC name found in the file at `device_tree_path`.

    Returns None, after logging a warning, when the file cannot be read or
    when it mentions none of `RKNN_SUPPORTED_SOCS`.
    """
    try:
        with Path(device_tree_path).open() as device_tree:
            compatible = device_tree.read()
    except OSError as e:
        log.warning(f"Could not read {device_tree_path}. Reason: %s", e)
        return None

    # Substring match, in the order the supported SoCs are listed.
    matched = next((soc for soc in RKNN_SUPPORTED_SOCS if soc in compatible), None)
    if matched is None:
        log.warning("Device is not supported for RKNN")
    return matched
# Defaults used when the RKNN runtime package cannot be imported.
soc_name = None
is_available = False
try:
    from rknnlite.api import RKNNLite

    # RKNN counts as available only if the import succeeds and a supported SoC is detected.
    soc_name = get_soc("/proc/device-tree/compatible")
    is_available = soc_name is not None
except ImportError:
    log.debug("RKNN is not available")
def init_rknn(model_path: str) -> "RKNNLite":
    """Create an RKNNLite runtime with the model at `model_path` loaded and initialized.

    Raises:
        RuntimeError: if RKNN is not available, or if loading the model or
            initializing the runtime returns a non-zero status.
    """
    if not is_available:
        raise RuntimeError("rknn is not available!")
    rknn_lite = RKNNLite()
    # Only let messages at ERROR level or above through the runtime's own logger.
    rknn_lite.rknn_log.logger.setLevel(logging.ERROR)
    ret = rknn_lite.load_rknn(model_path)
    if ret != 0:
        raise RuntimeError("Failed to load RKNN model")

    if soc_name in RKNN_COREMASK_SUPPORTED_SOCS:
        ret = rknn_lite.init_runtime(core_mask=RKNNLite.NPU_CORE_AUTO)
    else:
        ret = rknn_lite.init_runtime()  # Please do not set this parameter on other platforms.
    if ret != 0:
        raise RuntimeError("Failed to initialize RKNN runtime environment")
    return rknn_lite
class RknnPoolExecutor:
def __init__(
self,
model_path: str,
tpes: int,
func: Callable[["RKNNLite", list[NDArray[np.float32]]], list[NDArray[np.float32]]],
) -> None:
self.tpes = tpes
self.queue: Queue[Future[list[NDArray[np.float32]]]] = Queue()
self.rknn_pool = [init_rknn(model_path) for _ in range(tpes)]
self.pool = ThreadPoolExecutor(max_workers=tpes)
self.func = func
self.num = 0
def put(self, inputs: list[NDArray[np.float32]]) -> None:
self.queue.put(self.pool.submit(self.func, self.rknn_pool[self.num % self.tpes], inputs))
self.num += 1
def get(self) -> list[NDArray[np.float32]] | None:
if self.queue.empty():
return None
fut = self.queue.get()
return fut.result()
def release(self) -> None:
self.pool.shutdown()
for rknn_lite in self.rknn_pool:
rknn_lite.release()
def __del__(self) -> None:
self.release()

View File

@@ -25,6 +25,7 @@ from app.models.facial_recognition.detection import FaceDetector
from app.models.facial_recognition.recognition import FaceRecognizer
from app.sessions.ann import AnnSession
from app.sessions.ort import OrtSession
from app.sessions.rknn import RknnSession, run_inference
from .config import Settings, settings
from .models.base import InferenceModel
@@ -69,6 +70,14 @@ class TestBase:
assert encoder.model_format == ModelFormat.ARMNN
def test_sets_default_model_format_to_rknn_if_available(self, mocker: MockerFixture) -> None:
mocker.patch.object(settings, "rknn", True)
mocker.patch("app.sessions.rknn.is_available", True)
encoder = OpenClipTextualEncoder("ViT-B-32__openai")
assert encoder.model_format == ModelFormat.RKNN
def test_casts_cache_dir_string_to_path(self) -> None:
cache_dir = "/test_cache"
encoder = OpenClipTextualEncoder("ViT-B-32__openai", cache_dir=cache_dir)
@@ -125,7 +134,7 @@ class TestBase:
"immich-app/ViT-B-32__openai",
cache_dir=encoder.cache_dir,
local_dir=encoder.cache_dir,
ignore_patterns=["*.armnn"],
ignore_patterns=["*.armnn", "*.rknn"],
)
def test_download_downloads_armnn_if_preferred_format(self, snapshot_download: mock.Mock) -> None:
@@ -136,7 +145,18 @@ class TestBase:
"immich-app/ViT-B-32__openai",
cache_dir=encoder.cache_dir,
local_dir=encoder.cache_dir,
ignore_patterns=[],
ignore_patterns=["*.rknn"],
)
def test_download_downloads_rknn_if_preferred_format(self, snapshot_download: mock.Mock) -> None:
encoder = OpenClipTextualEncoder("ViT-B-32__openai", model_format=ModelFormat.RKNN)
encoder.download()
snapshot_download.assert_called_once_with(
"immich-app/ViT-B-32__openai",
cache_dir=encoder.cache_dir,
local_dir=encoder.cache_dir,
ignore_patterns=["*.armnn"],
)
def test_throws_exception_if_model_path_does_not_exist(
@@ -328,6 +348,33 @@ class TestAnnSession:
np_spy.assert_has_calls([mock.call(input1), mock.call(input2)])
class TestRknnSession:
    """Tests for RknnSession with RknnPoolExecutor replaced by a mock."""

    def test_creates_rknn_session(self, rknn_session: mock.Mock, info: mock.Mock, mocker: MockerFixture) -> None:
        model_path = mock.MagicMock(spec=Path)
        tpe = 1
        mocker.patch("app.sessions.rknn.soc_name", "rk3566")
        mocker.patch("app.sessions.rknn.is_available", True)
        RknnSession(model_path)

        rknn_session.assert_called_once_with(model_path=model_path.as_posix(), tpes=tpe, func=run_inference)
        info.assert_has_calls([mock.call(f"Loaded RKNN model from {model_path} with {tpe} threads.")])

    def test_run_rknn(self, rknn_session: mock.Mock, mocker: MockerFixture) -> None:
        rknn_session.return_value.load.return_value = 123
        np_spy = mocker.spy(np, "ascontiguousarray")
        mocker.patch("app.sessions.rknn.soc_name", "rk3566")
        session = RknnSession(Path("ViT-B-32__openai"))
        [input1, input2] = [np.random.rand(1, 3, 224, 224).astype(np.float32) for _ in range(2)]
        input_feed = {"input.1": input1, "input.2": input2}

        session.run(None, input_feed)

        rknn_session.return_value.put.assert_called_once_with([input1, input2])
        # Each input is made contiguous exactly once before being submitted.
        assert np_spy.call_count == 2
        np_spy.assert_has_calls([mock.call(input1), mock.call(input2)])
class TestCLIP:
embedding = np.random.rand(512).astype(np.float32)
cache_dir = Path("test_cache")
@@ -829,9 +876,7 @@ class TestLoad:
mock_model.clear_cache.assert_not_called()
mock_model.load.assert_not_called()
async def test_falls_back_to_onnx_if_other_format_does_not_exist(
self, exception: mock.Mock, warning: mock.Mock
) -> None:
async def test_falls_back_to_onnx_if_other_format_does_not_exist(self, warning: mock.Mock) -> None:
mock_model = mock.Mock(spec=InferenceModel)
mock_model.model_name = "test_model_name"
mock_model.model_type = ModelType.VISUAL
@@ -846,8 +891,9 @@ class TestLoad:
mock_model.clear_cache.assert_not_called()
assert mock_model.load.call_count == 2
exception.assert_called_once_with(error)
warning.assert_called_once_with("ARMNN is available, but model 'test_model_name' does not support it.")
warning.assert_called_once_with(
"ARMNN is available, but model 'test_model_name' does not support it.", exc_info=error
)
mock_model.model_format = ModelFormat.ONNX

View File

@@ -0,0 +1 @@
3.12

View File

@@ -1,20 +0,0 @@
FROM mambaorg/micromamba:bookworm-slim@sha256:e3797091302382ea841498bc93a7b0a50f7c1448333d5e946d2d1608d0c5f43d AS builder
ENV TRANSFORMERS_CACHE=/cache \
PYTHONDONTWRITEBYTECODE=1 \
PYTHONUNBUFFERED=1 \
PATH="/opt/venv/bin:$PATH" \
PYTHONPATH=/usr/src
COPY --chown=$MAMBA_USER:$MAMBA_USER conda-lock.yml /tmp/conda-lock.yml
RUN micromamba install -y -n base -f /tmp/conda-lock.yml && \
micromamba remove -y -n base cxx-compiler && \
micromamba clean --all --yes
WORKDIR /usr/src/app
COPY --chown=$MAMBA_USER:$MAMBA_USER start.sh .
COPY --chown=$MAMBA_USER:$MAMBA_USER app .
ENTRYPOINT ["/usr/local/bin/_entrypoint.sh"]
CMD ["./start.sh"]

File diff suppressed because it is too large Load Diff

View File

@@ -1,15 +0,0 @@
name: base
channels:
- conda-forge
platforms:
- linux-64
- linux-aarch64
dependencies:
- black
- conda-lock
- mypy
- pytest
- pytest-cov
- pytest-mock
- ruff
category: dev

View File

@@ -1,25 +0,0 @@
name: base
channels:
- conda-forge
- nvidia
- pytorch
platforms:
- linux-64
dependencies:
- cxx-compiler
- onnx==1.*
- onnxruntime==1.*
- open-clip-torch==2.*
- orjson==3.*
- pip
- python==3.11.*
- pytorch>=2.3
- rich==13.*
- safetensors==0.*
- setuptools==68.*
- torchvision
- transformers==4.*
- pip:
- multilingual-clip
- onnxsim
category: main

View File

@@ -0,0 +1,98 @@
from pathlib import Path
import typer
from tenacity import retry, stop_after_attempt, wait_fixed
from typing_extensions import Annotated
from .exporters.constants import DELETE_PATTERNS, SOURCE_TO_METADATA, ModelSource
from .exporters.onnx import export as onnx_export
from .exporters.rknn import export as rknn_export
app = typer.Typer(pretty_exceptions_show_locals=False)
def generate_readme(model_name: str, model_source: ModelSource) -> str:
(name, link, type) = SOURCE_TO_METADATA[model_source]
match model_source:
case ModelSource.MCLIP:
tags = ["immich", "clip", "multilingual"]
case ModelSource.OPENCLIP:
tags = ["immich", "clip"]
lowered = model_name.lower()
if "xlm" in lowered or "nllb" in lowered:
tags.append("multilingual")
case ModelSource.INSIGHTFACE:
tags = ["immich", "facial-recognition"]
case _:
raise ValueError(f"Unsupported model source {model_source}")
return f"""---
tags:
{" - " + "\n - ".join(tags)}
---
# Model Description
This repo contains ONNX exports for the associated {type} model by {name}. See the [{name}]({link}) repo for more info.
This repo is specifically intended for use with [Immich](https://immich.app/), a self-hosted photo library.
"""
@app.command()
def main(
    model_name: str,
    model_source: ModelSource,
    output_dir: Path = Path("./models"),
    no_cache: bool = False,
    hf_organization: str = "immich-app",
    hf_auth_token: Annotated[str | None, typer.Option(envvar="HF_AUTH_TOKEN")] = None,
) -> None:
    # CLI entry point: export one model (ONNX, then RKNN), write a README, and
    # optionally upload the folder to Hugging Face when a token is supplied.
    # (Kept as comments on purpose: a docstring here would become CLI help text.)

    # Repo/folder name: last path segment, with the XLM-Roberta prefixes re-capitalised.
    hf_model_name = (
        model_name.split("/")[-1]
        .replace("xlm-roberta-large", "XLM-Roberta-Large")
        .replace("xlm-roberta-base", "XLM-Roberta-Base")
    )
    output_dir = output_dir / hf_model_name

    if model_source in (ModelSource.MCLIP, ModelSource.OPENCLIP):
        output_dir.mkdir(parents=True, exist_ok=True)
        onnx_export(model_name, model_source, output_dir, no_cache=no_cache)
    elif model_source == ModelSource.INSIGHTFACE:
        from huggingface_hub import snapshot_download

        # TODO: start from insightface dump instead of downloading from HF
        snapshot_download(f"immich-app/{hf_model_name}", local_dir=output_dir)
    else:
        raise ValueError(f"Unsupported model source {model_source}")

    # RKNN conversion is best-effort: a failure is reported and the run continues.
    try:
        rknn_export(output_dir, no_cache=no_cache)
    except Exception as e:
        print(f"Failed to export model {model_name} to rknn: {e}")
        # NOTE(review): unlink() removes files only; if "rknpu" is a directory here this
        # would raise instead of cleaning up - confirm the intended cleanup target.
        (output_dir / "rknpu").unlink(missing_ok=True)

    readme_path = output_dir / "README.md"
    needs_readme = no_cache or not readme_path.exists()
    if needs_readme:
        with open(readme_path, "w") as f:
            f.write(generate_readme(model_name, model_source))

    # Without a token there is nothing left to do.
    if hf_auth_token is None:
        return

    from huggingface_hub import create_repo, upload_folder

    repo_id = f"{hf_organization}/{hf_model_name}"

    @retry(stop=stop_after_attempt(5), wait=wait_fixed(5))
    def upload_model() -> None:
        create_repo(repo_id, exist_ok=True, token=hf_auth_token)
        upload_folder(
            repo_id=repo_id,
            folder_path=output_dir,
            # remote repo files to be deleted before uploading
            # deletion is in the same commit as the upload, so it's atomic
            delete_patterns=DELETE_PATTERNS,
            token=hf_auth_token,
        )

    upload_model()
if __name__ == "__main__":
    # Invoke the configured Typer app. typer.run(main) would wrap `main` in a brand-new
    # Typer instance and ignore the settings passed to `app` above, including
    # pretty_exceptions_show_locals=False (locals here can include the HF auth token).
    app()

View File

@@ -0,0 +1,42 @@
from enum import StrEnum
from typing import NamedTuple
class ModelSource(StrEnum):
    """Identifies which upstream family a model comes from; members are plain strings."""

    INSIGHTFACE = "insightface"
    MCLIP = "mclip"
    OPENCLIP = "openclip"
class SourceMetadata(NamedTuple):
    """Descriptive fields for a model source, stored per source in ``SOURCE_TO_METADATA``."""

    # Display name of the upstream project.
    name: str
    # URL of the upstream project.
    link: str
    # Short description of the kind of model (e.g. "CLIP").
    type: str
# Maps each model source to its display name, upstream link and model type.
SOURCE_TO_METADATA = {
    ModelSource.MCLIP: SourceMetadata("M-CLIP", "https://huggingface.co/M-CLIP", "CLIP"),
    ModelSource.OPENCLIP: SourceMetadata("OpenCLIP", "https://github.com/mlfoundations/open_clip", "CLIP"),
    ModelSource.INSIGHTFACE: SourceMetadata(
        "InsightFace", "https://github.com/deepinsight/insightface/tree/master", "facial recognition"
    ),
}
# Rockchip SoC names used as RKNN export targets.
RKNN_SOCS = [
    "rk3566",
    "rk3568",
    "rk3576",
    "rk3588",
]

# glob to delete old UUID blobs when reuploading models
# (a UUID is five dash-separated hex groups of 8, 4, 4, 4 and 12 characters)
_uuid_char = "[a-fA-F0-9]"
_uuid_glob = "-".join(_uuid_char * group_length for group_length in (8, 4, 4, 4, 12))

# Glob patterns for previously exported artifacts, ending with the UUID-named blobs.
DELETE_PATTERNS = [
    "**/*onnx*",
    "**/Constant*",
    "**/*.weight",
    "**/*.bias",
    "**/*.proj",
    "**/*in_proj_bias",
    "**/*.npy",
    "**/*.latent",
    "**/*.pos_embed",
    "**/" + _uuid_glob,
]

View File

@@ -0,0 +1,20 @@
from pathlib import Path
from ..constants import ModelSource
from .models import mclip, openclip
def export(
    model_name: str, model_source: ModelSource, output_dir: Path, opset_version: int = 19, no_cache: bool = False
) -> None:
    """Export a CLIP model to ONNX under ``output_dir/visual`` and ``output_dir/textual``.

    Args:
        model_name: Model identifier; OpenCLIP names are split on ``__`` into
            architecture name and pretrained tag.
        model_source: Selects the M-CLIP or OpenCLIP exporter.
        output_dir: Parent directory for the ``visual`` and ``textual`` subfolders.
        opset_version: ONNX opset forwarded to the exporter.
        no_cache: Forwarded to the exporter, which uses it to decide whether existing files are reused.

    Raises:
        ValueError: If ``model_source`` is neither M-CLIP nor OpenCLIP.
    """
    visual_dir, textual_dir = output_dir / "visual", output_dir / "textual"

    if model_source == ModelSource.MCLIP:
        mclip.to_onnx(model_name, opset_version, visual_dir, textual_dir, no_cache=no_cache)
        return

    if model_source == ModelSource.OPENCLIP:
        arch_name, _, pretrained_tag = model_name.partition("__")
        openclip_cfg = openclip.OpenCLIPModelConfig(arch_name, pretrained_tag)
        openclip.to_onnx(openclip_cfg, opset_version, visual_dir, textual_dir, no_cache=no_cache)
        return

    raise ValueError(f"Unsupported model source {model_source}")

View File

@@ -1,11 +1,6 @@
import os
import tempfile
import warnings
from pathlib import Path
import torch
from multilingual_clip.pt_multilingual_clip import MultilingualCLIP
from transformers import AutoTokenizer
from typing import Any
from .openclip import OpenCLIPModelConfig
from .openclip import to_onnx as openclip_to_onnx
@@ -21,25 +16,40 @@ _MCLIP_TO_OPENCLIP = {
def to_onnx(
model_name: str,
opset_version: int,
output_dir_visual: Path | str,
output_dir_textual: Path | str,
no_cache: bool = False,
) -> tuple[Path, Path]:
textual_path = get_model_path(output_dir_textual)
with tempfile.TemporaryDirectory() as tmpdir:
model = MultilingualCLIP.from_pretrained(model_name, cache_dir=os.environ.get("CACHE_DIR", tmpdir))
if no_cache or not textual_path.exists():
import torch
from multilingual_clip.pt_multilingual_clip import MultilingualCLIP
from transformers import AutoTokenizer
torch.backends.mha.set_fastpath_enabled(False)
model = MultilingualCLIP.from_pretrained(model_name)
AutoTokenizer.from_pretrained(model_name).save_pretrained(output_dir_textual)
model.eval()
for param in model.parameters():
param.requires_grad_(False)
export_text_encoder(model, textual_path)
visual_path, _ = openclip_to_onnx(_MCLIP_TO_OPENCLIP[model_name], output_dir_visual)
assert visual_path is not None, "Visual model export failed"
_export_text_encoder(model, textual_path, opset_version)
else:
print(f"Model {textual_path} already exists, skipping")
visual_path, _ = openclip_to_onnx(
_MCLIP_TO_OPENCLIP[model_name], opset_version, output_dir_visual, no_cache=no_cache
)
assert visual_path is not None, "Visual model export failed"
return visual_path, textual_path
def export_text_encoder(model: MultilingualCLIP, output_path: Path | str) -> None:
def _export_text_encoder(model: Any, output_path: Path | str, opset_version: int) -> None:
import torch
from multilingual_clip.pt_multilingual_clip import MultilingualCLIP
output_path = Path(output_path)
def forward(self: MultilingualCLIP, input_ids: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
@@ -61,7 +71,7 @@ def export_text_encoder(model: MultilingualCLIP, output_path: Path | str) -> Non
output_path.as_posix(),
input_names=["input_ids", "attention_mask"],
output_names=["embedding"],
opset_version=17,
opset_version=opset_version,
# dynamic_axes={
# "input_ids": {0: "batch_size", 1: "sequence_length"},
# "attention_mask": {0: "batch_size", 1: "sequence_length"},

View File

@@ -0,0 +1,153 @@
import warnings
from dataclasses import dataclass
from functools import cached_property
from pathlib import Path
from typing import Any
from .util import get_model_path, save_config
@dataclass
class OpenCLIPModelConfig:
    """Name/pretrained pair for an OpenCLIP model, with values read lazily from its config."""

    name: str
    pretrained: str

    @cached_property
    def model_config(self) -> dict[str, Any]:
        """OpenCLIP's config dict for ``name``, looked up on first access and then cached.

        Raises:
            ValueError: If OpenCLIP returns no config for ``name``.
        """
        # Imported here so constructing the dataclass does not require open_clip.
        import open_clip

        looked_up: dict[str, Any] | None = open_clip.get_model_config(self.name)
        if looked_up is not None:
            return looked_up
        raise ValueError(f"Unknown model {self.name}")

    @property
    def image_size(self) -> int:
        """The ``image_size`` entry of the vision config."""
        vision_cfg = self.model_config["vision_cfg"]
        size: int = vision_cfg["image_size"]
        return size

    @property
    def sequence_length(self) -> int:
        """The text config's ``context_length``, or 77 when the key is absent."""
        text_cfg = self.model_config["text_cfg"]
        length: int = text_cfg.get("context_length", 77)
        return length
def to_onnx(
    model_cfg: OpenCLIPModelConfig,
    opset_version: int,
    output_dir_visual: Path | str | None = None,
    output_dir_textual: Path | str | None = None,
    no_cache: bool = False,
) -> tuple[Path | None, Path | None]:
    """Export the visual and/or textual encoder of an OpenCLIP model to ONNX.

    Args:
        model_cfg: Model name and pretrained tag to load.
        opset_version: ONNX opset passed to the encoder exporters.
        output_dir_visual: Directory for the visual encoder; ``None`` skips it.
        output_dir_textual: Directory for the textual encoder; ``None`` skips it.
        no_cache: When true, re-export even if the target files already exist.

    Returns:
        ``(visual_path, textual_path)``; an entry is ``None`` when its directory was not given.
    """
    visual_dir = None if output_dir_visual is None else Path(output_dir_visual)
    visual_path = None if visual_dir is None else get_model_path(visual_dir)
    textual_dir = None if output_dir_textual is None else Path(output_dir_textual)
    textual_path = None if textual_dir is None else get_model_path(textual_dir)

    # Nothing to do when every requested output is already on disk.
    if not no_cache and all(path is None or path.exists() for path in (textual_path, visual_path)):
        print(f"Models {textual_path} and {visual_path} already exist, skipping")
        return visual_path, textual_path

    # Heavy dependencies are only imported once an export is actually needed.
    import open_clip
    import torch
    from transformers import AutoTokenizer

    torch.backends.mha.set_fastpath_enabled(False)

    model = open_clip.create_model(
        model_cfg.name,
        pretrained=model_cfg.pretrained,
        jit=False,
        require_pretrained=True,
    )
    text_vision_cfg = open_clip.get_model_config(model_cfg.name)

    # Inference mode with frozen weights before tracing.
    model.eval()
    for weight in model.parameters():
        weight.requires_grad_(False)

    if visual_path is not None and visual_dir is not None:
        if not no_cache and visual_path.exists():
            print(f"Model {visual_path} already exists, skipping")
        else:
            save_config(
                open_clip.get_model_preprocess_cfg(model),
                visual_dir / "preprocess_cfg.json",
            )
            save_config(text_vision_cfg, visual_dir.parent / "config.json")
            _export_image_encoder(model, model_cfg, visual_path, opset_version)

    if textual_path is not None and textual_dir is not None:
        if not no_cache and textual_path.exists():
            print(f"Model {textual_path} already exists, skipping")
        else:
            # Fall back to the OpenAI CLIP tokenizer when the config names none.
            tokenizer_name = text_vision_cfg["text_cfg"].get("hf_tokenizer_name", "openai/clip-vit-base-patch32")
            AutoTokenizer.from_pretrained(tokenizer_name).save_pretrained(textual_dir)
            _export_text_encoder(model, model_cfg, textual_path, opset_version)

    return visual_path, textual_path
def _export_image_encoder(
    model: Any, model_cfg: OpenCLIPModelConfig, output_path: Path | str, opset_version: int
) -> None:
    """Export the model's image encoder (normalized embeddings) to an ONNX file.

    Args:
        model: Loaded OpenCLIP model; its ``forward`` is replaced by the image-encoding function.
        model_cfg: Supplies the square input resolution for the sample input.
        output_path: Destination of the ONNX file.
        opset_version: ONNX opset to export with.
    """
    import torch

    destination = Path(output_path)

    def encode_image(image: torch.Tensor) -> torch.Tensor:
        embedding = model.encode_image(image, normalize=True)
        assert isinstance(embedding, torch.Tensor)
        return embedding

    # Route forward() through the image encoder so the export captures only that path.
    model.forward = encode_image

    side = model_cfg.image_size
    args = (torch.randn(1, 3, side, side),)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", UserWarning)
        torch.onnx.export(
            model,
            args,
            destination.as_posix(),
            input_names=["image"],
            output_names=["embedding"],
            opset_version=opset_version,
            # dynamic_axes={"image": {0: "batch_size"}},
        )
def _export_text_encoder(
    model: Any, model_cfg: OpenCLIPModelConfig, output_path: Path | str, opset_version: int
) -> None:
    """Export the model's text encoder (normalized embeddings) to an ONNX file.

    Args:
        model: Loaded OpenCLIP model; its ``forward`` is replaced by the text-encoding function.
        model_cfg: Supplies the token sequence length for the sample input.
        output_path: Destination of the ONNX file.
        opset_version: ONNX opset to export with.
    """
    import torch

    destination = Path(output_path)

    def encode_text(text: torch.Tensor) -> torch.Tensor:
        embedding = model.encode_text(text, normalize=True)
        assert isinstance(embedding, torch.Tensor)
        return embedding

    # Route forward() through the text encoder so the export captures only that path.
    model.forward = encode_text

    token_count = model_cfg.sequence_length
    args = (torch.ones(1, token_count, dtype=torch.int32),)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", UserWarning)
        torch.onnx.export(
            model,
            args,
            destination.as_posix(),
            input_names=["text"],
            output_names=["embedding"],
            opset_version=opset_version,
            # dynamic_axes={"text": {0: "batch_size"}},
        )

View File

@@ -0,0 +1,96 @@
from pathlib import Path
from .constants import RKNN_SOCS
def _export_platform(
model_dir: Path,
target_platform: str,
inputs: list[str] | None = None,
input_size_list: list[list[int]] | None = None,
fuse_matmul_softmax_matmul_to_sdpa: bool = True,
no_cache: bool = False,
) -> None:
from rknn.api import RKNN
input_path = model_dir / "model.onnx"
output_path = model_dir / "rknpu" / target_platform / "model.rknn"
if not no_cache and output_path.exists():
print(f"Model {input_path} already exists at {output_path}, skipping")
return
print(f"Exporting model {input_path} to {output_path}")
rknn = RKNN(verbose=False)
rknn.config(
target_platform=target_platform,
disable_rules=["fuse_matmul_softmax_matmul_to_sdpa"] if not fuse_matmul_softmax_matmul_to_sdpa else [],
enable_flash_attention=False,
model_pruning=True,
)
ret = rknn.load_onnx(model=input_path.as_posix(), inputs=inputs, input_size_list=input_size_list)
if ret != 0:
raise RuntimeError("Load failed!")
ret = rknn.build(do_quantization=False)
if ret != 0:
raise RuntimeError("Build failed!")
output_path.parent.mkdir(parents=True, exist_ok=True)
ret = rknn.export_rknn(output_path.as_posix())
if ret != 0:
raise RuntimeError("Export rknn model failed!")
def _export_platforms(
    model_dir: Path,
    inputs: list[str] | None = None,
    input_size_list: list[list[int]] | None = None,
    no_cache: bool = False,
) -> None:
    """Convert the model in ``model_dir`` for every SoC in ``RKNN_SOCS``.

    A failure is printed and the loop moves on, unless the error message says inputs or
    outputs must be set: then the same SoC is retried once with the SDPA fusion rule
    disabled, and the rule stays disabled for the remaining SoCs. An error raised by
    that retry propagates to the caller.
    """
    sdpa_fusion_enabled = True

    def convert(platform: str) -> None:
        # Reads the current fusion flag at call time, so a retry sees the updated value.
        _export_platform(
            model_dir,
            platform,
            inputs=inputs,
            input_size_list=input_size_list,
            fuse_matmul_softmax_matmul_to_sdpa=sdpa_fusion_enabled,
            no_cache=no_cache,
        )

    for soc in RKNN_SOCS:
        try:
            convert(soc)
        except Exception as e:
            print(f"Failed to export model for {soc}: {e}")
            if "inputs or 'outputs' must be set" not in str(e):
                continue
            print("Retrying without fuse_matmul_softmax_matmul_to_sdpa")
            sdpa_fusion_enabled = False
            convert(soc)
def export(model_dir: Path, no_cache: bool = False) -> None:
    """Convert each model subfolder present under ``model_dir`` to RKNN.

    The ``textual`` and ``visual`` folders are converted with default inputs; ``detection``
    and ``recognition`` are given an explicit ``input.1`` input with a fixed shape.
    Subfolders that do not exist are skipped.
    """
    targets: tuple[tuple[str, list[str] | None, list[list[int]] | None], ...] = (
        ("textual", None, None),
        ("visual", None, None),
        ("detection", ["input.1"], [[1, 3, 640, 640]]),
        ("recognition", ["input.1"], [[1, 3, 112, 112]]),
    )

    for folder_name, input_names, input_shapes in targets:
        candidate = model_dir / folder_name
        if not candidate.is_dir():
            continue
        _export_platforms(candidate, inputs=input_names, input_size_list=input_shapes, no_cache=no_cache)

View File

@@ -0,0 +1,88 @@
import subprocess
from exporters.constants import ModelSource
mclip = [
"M-CLIP/LABSE-Vit-L-14",
"M-CLIP/XLM-Roberta-Large-Vit-B-16Plus",
"M-CLIP/XLM-Roberta-Large-Vit-B-32",
"M-CLIP/XLM-Roberta-Large-Vit-L-14",
]
openclip = [
"RN101__openai",
"RN101__yfcc15m",
"RN50__cc12m",
"RN50__openai",
"RN50__yfcc15m",
"RN50x16__openai",
"RN50x4__openai",
"RN50x64__openai",
"ViT-B-16-SigLIP-256__webli",
"ViT-B-16-SigLIP-384__webli",
"ViT-B-16-SigLIP-512__webli",
"ViT-B-16-SigLIP-i18n-256__webli",
"ViT-B-16-SigLIP2__webli",
"ViT-B-16-SigLIP__webli",
"ViT-B-16-plus-240__laion400m_e31",
"ViT-B-16-plus-240__laion400m_e32",
"ViT-B-16__laion400m_e31",
"ViT-B-16__laion400m_e32",
"ViT-B-16__openai",
"ViT-B-32-SigLIP2-256__webli",
"ViT-B-32__laion2b-s34b-b79k",
"ViT-B-32__laion2b_e16",
"ViT-B-32__laion400m_e31",
"ViT-B-32__laion400m_e32",
"ViT-B-32__openai",
"ViT-H-14-378-quickgelu__dfn5b",
"ViT-H-14-quickgelu__dfn5b",
"ViT-H-14__laion2b-s32b-b79k",
"ViT-L-14-336__openai",
"ViT-L-14-quickgelu__dfn2b",
"ViT-L-14__laion2b-s32b-b82k",
"ViT-L-14__laion400m_e31",
"ViT-L-14__laion400m_e32",
"ViT-L-14__openai",
"ViT-L-16-SigLIP-256__webli",
"ViT-L-16-SigLIP-384__webli",
"ViT-L-16-SigLIP2-256__webli",
"ViT-L-16-SigLIP2-384__webli",
"ViT-L-16-SigLIP2-512__webli",
"ViT-SO400M-14-SigLIP-384__webli",
"ViT-SO400M-14-SigLIP2-378__webli",
"ViT-SO400M-14-SigLIP2__webli",
"ViT-SO400M-16-SigLIP2-256__webli",
"ViT-SO400M-16-SigLIP2-384__webli",
"ViT-SO400M-16-SigLIP2-512__webli",
"ViT-gopt-16-SigLIP2-256__webli",
"ViT-gopt-16-SigLIP2-384__webli",
"nllb-clip-base-siglip__mrl",
"nllb-clip-base-siglip__v1",
"nllb-clip-large-siglip__mrl",
"nllb-clip-large-siglip__v1",
"xlm-roberta-base-ViT-B-32__laion5b_s13b_b90k",
"xlm-roberta-large-ViT-H-14__frozen_laion5b_s13b_b90k",
]
insightface = [
"antelopev2",
"buffalo_l",
"buffalo_m",
"buffalo_s",
]
def export_models(models: list[str], source: ModelSource) -> None:
    """Export each model in its own subprocess, continuing past individual failures.

    Args:
        models: Model names passed one at a time to the export CLI.
        source: Model source passed as the CLI's second argument.
    """
    import sys

    for model in models:
        try:
            print(f"Exporting model {model}")
            # sys.executable guarantees the child uses the same interpreter/environment as
            # this script, instead of whichever "python" happens to be first on PATH.
            subprocess.check_call([sys.executable, "-m", "immich_model_exporter.export", model, source])
        except Exception as e:
            print(f"Failed to export model {model}: {e}")
if __name__ == "__main__":
    # Export every known model, one source family at a time.
    for model_names, model_source in (
        (mclip, ModelSource.MCLIP),
        (openclip, ModelSource.OPENCLIP),
        (insightface, ModelSource.INSIGHTFACE),
    ):
        export_models(model_names, model_source)

View File

@@ -1,114 +0,0 @@
import os
import tempfile
import warnings
from dataclasses import dataclass, field
from pathlib import Path
import open_clip
import torch
from transformers import AutoTokenizer
from .util import get_model_path, save_config
@dataclass
class OpenCLIPModelConfig:
name: str
pretrained: str
image_size: int = field(init=False)
sequence_length: int = field(init=False)
def __post_init__(self) -> None:
open_clip_cfg = open_clip.get_model_config(self.name)
if open_clip_cfg is None:
raise ValueError(f"Unknown model {self.name}")
self.image_size = open_clip_cfg["vision_cfg"]["image_size"]
self.sequence_length = open_clip_cfg["text_cfg"].get("context_length", 77)
def to_onnx(
model_cfg: OpenCLIPModelConfig,
output_dir_visual: Path | str | None = None,
output_dir_textual: Path | str | None = None,
) -> tuple[Path | None, Path | None]:
visual_path = None
textual_path = None
with tempfile.TemporaryDirectory() as tmpdir:
model = open_clip.create_model(
model_cfg.name,
pretrained=model_cfg.pretrained,
jit=False,
cache_dir=os.environ.get("CACHE_DIR", tmpdir),
require_pretrained=True,
)
text_vision_cfg = open_clip.get_model_config(model_cfg.name)
model.eval()
for param in model.parameters():
param.requires_grad_(False)
if output_dir_visual is not None:
output_dir_visual = Path(output_dir_visual)
visual_path = get_model_path(output_dir_visual)
save_config(open_clip.get_model_preprocess_cfg(model), output_dir_visual / "preprocess_cfg.json")
save_config(text_vision_cfg, output_dir_visual.parent / "config.json")
export_image_encoder(model, model_cfg, visual_path)
if output_dir_textual is not None:
output_dir_textual = Path(output_dir_textual)
textual_path = get_model_path(output_dir_textual)
tokenizer_name = text_vision_cfg["text_cfg"].get("hf_tokenizer_name", "openai/clip-vit-base-patch32")
AutoTokenizer.from_pretrained(tokenizer_name).save_pretrained(output_dir_textual)
export_text_encoder(model, model_cfg, textual_path)
return visual_path, textual_path
def export_image_encoder(model: open_clip.CLIP, model_cfg: OpenCLIPModelConfig, output_path: Path | str) -> None:
output_path = Path(output_path)
def encode_image(image: torch.Tensor) -> torch.Tensor:
output = model.encode_image(image, normalize=True)
assert isinstance(output, torch.Tensor)
return output
args = (torch.randn(1, 3, model_cfg.image_size, model_cfg.image_size),)
traced = torch.jit.trace(encode_image, args) # type: ignore[no-untyped-call]
with warnings.catch_warnings():
warnings.simplefilter("ignore", UserWarning)
torch.onnx.export(
traced,
args,
output_path.as_posix(),
input_names=["image"],
output_names=["embedding"],
opset_version=17,
# dynamic_axes={"image": {0: "batch_size"}},
)
def export_text_encoder(model: open_clip.CLIP, model_cfg: OpenCLIPModelConfig, output_path: Path | str) -> None:
output_path = Path(output_path)
def encode_text(text: torch.Tensor) -> torch.Tensor:
output = model.encode_text(text, normalize=True)
assert isinstance(output, torch.Tensor)
return output
args = (torch.ones(1, model_cfg.sequence_length, dtype=torch.int32),)
traced = torch.jit.trace(encode_text, args) # type: ignore[no-untyped-call]
with warnings.catch_warnings():
warnings.simplefilter("ignore", UserWarning)
torch.onnx.export(
traced,
args,
output_path.as_posix(),
input_names=["text"],
output_names=["embedding"],
opset_version=17,
# dynamic_axes={"text": {0: "batch_size"}},
)

View File

@@ -1,49 +0,0 @@
from pathlib import Path
import onnx
import onnxruntime as ort
import onnxsim
def save_onnx(model: onnx.ModelProto, output_path: Path | str) -> None:
try:
onnx.save(model, output_path)
except ValueError as e:
if "The proto size is larger than the 2 GB limit." in str(e):
onnx.save(model, output_path, save_as_external_data=True, size_threshold=1_000_000)
else:
raise e
def optimize_onnxsim(model_path: Path | str, output_path: Path | str) -> None:
model_path = Path(model_path)
output_path = Path(output_path)
model = onnx.load(model_path.as_posix())
model, check = onnxsim.simplify(model)
assert check, "Simplified ONNX model could not be validated"
for file in model_path.parent.iterdir():
if file.name.startswith("Constant") or "onnx" in file.name or file.suffix == ".weight":
file.unlink()
save_onnx(model, output_path)
def optimize_ort(
model_path: Path | str,
output_path: Path | str,
level: ort.GraphOptimizationLevel = ort.GraphOptimizationLevel.ORT_ENABLE_BASIC,
) -> None:
model_path = Path(model_path)
output_path = Path(output_path)
sess_options = ort.SessionOptions()
sess_options.graph_optimization_level = level
sess_options.optimized_model_filepath = output_path.as_posix()
ort.InferenceSession(model_path.as_posix(), providers=["CPUExecutionProvider"], sess_options=sess_options)
def optimize(model_path: Path | str) -> None:
model_path = Path(model_path)
optimize_ort(model_path, model_path)
optimize_onnxsim(model_path, model_path)

View File

@@ -0,0 +1,67 @@
[project]
name = "immich_model_exporter"
version = "0.1.0"
description = "Exports Immich machine-learning models to ONNX and RKNN and uploads them to Hugging Face"
readme = "README.md"
requires-python = ">=3.10, <4.0"
dependencies = [
"huggingface-hub>=0.29.3",
"multilingual-clip>=1.0.10",
"onnx>=1.14.1",
"onnxruntime>=1.16.0",
"open-clip-torch>=2.31.0",
"typer>=0.15.2",
"rknn-toolkit2>=2.3.0",
"transformers>=4.49.0",
"tenacity>=9.0.0",
]
[dependency-groups]
dev = ["black>=23.3.0", "mypy>=1.3.0", "ruff>=0.0.272"]
[tool.uv]
override-dependencies = [
"onnx>=1.16.0,<2",
"onnxruntime>=1.18.2,<2",
"torch>=2.4",
"torchvision>=0.21",
]
[tool.uv.sources]
torch = [{ index = "pytorch-cpu" }]
torchvision = [{ index = "pytorch-cpu" }]
[[tool.uv.index]]
name = "pytorch-cpu"
url = "https://download.pytorch.org/whl/cpu"
explicit = true
[tool.hatch.build.targets.sdist]
include = ["immich_model_exporter"]
[tool.hatch.build.targets.wheel]
include = ["immich_model_exporter"]
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
[tool.mypy]
python_version = "3.12"
follow_imports = "silent"
warn_redundant_casts = true
disallow_any_generics = true
check_untyped_defs = true
disallow_untyped_defs = true
ignore_missing_imports = true
[tool.ruff]
line-length = 120
target-version = "py312"
[tool.ruff.lint]
select = ["E", "F", "I"]
[tool.black]
line-length = 120
target-version = ['py312']

View File

@@ -1,113 +0,0 @@
import gc
import os
from pathlib import Path
from tempfile import TemporaryDirectory
import torch
from huggingface_hub import create_repo, upload_folder
from models import mclip, openclip
from models.optimize import optimize
from rich.progress import Progress
models = [
"M-CLIP/LABSE-Vit-L-14",
"M-CLIP/XLM-Roberta-Large-Vit-B-16Plus",
"M-CLIP/XLM-Roberta-Large-Vit-B-32",
"M-CLIP/XLM-Roberta-Large-Vit-L-14",
"RN101::openai",
"RN101::yfcc15m",
"RN50::cc12m",
"RN50::openai",
"RN50::yfcc15m",
"RN50x16::openai",
"RN50x4::openai",
"RN50x64::openai",
"ViT-B-16-SigLIP-256::webli",
"ViT-B-16-SigLIP-384::webli",
"ViT-B-16-SigLIP-512::webli",
"ViT-B-16-SigLIP-i18n-256::webli",
"ViT-B-16-SigLIP::webli",
"ViT-B-16-plus-240::laion400m_e31",
"ViT-B-16-plus-240::laion400m_e32",
"ViT-B-16::laion400m_e31",
"ViT-B-16::laion400m_e32",
"ViT-B-16::openai",
"ViT-B-32::laion2b-s34b-b79k",
"ViT-B-32::laion2b_e16",
"ViT-B-32::laion400m_e31",
"ViT-B-32::laion400m_e32",
"ViT-B-32::openai",
"ViT-H-14-378-quickgelu::dfn5b",
"ViT-H-14-quickgelu::dfn5b",
"ViT-H-14::laion2b-s32b-b79k",
"ViT-L-14-336::openai",
"ViT-L-14-quickgelu::dfn2b",
"ViT-L-14::laion2b-s32b-b82k",
"ViT-L-14::laion400m_e31",
"ViT-L-14::laion400m_e32",
"ViT-L-14::openai",
"ViT-L-16-SigLIP-256::webli",
"ViT-L-16-SigLIP-384::webli",
"ViT-SO400M-14-SigLIP-384::webli",
"ViT-g-14::laion2b-s12b-b42k",
"nllb-clip-base-siglip::mrl",
"nllb-clip-base-siglip::v1",
"nllb-clip-large-siglip::mrl",
"nllb-clip-large-siglip::v1",
"xlm-roberta-base-ViT-B-32::laion5b_s13b_b90k",
"xlm-roberta-large-ViT-H-14::frozen_laion5b_s13b_b90k",
]
# glob to delete old UUID blobs when reuploading models
uuid_char = "[a-fA-F0-9]"
uuid_glob = uuid_char * 8 + "-" + uuid_char * 4 + "-" + uuid_char * 4 + "-" + uuid_char * 4 + "-" + uuid_char * 12
# remote repo files to be deleted before uploading
# deletion is in the same commit as the upload, so it's atomic
delete_patterns = ["**/*onnx*", "**/Constant*", "**/*.weight", "**/*.bias", f"**/{uuid_glob}"]
with Progress() as progress:
task = progress.add_task("[green]Exporting models...", total=len(models))
token = os.environ.get("HF_AUTH_TOKEN")
torch.backends.mha.set_fastpath_enabled(False)
with TemporaryDirectory() as tmp:
tmpdir = Path(tmp)
for model in models:
model_name = model.split("/")[-1].replace("::", "__")
hf_model_name = model_name.replace("xlm-roberta-large", "XLM-Roberta-Large")
hf_model_name = model_name.replace("xlm-roberta-base", "XLM-Roberta-Base")
config_path = tmpdir / model_name / "config.json"
def export() -> None:
progress.update(task, description=f"[green]Exporting {hf_model_name}")
visual_dir = tmpdir / hf_model_name / "visual"
textual_dir = tmpdir / hf_model_name / "textual"
if model.startswith("M-CLIP"):
visual_path, textual_path = mclip.to_onnx(model, visual_dir, textual_dir)
else:
name, _, pretrained = model_name.partition("__")
config = openclip.OpenCLIPModelConfig(name, pretrained)
visual_path, textual_path = openclip.to_onnx(config, visual_dir, textual_dir)
progress.update(task, description=f"[green]Optimizing {hf_model_name} (visual)")
optimize(visual_path)
progress.update(task, description=f"[green]Optimizing {hf_model_name} (textual)")
optimize(textual_path)
gc.collect()
def upload() -> None:
progress.update(task, description=f"[yellow]Uploading {hf_model_name}")
repo_id = f"immich-app/{hf_model_name}"
create_repo(repo_id, exist_ok=True)
upload_folder(
repo_id=repo_id,
folder_path=tmpdir / hf_model_name,
delete_patterns=delete_patterns,
token=token,
)
export()
if token is not None:
upload()
progress.update(task, advance=1)

1395
machine-learning/export/uv.lock generated Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -51,6 +51,7 @@ cpu = ["onnxruntime>=1.15.0,<2"]
cuda = ["onnxruntime-gpu>=1.17.0,<2"]
openvino = ["onnxruntime-openvino>=1.17.1,<1.19.0"]
armnn = ["onnxruntime>=1.15.0,<2"]
rknn = ["onnxruntime>=1.15.0,<2", "rknn-toolkit-lite2>=2.3.0,<3"]
[tool.uv]
compile-bytecode = true

View File

@@ -1109,6 +1109,10 @@ cuda = [
openvino = [
{ name = "onnxruntime-openvino" },
]
rknn = [
{ name = "onnxruntime" },
{ name = "rknn-toolkit-lite2" },
]
[package.dev-dependencies]
dev = [
@@ -1162,6 +1166,7 @@ requires-dist = [
{ name = "insightface", specifier = ">=0.7.3,<1.0" },
{ name = "onnxruntime", marker = "extra == 'armnn'", specifier = ">=1.15.0,<2" },
{ name = "onnxruntime", marker = "extra == 'cpu'", specifier = ">=1.15.0,<2" },
{ name = "onnxruntime", marker = "extra == 'rknn'", specifier = ">=1.15.0,<2" },
{ name = "onnxruntime-gpu", marker = "extra == 'cuda'", specifier = ">=1.17.0,<2", index = "https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/onnxruntime-cuda-12/pypi/simple/" },
{ name = "onnxruntime-openvino", marker = "extra == 'openvino'", specifier = ">=1.17.1,<1.19.0" },
{ name = "opencv-python-headless", specifier = ">=4.7.0.72,<5.0" },
@@ -1171,10 +1176,11 @@ requires-dist = [
{ name = "pydantic-settings", specifier = ">=2.5.2,<3" },
{ name = "python-multipart", specifier = ">=0.0.6,<1.0" },
{ name = "rich", specifier = ">=13.4.2" },
{ name = "rknn-toolkit-lite2", marker = "extra == 'rknn'", specifier = ">=2.3.0,<3" },
{ name = "tokenizers", specifier = ">=0.15.0,<1.0" },
{ name = "uvicorn", extras = ["standard"], specifier = ">=0.22.0,<1.0" },
]
provides-extras = ["cpu", "cuda", "openvino", "armnn"]
provides-extras = ["cpu", "cuda", "openvino", "armnn", "rknn"]
[package.metadata.requires-dev]
dev = [
@@ -2131,6 +2137,77 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/19/71/39c7c0d87f8d4e6c020a393182060eaefeeae6c01dab6a84ec346f2567df/rich-13.9.4-py3-none-any.whl", hash = "sha256:6049d5e6ec054bf2779ab3358186963bac2ea89175919d699e378b99738c2a90", size = 242424 },
]
[[package]]
name = "rknn-toolkit-lite2"
version = "2.3.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "numpy" },
{ name = "psutil" },
{ name = "ruamel-yaml" },
]
wheels = [
{ url = "https://files.pythonhosted.org/packages/ed/77/6af374a4a8cd2aee762a1fb8a3050dcf3f129134bbdc4bb6bed755c4325b/rknn_toolkit_lite2-2.3.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4b6733689bd09a262bcb6ba4744e690dd4b37ebeac4ed427cf45242c4b4ce9a4", size = 559372 },
{ url = "https://files.pythonhosted.org/packages/9b/0c/76ff1eb09d09ce4394a6959d2343a321d28dd9e604348ffdafceafdc344c/rknn_toolkit_lite2-2.3.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e3e4fefe355dc34a155680e4bcb9e4abb37ebc271f045ec9e0a4a3a018bc5beb", size = 569149 },
{ url = "https://files.pythonhosted.org/packages/0d/6e/8679562028051b02312212defc6e8c07248953f10dd7ad506e941b575bf3/rknn_toolkit_lite2-2.3.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:37394371d1561f470c553f39869d7c35ff93405dffe3d0d72babf297a2b0aee9", size = 527457 },
]
[[package]]
name = "ruamel-yaml"
version = "0.18.10"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "ruamel-yaml-clib", marker = "python_full_version < '3.13' and platform_python_implementation == 'CPython'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/ea/46/f44d8be06b85bc7c4d8c95d658be2b68f27711f279bf9dd0612a5e4794f5/ruamel.yaml-0.18.10.tar.gz", hash = "sha256:20c86ab29ac2153f80a428e1254a8adf686d3383df04490514ca3b79a362db58", size = 143447 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/c2/36/dfc1ebc0081e6d39924a2cc53654497f967a084a436bb64402dfce4254d9/ruamel.yaml-0.18.10-py3-none-any.whl", hash = "sha256:30f22513ab2301b3d2b577adc121c6471f28734d3d9728581245f1e76468b4f1", size = 117729 },
]
[[package]]
name = "ruamel-yaml-clib"
version = "0.2.12"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/20/84/80203abff8ea4993a87d823a5f632e4d92831ef75d404c9fc78d0176d2b5/ruamel.yaml.clib-0.2.12.tar.gz", hash = "sha256:6c8fbb13ec503f99a91901ab46e0b07ae7941cd527393187039aec586fdfd36f", size = 225315 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/70/57/40a958e863e299f0c74ef32a3bde9f2d1ea8d69669368c0c502a0997f57f/ruamel.yaml.clib-0.2.12-cp310-cp310-macosx_13_0_arm64.whl", hash = "sha256:11f891336688faf5156a36293a9c362bdc7c88f03a8a027c2c1d8e0bcde998e5", size = 131301 },
{ url = "https://files.pythonhosted.org/packages/98/a8/29a3eb437b12b95f50a6bcc3d7d7214301c6c529d8fdc227247fa84162b5/ruamel.yaml.clib-0.2.12-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:a606ef75a60ecf3d924613892cc603b154178ee25abb3055db5062da811fd969", size = 633728 },
{ url = "https://files.pythonhosted.org/packages/35/6d/ae05a87a3ad540259c3ad88d71275cbd1c0f2d30ae04c65dcbfb6dcd4b9f/ruamel.yaml.clib-0.2.12-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd5415dded15c3822597455bc02bcd66e81ef8b7a48cb71a33628fc9fdde39df", size = 722230 },
{ url = "https://files.pythonhosted.org/packages/7f/b7/20c6f3c0b656fe609675d69bc135c03aac9e3865912444be6339207b6648/ruamel.yaml.clib-0.2.12-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f66efbc1caa63c088dead1c4170d148eabc9b80d95fb75b6c92ac0aad2437d76", size = 686712 },
{ url = "https://files.pythonhosted.org/packages/cd/11/d12dbf683471f888d354dac59593873c2b45feb193c5e3e0f2ebf85e68b9/ruamel.yaml.clib-0.2.12-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:22353049ba4181685023b25b5b51a574bce33e7f51c759371a7422dcae5402a6", size = 663936 },
{ url = "https://files.pythonhosted.org/packages/72/14/4c268f5077db5c83f743ee1daeb236269fa8577133a5cfa49f8b382baf13/ruamel.yaml.clib-0.2.12-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:932205970b9f9991b34f55136be327501903f7c66830e9760a8ffb15b07f05cd", size = 696580 },
{ url = "https://files.pythonhosted.org/packages/30/fc/8cd12f189c6405a4c1cf37bd633aa740a9538c8e40497c231072d0fef5cf/ruamel.yaml.clib-0.2.12-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:a52d48f4e7bf9005e8f0a89209bf9a73f7190ddf0489eee5eb51377385f59f2a", size = 663393 },
{ url = "https://files.pythonhosted.org/packages/80/29/c0a017b704aaf3cbf704989785cd9c5d5b8ccec2dae6ac0c53833c84e677/ruamel.yaml.clib-0.2.12-cp310-cp310-win32.whl", hash = "sha256:3eac5a91891ceb88138c113f9db04f3cebdae277f5d44eaa3651a4f573e6a5da", size = 100326 },
{ url = "https://files.pythonhosted.org/packages/3a/65/fa39d74db4e2d0cd252355732d966a460a41cd01c6353b820a0952432839/ruamel.yaml.clib-0.2.12-cp310-cp310-win_amd64.whl", hash = "sha256:ab007f2f5a87bd08ab1499bdf96f3d5c6ad4dcfa364884cb4549aa0154b13a28", size = 118079 },
{ url = "https://files.pythonhosted.org/packages/fb/8f/683c6ad562f558cbc4f7c029abcd9599148c51c54b5ef0f24f2638da9fbb/ruamel.yaml.clib-0.2.12-cp311-cp311-macosx_13_0_arm64.whl", hash = "sha256:4a6679521a58256a90b0d89e03992c15144c5f3858f40d7c18886023d7943db6", size = 132224 },
{ url = "https://files.pythonhosted.org/packages/3c/d2/b79b7d695e2f21da020bd44c782490578f300dd44f0a4c57a92575758a76/ruamel.yaml.clib-0.2.12-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:d84318609196d6bd6da0edfa25cedfbabd8dbde5140a0a23af29ad4b8f91fb1e", size = 641480 },
{ url = "https://files.pythonhosted.org/packages/68/6e/264c50ce2a31473a9fdbf4fa66ca9b2b17c7455b31ef585462343818bd6c/ruamel.yaml.clib-0.2.12-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bb43a269eb827806502c7c8efb7ae7e9e9d0573257a46e8e952f4d4caba4f31e", size = 739068 },
{ url = "https://files.pythonhosted.org/packages/86/29/88c2567bc893c84d88b4c48027367c3562ae69121d568e8a3f3a8d363f4d/ruamel.yaml.clib-0.2.12-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:811ea1594b8a0fb466172c384267a4e5e367298af6b228931f273b111f17ef52", size = 703012 },
{ url = "https://files.pythonhosted.org/packages/11/46/879763c619b5470820f0cd6ca97d134771e502776bc2b844d2adb6e37753/ruamel.yaml.clib-0.2.12-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:cf12567a7b565cbf65d438dec6cfbe2917d3c1bdddfce84a9930b7d35ea59642", size = 704352 },
{ url = "https://files.pythonhosted.org/packages/02/80/ece7e6034256a4186bbe50dee28cd032d816974941a6abf6a9d65e4228a7/ruamel.yaml.clib-0.2.12-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:7dd5adc8b930b12c8fc5b99e2d535a09889941aa0d0bd06f4749e9a9397c71d2", size = 737344 },
{ url = "https://files.pythonhosted.org/packages/f0/ca/e4106ac7e80efbabdf4bf91d3d32fc424e41418458251712f5672eada9ce/ruamel.yaml.clib-0.2.12-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1492a6051dab8d912fc2adeef0e8c72216b24d57bd896ea607cb90bb0c4981d3", size = 714498 },
{ url = "https://files.pythonhosted.org/packages/67/58/b1f60a1d591b771298ffa0428237afb092c7f29ae23bad93420b1eb10703/ruamel.yaml.clib-0.2.12-cp311-cp311-win32.whl", hash = "sha256:bd0a08f0bab19093c54e18a14a10b4322e1eacc5217056f3c063bd2f59853ce4", size = 100205 },
{ url = "https://files.pythonhosted.org/packages/b4/4f/b52f634c9548a9291a70dfce26ca7ebce388235c93588a1068028ea23fcc/ruamel.yaml.clib-0.2.12-cp311-cp311-win_amd64.whl", hash = "sha256:a274fb2cb086c7a3dea4322ec27f4cb5cc4b6298adb583ab0e211a4682f241eb", size = 118185 },
{ url = "https://files.pythonhosted.org/packages/48/41/e7a405afbdc26af961678474a55373e1b323605a4f5e2ddd4a80ea80f628/ruamel.yaml.clib-0.2.12-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:20b0f8dc160ba83b6dcc0e256846e1a02d044e13f7ea74a3d1d56ede4e48c632", size = 133433 },
{ url = "https://files.pythonhosted.org/packages/ec/b0/b850385604334c2ce90e3ee1013bd911aedf058a934905863a6ea95e9eb4/ruamel.yaml.clib-0.2.12-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:943f32bc9dedb3abff9879edc134901df92cfce2c3d5c9348f172f62eb2d771d", size = 647362 },
{ url = "https://files.pythonhosted.org/packages/44/d0/3f68a86e006448fb6c005aee66565b9eb89014a70c491d70c08de597f8e4/ruamel.yaml.clib-0.2.12-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95c3829bb364fdb8e0332c9931ecf57d9be3519241323c5274bd82f709cebc0c", size = 754118 },
{ url = "https://files.pythonhosted.org/packages/52/a9/d39f3c5ada0a3bb2870d7db41901125dbe2434fa4f12ca8c5b83a42d7c53/ruamel.yaml.clib-0.2.12-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:749c16fcc4a2b09f28843cda5a193e0283e47454b63ec4b81eaa2242f50e4ccd", size = 706497 },
{ url = "https://files.pythonhosted.org/packages/b0/fa/097e38135dadd9ac25aecf2a54be17ddf6e4c23e43d538492a90ab3d71c6/ruamel.yaml.clib-0.2.12-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:bf165fef1f223beae7333275156ab2022cffe255dcc51c27f066b4370da81e31", size = 698042 },
{ url = "https://files.pythonhosted.org/packages/ec/d5/a659ca6f503b9379b930f13bc6b130c9f176469b73b9834296822a83a132/ruamel.yaml.clib-0.2.12-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:32621c177bbf782ca5a18ba4d7af0f1082a3f6e517ac2a18b3974d4edf349680", size = 745831 },
{ url = "https://files.pythonhosted.org/packages/db/5d/36619b61ffa2429eeaefaab4f3374666adf36ad8ac6330d855848d7d36fd/ruamel.yaml.clib-0.2.12-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:b82a7c94a498853aa0b272fd5bc67f29008da798d4f93a2f9f289feb8426a58d", size = 715692 },
{ url = "https://files.pythonhosted.org/packages/b1/82/85cb92f15a4231c89b95dfe08b09eb6adca929ef7df7e17ab59902b6f589/ruamel.yaml.clib-0.2.12-cp312-cp312-win32.whl", hash = "sha256:e8c4ebfcfd57177b572e2040777b8abc537cdef58a2120e830124946aa9b42c5", size = 98777 },
{ url = "https://files.pythonhosted.org/packages/d7/8f/c3654f6f1ddb75daf3922c3d8fc6005b1ab56671ad56ffb874d908bfa668/ruamel.yaml.clib-0.2.12-cp312-cp312-win_amd64.whl", hash = "sha256:0467c5965282c62203273b838ae77c0d29d7638c8a4e3a1c8bdd3602c10904e4", size = 115523 },
{ url = "https://files.pythonhosted.org/packages/29/00/4864119668d71a5fa45678f380b5923ff410701565821925c69780356ffa/ruamel.yaml.clib-0.2.12-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:4c8c5d82f50bb53986a5e02d1b3092b03622c02c2eb78e29bec33fd9593bae1a", size = 132011 },
{ url = "https://files.pythonhosted.org/packages/7f/5e/212f473a93ae78c669ffa0cb051e3fee1139cb2d385d2ae1653d64281507/ruamel.yaml.clib-0.2.12-cp313-cp313-manylinux2014_aarch64.whl", hash = "sha256:e7e3736715fbf53e9be2a79eb4db68e4ed857017344d697e8b9749444ae57475", size = 642488 },
{ url = "https://files.pythonhosted.org/packages/1f/8f/ecfbe2123ade605c49ef769788f79c38ddb1c8fa81e01f4dbf5cf1a44b16/ruamel.yaml.clib-0.2.12-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b7e75b4965e1d4690e93021adfcecccbca7d61c7bddd8e22406ef2ff20d74ef", size = 745066 },
{ url = "https://files.pythonhosted.org/packages/e2/a9/28f60726d29dfc01b8decdb385de4ced2ced9faeb37a847bd5cf26836815/ruamel.yaml.clib-0.2.12-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:96777d473c05ee3e5e3c3e999f5d23c6f4ec5b0c38c098b3a5229085f74236c6", size = 701785 },
{ url = "https://files.pythonhosted.org/packages/84/7e/8e7ec45920daa7f76046578e4f677a3215fe8f18ee30a9cb7627a19d9b4c/ruamel.yaml.clib-0.2.12-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:3bc2a80e6420ca8b7d3590791e2dfc709c88ab9152c00eeb511c9875ce5778bf", size = 693017 },
{ url = "https://files.pythonhosted.org/packages/c5/b3/d650eaade4ca225f02a648321e1ab835b9d361c60d51150bac49063b83fa/ruamel.yaml.clib-0.2.12-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:e188d2699864c11c36cdfdada94d781fd5d6b0071cd9c427bceb08ad3d7c70e1", size = 741270 },
{ url = "https://files.pythonhosted.org/packages/87/b8/01c29b924dcbbed75cc45b30c30d565d763b9c4d540545a0eeecffb8f09c/ruamel.yaml.clib-0.2.12-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4f6f3eac23941b32afccc23081e1f50612bdbe4e982012ef4f5797986828cd01", size = 709059 },
{ url = "https://files.pythonhosted.org/packages/30/8c/ed73f047a73638257aa9377ad356bea4d96125b305c34a28766f4445cc0f/ruamel.yaml.clib-0.2.12-cp313-cp313-win32.whl", hash = "sha256:6442cb36270b3afb1b4951f060eccca1ce49f3d087ca1ca4563a6eb479cb3de6", size = 98583 },
{ url = "https://files.pythonhosted.org/packages/b0/85/e8e751d8791564dd333d5d9a4eab0a7a115f7e349595417fd50ecae3395c/ruamel.yaml.clib-0.2.12-cp313-cp313-win_amd64.whl", hash = "sha256:e5b8daf27af0b90da7bb903a876477a9e6d7270be6146906b276605997c7e9a3", size = 115190 },
]
[[package]]
name = "ruff"
version = "0.9.9"

View File

@@ -96,6 +96,18 @@ export const CLIP_MODEL_INFO: Record<string, ModelInfo> = {
'ViT-SO400M-14-SigLIP-384__webli': { dimSize: 1152 },
'nllb-clip-large-siglip__mrl': { dimSize: 1152 },
'nllb-clip-large-siglip__v1': { dimSize: 1152 },
'ViT-B-16-SigLIP2__webli': { dimSize: 768 },
'ViT-B-32-SigLIP2-256__webli': { dimSize: 768 },
'ViT-L-16-SigLIP2-256__webli': { dimSize: 1024 },
'ViT-L-16-SigLIP2-384__webli': { dimSize: 1024 },
'ViT-L-16-SigLIP2-512__webli': { dimSize: 1024 },
'ViT-SO400M-14-SigLIP2__webli': { dimSize: 1152 },
'ViT-SO400M-14-SigLIP2-378__webli': { dimSize: 1152 },
'ViT-SO400M-16-SigLIP2-256__webli': { dimSize: 1152 },
'ViT-SO400M-16-SigLIP2-384__webli': { dimSize: 1152 },
'ViT-SO400M-16-SigLIP2-512__webli': { dimSize: 1152 },
'ViT-gopt-16-SigLIP2-256__webli': { dimSize: 1536 },
'ViT-gopt-16-SigLIP2-384__webli': { dimSize: 1536 },
};
type SharpRotationData = {