From 50b34c44549707969d59ec27617b5f02295d039c Mon Sep 17 00:00:00 2001 From: Mateusz Sluszniak <56299341+msluszniak@users.noreply.github.com> Date: Tue, 3 Mar 2026 16:42:55 +0100 Subject: [PATCH 1/2] Update dependabot configuration for npm packages Consolidate npm update configuration and change schedule to monthly. --- .github/dependabot.yml | 92 ++++++------------------------------------ 1 file changed, 12 insertions(+), 80 deletions(-) diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 29651a3c01..a06b1c5f58 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -1,91 +1,23 @@ version: 2 updates: - package-ecosystem: "npm" - directory: "/" + directories: + - "/" + - "/packages/*" + - "/apps/*" + - "/docs" schedule: - interval: "weekly" + interval: "monthly" groups: security-updates: applies-to: security-updates patterns: - "*" - - - package-ecosystem: "npm" - directory: "/packages/react-native-executorch" - schedule: - interval: "weekly" - groups: - security-updates: - applies-to: security-updates - patterns: - - "*" - - - package-ecosystem: "npm" - directory: "/packages/bare-resource-fetcher" - schedule: - interval: "weekly" - groups: - security-updates: - applies-to: security-updates - patterns: - - "*" - - - package-ecosystem: "npm" - directory: "/packages/expo-resource-fetcher" - schedule: - interval: "weekly" - groups: - security-updates: - applies-to: security-updates - patterns: - - "*" - - - package-ecosystem: "npm" - directory: "/apps/llm" - schedule: - interval: "weekly" - groups: - security-updates: - applies-to: security-updates - patterns: - - "*" - - - package-ecosystem: "npm" - directory: "/apps/computer-vision" - schedule: - interval: "weekly" - groups: - security-updates: - applies-to: security-updates - patterns: - - "*" - - - package-ecosystem: "npm" - directory: "/apps/speech" - schedule: - interval: "weekly" - groups: - security-updates: - applies-to: security-updates - patterns: - - "*" - - - package-ecosystem: 
"npm" - directory: "/apps/text-embeddings" - schedule: - interval: "weekly" - groups: - security-updates: - applies-to: security-updates - patterns: - - "*" - - - package-ecosystem: "npm" - directory: "/docs" - schedule: - interval: "weekly" - groups: - security-updates: - applies-to: security-updates + all-updates: + applies-to: version-updates patterns: - "*" + ignore: + - dependency-name: "*" + update-types: + - "version-update:semver-patch" From 5a5dab3952c2c025ec3063cc23336a833c0a4f03 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 7 May 2026 19:07:23 +0000 Subject: [PATCH 2/2] refactor(ocr): reduce OCR bounding boxes from 4 vertices to 2-point AABB Resolves #760. The OCR and VerticalOCR pipelines previously exposed all four rotated-rectangle corners in OCRDetection.bbox. Two points (top-left and bottom-right of the axis-aligned bounding box) are sufficient for downstream rendering and are simpler to consume. Changes: - Types.h: shrink OCRDetection.bbox from a 4-point std::array to a 2-point std::array - RecognitionHandler.cpp: compute AABB (min/max x,y) over the four detector corners instead of forwarding them verbatim - VerticalOCR.cpp: same AABB reduction in _processSingleTextBox - OCR.cpp / VerticalOCR.cpp generateFromFrame: re-normalize the two bbox corners after inverseRotatePoints to guarantee bbox[0] <= bbox[1] - JsiConversions.h: serialize 2 points instead of 4 to JavaScript - OCRTest.cpp / VerticalOCRTest.cpp: assert size==2 and that bbox[1] >= bbox[0] - ocr.ts: narrow TypeScript type from Point[] to [Point,Point] and update docs --- .../host_objects/JsiConversions.h | 4 +-- .../common/rnexecutorch/models/ocr/OCR.cpp | 8 +++++ .../models/ocr/RecognitionHandler.cpp | 22 ++++++++++--- .../common/rnexecutorch/models/ocr/Types.h | 2 +- .../models/vertical_ocr/VerticalOCR.cpp | 31 ++++++++++++++----- .../tests/integration/OCRTest.cpp | 12 +++---- .../tests/integration/VerticalOCRTest.cpp | 22 +++++++------ .../react-native-executorch/src/types/ocr.ts | 5 +-- 8 files
changed, 73 insertions(+), 33 deletions(-) diff --git a/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h b/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h index a20fd7b1bc..fac780ad50 100644 --- a/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h +++ b/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h @@ -526,9 +526,9 @@ getJsiValue(const std::vector &detections, auto jsiDetectionObject = jsi::Object(runtime); - auto jsiBboxArray = jsi::Array(runtime, 4); + auto jsiBboxArray = jsi::Array(runtime, 2); #pragma unroll - for (size_t j = 0; j < 4u; ++j) { + for (size_t j = 0; j < 2u; ++j) { auto jsiPointObject = jsi::Object(runtime); jsiPointObject.setProperty(runtime, "x", detection.bbox[j].x); jsiPointObject.setProperty(runtime, "y", detection.bbox[j].y); diff --git a/packages/react-native-executorch/common/rnexecutorch/models/ocr/OCR.cpp b/packages/react-native-executorch/common/rnexecutorch/models/ocr/OCR.cpp index 3c08d16daa..60887e0f7b 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/ocr/OCR.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/ocr/OCR.cpp @@ -1,5 +1,6 @@ #include "OCR.h" #include "Constants.h" +#include #include #include #include @@ -69,6 +70,13 @@ OCR::generateFromFrame(jsi::Runtime &runtime, const jsi::Value &frameData) { for (auto &det : detections) { ::rnexecutorch::utils::inverseRotatePoints(det.bbox, orient, rotated.size()); + // Re-normalize to a proper AABB after the coordinate rotation. 
+ float minX = std::min(det.bbox[0].x, det.bbox[1].x); + float minY = std::min(det.bbox[0].y, det.bbox[1].y); + float maxX = std::max(det.bbox[0].x, det.bbox[1].x); + float maxY = std::max(det.bbox[0].y, det.bbox[1].y); + det.bbox[0] = {minX, minY}; + det.bbox[1] = {maxX, maxY}; } return detections; } diff --git a/packages/react-native-executorch/common/rnexecutorch/models/ocr/RecognitionHandler.cpp b/packages/react-native-executorch/common/rnexecutorch/models/ocr/RecognitionHandler.cpp index dfde737655..258edf340e 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/ocr/RecognitionHandler.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/ocr/RecognitionHandler.cpp @@ -1,4 +1,6 @@ #include "RecognitionHandler.h" +#include +#include #include #include #include @@ -55,14 +57,24 @@ void RecognitionHandler::processBBox(std::vector &boxList, /* Since the boxes were corresponding to the image resized to 1280x1280, we want to return the boxes shifted and rescaled to match the original - image dimensions. + image dimensions. Compute the axis-aligned bounding box (AABB) from the + four rotated corners and store only the top-left and bottom-right points. 
*/ - for (auto &point : box.bbox) { - point.x = (point.x - ratioAndPadding.left) * ratioAndPadding.resizeRatio; - point.y = (point.y - ratioAndPadding.top) * ratioAndPadding.resizeRatio; + float minX = std::numeric_limits::max(); + float minY = std::numeric_limits::max(); + float maxX = std::numeric_limits::lowest(); + float maxY = std::numeric_limits::lowest(); + for (const auto &point : box.bbox) { + float x = (point.x - ratioAndPadding.left) * ratioAndPadding.resizeRatio; + float y = (point.y - ratioAndPadding.top) * ratioAndPadding.resizeRatio; + minX = std::min(minX, x); + minY = std::min(minY, y); + maxX = std::max(maxX, x); + maxY = std::max(maxY, y); } boxList.emplace_back( - box.bbox, + std::array{types::Point{minX, minY}, + types::Point{maxX, maxY}}, converter.decodeGreedy(predictionIndices, predictionIndices.size())[0], confidenceScore); } diff --git a/packages/react-native-executorch/common/rnexecutorch/models/ocr/Types.h b/packages/react-native-executorch/common/rnexecutorch/models/ocr/Types.h index bb0a24aad1..af623a07b4 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/ocr/Types.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/ocr/Types.h @@ -27,7 +27,7 @@ struct PaddingInfo { }; struct OCRDetection { - std::array bbox; + std::array bbox; std::string text; float score; }; diff --git a/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalOCR.cpp b/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalOCR.cpp index 88a027d01b..3b9fcbef2a 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalOCR.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalOCR.cpp @@ -1,4 +1,6 @@ #include "VerticalOCR.h" +#include +#include #include #include #include @@ -73,6 +75,13 @@ VerticalOCR::generateFromFrame(jsi::Runtime &runtime, for (auto &det : detections) { 
::rnexecutorch::utils::inverseRotatePoints(det.bbox, orient, rotated.size()); + // Re-normalize to a proper AABB after the coordinate rotation. + float minX = std::min(det.bbox[0].x, det.bbox[1].x); + float minY = std::min(det.bbox[0].y, det.bbox[1].y); + float maxX = std::max(det.bbox[0].x, det.bbox[1].x); + float maxY = std::max(det.bbox[0].y, det.bbox[1].y); + det.bbox[0] = {minX, minY}; + det.bbox[1] = {maxX, maxY}; } return detections; } @@ -204,16 +213,24 @@ types::OCRDetection VerticalOCR::_processSingleTextBox( : _handleJointCharacters(box, originalImage, characterBoxes, paddingsBox, imagePaddings); } - // Modify the returned boxes to match the original image size - std::array finalBbox; + // Modify the returned boxes to match the original image size. Compute the + // axis-aligned bounding box (AABB) from the four rotated corners and store + // only the top-left and bottom-right points. + float minX = std::numeric_limits::max(); + float minY = std::numeric_limits::max(); + float maxX = std::numeric_limits::lowest(); + float maxY = std::numeric_limits::lowest(); for (size_t i = 0; i < box.bbox.size(); ++i) { - finalBbox[i].x = - (box.bbox[i].x - imagePaddings.left) * imagePaddings.resizeRatio; - finalBbox[i].y = - (box.bbox[i].y - imagePaddings.top) * imagePaddings.resizeRatio; + float x = (box.bbox[i].x - imagePaddings.left) * imagePaddings.resizeRatio; + float y = (box.bbox[i].y - imagePaddings.top) * imagePaddings.resizeRatio; + minX = std::min(minX, x); + minY = std::min(minY, y); + maxX = std::max(maxX, x); + maxY = std::max(maxY, y); } - return {finalBbox, text, confidenceScore}; + return {{types::Point{minX, minY}, types::Point{maxX, maxY}}, text, + confidenceScore}; } void VerticalOCR::unload() noexcept { diff --git a/packages/react-native-executorch/common/rnexecutorch/tests/integration/OCRTest.cpp b/packages/react-native-executorch/common/rnexecutorch/tests/integration/OCRTest.cpp index 072c761164..de995fa9f9 100644 --- 
a/packages/react-native-executorch/common/rnexecutorch/tests/integration/OCRTest.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/tests/integration/OCRTest.cpp @@ -100,12 +100,12 @@ TEST(OCRGenerateTests, DetectionsHaveValidBoundingBoxes) { auto results = model.generateFromString(kValidTestImagePath); for (const auto &detection : results) { - // Each bbox should have 4 points - EXPECT_EQ(detection.bbox.size(), 4u); - for (const auto &point : detection.bbox) { - EXPECT_GE(point.x, 0.0f); - EXPECT_GE(point.y, 0.0f); - } + // Each bbox has 2 points: top-left [0] and bottom-right [1] + EXPECT_EQ(detection.bbox.size(), 2u); + EXPECT_GE(detection.bbox[0].x, 0.0f); + EXPECT_GE(detection.bbox[0].y, 0.0f); + EXPECT_GE(detection.bbox[1].x, detection.bbox[0].x); + EXPECT_GE(detection.bbox[1].y, detection.bbox[0].y); } } diff --git a/packages/react-native-executorch/common/rnexecutorch/tests/integration/VerticalOCRTest.cpp b/packages/react-native-executorch/common/rnexecutorch/tests/integration/VerticalOCRTest.cpp index fd6d59441d..f409926b83 100644 --- a/packages/react-native-executorch/common/rnexecutorch/tests/integration/VerticalOCRTest.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/tests/integration/VerticalOCRTest.cpp @@ -117,11 +117,12 @@ TEST(VerticalOCRGenerateTests, IndependentCharsDetectionsHaveValidBBoxes) { auto results = model.generateFromString(kValidVerticalTestImagePath); for (const auto &detection : results) { - EXPECT_EQ(detection.bbox.size(), 4u); - for (const auto &point : detection.bbox) { - EXPECT_GE(point.x, 0.0f); - EXPECT_GE(point.y, 0.0f); - } + // Each bbox has 2 points: top-left [0] and bottom-right [1] + EXPECT_EQ(detection.bbox.size(), 2u); + EXPECT_GE(detection.bbox[0].x, 0.0f); + EXPECT_GE(detection.bbox[0].y, 0.0f); + EXPECT_GE(detection.bbox[1].x, detection.bbox[0].x); + EXPECT_GE(detection.bbox[1].y, detection.bbox[0].y); } } @@ -180,11 +181,12 @@ TEST(VerticalOCRGenerateTests, 
JointCharsDetectionsHaveValidBBoxes) { auto results = model.generateFromString(kValidVerticalTestImagePath); for (const auto &detection : results) { - EXPECT_EQ(detection.bbox.size(), 4u); - for (const auto &point : detection.bbox) { - EXPECT_GE(point.x, 0.0f); - EXPECT_GE(point.y, 0.0f); - } + // Each bbox has 2 points: top-left [0] and bottom-right [1] + EXPECT_EQ(detection.bbox.size(), 2u); + EXPECT_GE(detection.bbox[0].x, 0.0f); + EXPECT_GE(detection.bbox[0].y, 0.0f); + EXPECT_GE(detection.bbox[1].x, detection.bbox[0].x); + EXPECT_GE(detection.bbox[1].y, detection.bbox[0].y); } } diff --git a/packages/react-native-executorch/src/types/ocr.ts b/packages/react-native-executorch/src/types/ocr.ts index d2f3781095..16f9fbcff1 100644 --- a/packages/react-native-executorch/src/types/ocr.ts +++ b/packages/react-native-executorch/src/types/ocr.ts @@ -6,12 +6,13 @@ import { Frame, PixelData, ResourceSource } from './common'; * OCRDetection represents a single detected text instance in an image, * including its bounding box, recognized text, and confidence score. * @category Types - * @property {Point[]} bbox - An array of points defining the bounding box around the detected text. + * @property {[Point, Point]} bbox - A tuple of two points defining the axis-aligned bounding box + * around the detected text: `bbox[0]` is the top-left corner and `bbox[1]` is the bottom-right corner. * @property {string} text - The recognized text within the bounding box. * @property {number} score - The confidence score of the OCR detection, ranging from 0 to 1. */ export interface OCRDetection { - bbox: Point[]; + bbox: [Point, Point]; text: string; score: number; }