From 94cb11457dabd517cae689bc1f8a77d2b886477d Mon Sep 17 00:00:00 2001 From: alex Date: Mon, 27 Apr 2026 11:27:05 +0300 Subject: [PATCH 01/10] [MS-1421] Creating OCR framework to work with the OCR readouts with text filter and spatial constraints --- .../externalcredential/model/BoundingBox.kt | 2 +- .../screens/scanocr/reader/OcrBuilder.kt | 41 +++++++++++ .../screens/scanocr/reader/OcrModel.kt | 43 ++++++++++++ .../screens/scanocr/reader/OcrQuery.kt | 68 +++++++++++++++++++ .../screens/scanocr/reader/OcrReader.kt | 27 ++++++++ .../GetCredentialCoordinatesUseCase.kt | 42 +++++------- .../usecase/GhanaIdCardOcrSelectorUseCase.kt | 4 +- .../GhanaNhisCardOcrSelectorUseCase.kt | 4 +- 8 files changed, 204 insertions(+), 27 deletions(-) create mode 100644 feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrBuilder.kt create mode 100644 feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrModel.kt create mode 100644 feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrQuery.kt create mode 100644 feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrReader.kt diff --git a/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/model/BoundingBox.kt b/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/model/BoundingBox.kt index 06cb649755..ed89e87670 100644 --- a/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/model/BoundingBox.kt +++ b/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/model/BoundingBox.kt @@ -18,6 +18,6 @@ data class BoundingBox( val bottom: Int, ) : JavaSerializable -fun Rect.toBoundingBox(): BoundingBox = BoundingBox(left, top, right, bottom) +fun Rect?.toBoundingBox(): BoundingBox = if (this == null) 
BoundingBox(0, 0, 0, 0) else BoundingBox(left, top, right, bottom) fun BoundingBox.toRect(): Rect = Rect(left, top, right, bottom) diff --git a/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrBuilder.kt b/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrBuilder.kt new file mode 100644 index 0000000000..52c60a5457 --- /dev/null +++ b/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrBuilder.kt @@ -0,0 +1,41 @@ +package com.simprints.feature.externalcredential.screens.scanocr.reader + +import com.google.mlkit.vision.text.Text +import com.simprints.feature.externalcredential.model.toBoundingBox + +/** + * Converts an ML Kit [Text] result into a custom [OcrText]. + * + * Each line is assigned an ID in the order it is encountered in the blocks. + * The resulting [OcrText.allLines] are sorted by bounding box top (ascending).
+ */ +internal object OcrModelBuilder { + fun build(mlKitText: Text): OcrText { + var nextLineId = 0 + + val blocks = mlKitText.textBlocks.map { block -> + val lines = block.lines.map { line -> + OcrLine( + id = nextLineId++, + text = line.text.trim().replace(" ", ""), + boundingBox = line.boundingBox.toBoundingBox(), + blockBoundingBox = block.boundingBox.toBoundingBox(), + confidence = line.confidence, + ) + } + OcrBlock( + boundingBox = block.boundingBox.toBoundingBox(), + lines = lines, + ) + } + + val allLinesSorted = blocks + .flatMap { it.lines } + .sortedBy { it.boundingBox.top } + + return OcrText( + blocks = blocks, + allLines = allLinesSorted, + ) + } +} diff --git a/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrModel.kt b/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrModel.kt new file mode 100644 index 0000000000..f528d72b1a --- /dev/null +++ b/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrModel.kt @@ -0,0 +1,43 @@ +package com.simprints.feature.externalcredential.screens.scanocr.reader + +import com.simprints.feature.externalcredential.model.BoundingBox + +/** + * Wrapper for all scanned text after the OCR + * + * @param blocks blocks of text, each containing multiple [OcrLine] + * @param allLines all lines from blocks sorted by top-right bounding box coordinates ascending + */ +data class OcrText( + val blocks: List, + val allLines: List, +) + +/** + * Representation of a single block detected by the OCR kit. A block can contain multiple lines that the OCR kit labeled as belonging to the + * same paragraph of text. + * + * @param boundingBox coordinates of the block + * @param lines nested lines of the block + */ +data class OcrBlock( + val boundingBox: BoundingBox, + val lines: List, +) + +/** + * A single line of text detected by the OCR kit.
+ * + * @param id unique id of the line in a single OCR scan + * @param text normalized text (trimmed, with all space characters removed) + * @param boundingBox coordinates of the line + * @param blockBoundingBox parent coordinates + * @param confidence overall confidence of the text value based on the average confidence for each character (aka element) in [text] + */ +data class OcrLine( + val id: Int, + val text: String, + val boundingBox: BoundingBox, + val blockBoundingBox: BoundingBox, + val confidence: Float, +) diff --git a/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrQuery.kt b/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrQuery.kt new file mode 100644 index 0000000000..aef9df91b3 --- /dev/null +++ b/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrQuery.kt @@ -0,0 +1,68 @@ +package com.simprints.feature.externalcredential.screens.scanocr.reader + +/** + * Defines the search criteria for locating a single line of text within document scanned with OCR. + */ +class OcrQueryScope internal constructor( + internal val allLines: List, +) { + private val filters = mutableListOf<(OcrLine) -> Boolean>() + private var belowResolver: (() -> OcrLine?)? = null + private var aboveResolver: (() -> OcrLine?)?
= null + + fun matchesPattern(regex: Regex): OcrQueryScope = apply { + filters += { line -> regex.containsMatchIn(line.text) } + } + + fun matchesNormalizedPattern(regex: Regex): OcrQueryScope = apply { + filters += { line -> regex.containsMatchIn(line.text) } + } + + fun containsText(text: String): OcrQueryScope = apply { + filters += { line -> line.text.contains(text, ignoreCase = true) } + } + + fun hasExactText(text: String): OcrQueryScope = apply { + filters += { line -> line.text.equals(text, ignoreCase = true) } + } + + fun hasId(id: Int): OcrQueryScope = apply { + filters += { line -> line.id == id } + } + + fun isBelow(resolveAnchor: OcrQueryScope.() -> Unit): OcrQueryScope = apply { + belowResolver = { OcrQueryScope(allLines).apply(resolveAnchor).find() } + } + + fun isBelow(anchor: OcrLine): OcrQueryScope = apply { + belowResolver = { anchor } + } + + fun isAbove(resolveAnchor: OcrQueryScope.() -> Unit): OcrQueryScope = apply { + aboveResolver = { OcrQueryScope(allLines).apply(resolveAnchor).find() } + } + + fun isAbove(anchor: OcrLine): OcrQueryScope = apply { + aboveResolver = { anchor } + } + + internal fun find(): OcrLine? 
{ + val belowAnchor = belowResolver?.invoke() + val aboveAnchor = aboveResolver?.invoke() + + if (belowResolver != null && belowAnchor == null) return null + if (aboveResolver != null && aboveAnchor == null) return null + + return allLines.firstOrNull { line -> + val isBelowAnchor = belowAnchor == null || ( + line.boundingBox.top > belowAnchor.boundingBox.top && + line.boundingBox.left < belowAnchor.boundingBox.right + ) + val isAboveAnchor = aboveAnchor == null || ( + line.boundingBox.top < aboveAnchor.boundingBox.top && + line.boundingBox.left < aboveAnchor.boundingBox.right + ) + filters.all { it(line) } && isBelowAnchor && isAboveAnchor + } + } +} diff --git a/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrReader.kt b/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrReader.kt new file mode 100644 index 0000000000..71a7c92560 --- /dev/null +++ b/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrReader.kt @@ -0,0 +1,27 @@ +package com.simprints.feature.externalcredential.screens.scanocr.reader + +/** + * Entry point for querying an [OcrText]. + * + * Usage: + * ``` + * val model = OcrModelBuilder.build(mlKitText) + * val reader = OcrReader(model) + * + * val membershipNumber = reader.query { + * matchesPattern(Regex("\\d{8}")) + * isBelow { containsText("membership number") } + * isAbove { containsText("expiry date") } + * } + * ``` + */ +class OcrReader( + val model: OcrText, +) { + /** + * Executes the query defined in [block] and returns the first matching [OcrLine], or null. + * The [block] receives an [OcrQueryScope] as its receiver — call filter methods directly + * without any chaining or terminal call. + */ + fun find(block: OcrQueryScope.() -> Unit): OcrLine? 
= OcrQueryScope(model.allLines).apply(block).find() +} diff --git a/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/usecase/GetCredentialCoordinatesUseCase.kt b/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/usecase/GetCredentialCoordinatesUseCase.kt index 20fe8554a7..8692dfd488 100644 --- a/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/usecase/GetCredentialCoordinatesUseCase.kt +++ b/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/usecase/GetCredentialCoordinatesUseCase.kt @@ -6,9 +6,10 @@ import com.google.mlkit.vision.common.InputImage import com.google.mlkit.vision.text.TextRecognition import com.google.mlkit.vision.text.latin.TextRecognizerOptions import com.simprints.core.ExcludedFromGeneratedTestCoverageReports -import com.simprints.feature.externalcredential.model.toBoundingBox import com.simprints.feature.externalcredential.screens.scanocr.model.DetectedOcrBlock import com.simprints.feature.externalcredential.screens.scanocr.model.OcrDocumentType +import com.simprints.feature.externalcredential.screens.scanocr.reader.OcrModelBuilder +import com.simprints.feature.externalcredential.screens.scanocr.reader.OcrReader import com.simprints.infra.credential.store.CredentialImageRepository import com.simprints.infra.credential.store.model.CredentialScanImageType.FullDocument import com.simprints.infra.logging.LoggingConstants.CrashReportTag.MULTI_FACTOR_ID @@ -51,29 +52,22 @@ internal class GetCredentialCoordinatesUseCase @Inject constructor( val image = InputImage.fromBitmap(bitmap, 0) return try { val result = Tasks.await(recognizer.process(image)) ?: return null - return result.textBlocks.firstNotNullOfOrNull { textBlock -> - textBlock.lines.firstNotNullOfOrNull { textLine -> - // Getting text from the entire line readout, and normalizing to avoid any extra 
spaces - val lineReadout = textLine.text.trim().replace(" ", "") - val isValid = when (documentType) { - OcrDocumentType.NhisCard -> ghanaNhisCardOcrSelectorUseCase(lineReadout) - OcrDocumentType.GhanaIdCard -> ghanaIdCardOcrSelectorUseCase(lineReadout) - } - if (isValid) { - val blockBoundingRect = textBlock.boundingBox ?: return@firstNotNullOfOrNull null - val lineBoundingRect = textLine.boundingBox ?: return@firstNotNullOfOrNull null - val savedImagePath = credentialImageRepository.saveCredentialScan(bitmap, imageType = FullDocument) - return@firstNotNullOfOrNull DetectedOcrBlock( - imagePath = savedImagePath, - documentType = documentType, - blockBoundingBox = blockBoundingRect.toBoundingBox(), - lineBoundingBox = lineBoundingRect.toBoundingBox(), - readoutValue = lineReadout, - ) - } else { - return@firstNotNullOfOrNull null - } - } + val ocrReader = OcrReader(OcrModelBuilder.build(result)) + val credentialOcrLine = when (documentType) { + OcrDocumentType.NhisCard -> ghanaNhisCardOcrSelectorUseCase(ocrReader) + OcrDocumentType.GhanaIdCard -> ghanaIdCardOcrSelectorUseCase(ocrReader) + } + if (credentialOcrLine != null) { + val savedImagePath = credentialImageRepository.saveCredentialScan(bitmap, imageType = FullDocument) + return DetectedOcrBlock( + imagePath = savedImagePath, + documentType = documentType, + blockBoundingBox = credentialOcrLine.blockBoundingBox, + lineBoundingBox = credentialOcrLine.boundingBox, + readoutValue = credentialOcrLine.text, + ) + } else { + return null } } catch (e: Exception) { Simber.e("OCR failed for $documentType", e, tag = MULTI_FACTOR_ID) diff --git a/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/usecase/GhanaIdCardOcrSelectorUseCase.kt b/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/usecase/GhanaIdCardOcrSelectorUseCase.kt index b5672f17fa..9d5b306382 100644 --- 
a/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/usecase/GhanaIdCardOcrSelectorUseCase.kt +++ b/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/usecase/GhanaIdCardOcrSelectorUseCase.kt @@ -1,9 +1,11 @@ package com.simprints.feature.externalcredential.screens.scanocr.usecase +import com.simprints.feature.externalcredential.screens.scanocr.reader.OcrLine +import com.simprints.feature.externalcredential.screens.scanocr.reader.OcrReader import javax.inject.Inject internal class GhanaIdCardOcrSelectorUseCase @Inject constructor() { - operator fun invoke(readoutValue: String): Boolean = GHANA_ID_PATTERN.matches(readoutValue) + operator fun invoke(ocrReader: OcrReader): OcrLine? = ocrReader.find { matchesPattern(GHANA_ID_PATTERN) } companion object { // Ghana ID card number pattern is "GHA-12345789-0" diff --git a/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/usecase/GhanaNhisCardOcrSelectorUseCase.kt b/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/usecase/GhanaNhisCardOcrSelectorUseCase.kt index e7f4133529..95c6c3ca47 100644 --- a/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/usecase/GhanaNhisCardOcrSelectorUseCase.kt +++ b/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/usecase/GhanaNhisCardOcrSelectorUseCase.kt @@ -1,9 +1,11 @@ package com.simprints.feature.externalcredential.screens.scanocr.usecase +import com.simprints.feature.externalcredential.screens.scanocr.reader.OcrLine +import com.simprints.feature.externalcredential.screens.scanocr.reader.OcrReader import javax.inject.Inject internal class GhanaNhisCardOcrSelectorUseCase @Inject constructor() { - operator fun invoke(readoutValue: String): Boolean = NHIS_PATTERN.matches(readoutValue) + operator fun 
invoke(ocrReader: OcrReader): OcrLine? = ocrReader.find { matchesPattern(NHIS_PATTERN) } companion object { // NHIS Card membership is 8 digits long From bfdaf0de355d89fc43c25f83bc0c30d7de3c77c7 Mon Sep 17 00:00:00 2001 From: alex Date: Mon, 27 Apr 2026 11:42:45 +0300 Subject: [PATCH 02/10] [MS-1421] Removing redundant method --- .../externalcredential/screens/scanocr/reader/OcrQuery.kt | 4 ---- 1 file changed, 4 deletions(-) diff --git a/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrQuery.kt b/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrQuery.kt index aef9df91b3..1e84fb4e07 100644 --- a/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrQuery.kt +++ b/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrQuery.kt @@ -14,10 +14,6 @@ class OcrQueryScope internal constructor( filters += { line -> regex.containsMatchIn(line.text) } } - fun matchesNormalizedPattern(regex: Regex): OcrQueryScope = apply { - filters += { line -> regex.containsMatchIn(line.text) } - } - fun containsText(text: String): OcrQueryScope = apply { filters += { line -> line.text.contains(text, ignoreCase = true) } } From c5e0363784be7b0e21b88c3be416b7008e4fcfa9 Mon Sep 17 00:00:00 2001 From: alex Date: Wed, 29 Apr 2026 11:23:41 +0300 Subject: [PATCH 03/10] =?UTF-8?q?[MS-1421]=20Adding=20test=20coverage=20to?= =?UTF-8?q?=20the=20OCR=20Reader=CB=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../screens/scanocr/reader/OcrModel.kt | 10 +- .../screens/scanocr/reader/OcrQuery.kt | 4 +- .../screens/scanocr/reader/OcrReader.kt | 4 +- .../screens/scanocr/reader/OcrReaderTest.kt | 214 ++++++++++++++++++ .../GhanaIdCardOcrSelectorUseCaseTest.kt | 78 +++++-- .../GhanaNhisCardOcrSelectorUseCaseTest.kt | 68 ++++-- 6 files 
changed, 327 insertions(+), 51 deletions(-) create mode 100644 feature/external-credential/src/test/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrReaderTest.kt diff --git a/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrModel.kt b/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrModel.kt index f528d72b1a..9ddf0af011 100644 --- a/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrModel.kt +++ b/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrModel.kt @@ -1,5 +1,6 @@ package com.simprints.feature.externalcredential.screens.scanocr.reader +import com.simprints.core.ExcludedFromGeneratedTestCoverageReports import com.simprints.feature.externalcredential.model.BoundingBox /** @@ -8,7 +9,8 @@ import com.simprints.feature.externalcredential.model.BoundingBox * @param blocks blocks of text, each containing multiple [OcrLine] * @param allLines all lines from blocks sorted by top-right bounding box coordinates ascending */ -data class OcrText( +@ExcludedFromGeneratedTestCoverageReports("Data class") +internal data class OcrText( val blocks: List, val allLines: List, ) @@ -20,7 +22,8 @@ data class OcrText( * @param boundingBox coordinates of the block * @param lines nested lines of the block */ -data class OcrBlock( +@ExcludedFromGeneratedTestCoverageReports("Data class") +internal data class OcrBlock( val boundingBox: BoundingBox, val lines: List, ) @@ -34,7 +37,8 @@ data class OcrBlock( * @param blockBoundingBox parent coordinates * @param confidence overall confidence of the text value based on the average confidence for each character (aka element) in [text] */ -data class OcrLine( +@ExcludedFromGeneratedTestCoverageReports("Data class") +internal data class OcrLine( val id: Int, val text: String, val 
boundingBox: BoundingBox, diff --git a/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrQuery.kt b/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrQuery.kt index 1e84fb4e07..24bf609619 100644 --- a/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrQuery.kt +++ b/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrQuery.kt @@ -3,7 +3,7 @@ package com.simprints.feature.externalcredential.screens.scanocr.reader /** * Defines the search criteria for locating a single line of text within document scanned with OCR. */ -class OcrQueryScope internal constructor( +internal class OcrQueryScope internal constructor( internal val allLines: List, ) { private val filters = mutableListOf<(OcrLine) -> Boolean>() @@ -52,10 +52,12 @@ class OcrQueryScope internal constructor( return allLines.firstOrNull { line -> val isBelowAnchor = belowAnchor == null || ( line.boundingBox.top > belowAnchor.boundingBox.top && + line.boundingBox.left >= belowAnchor.boundingBox.left && line.boundingBox.left < belowAnchor.boundingBox.right ) val isAboveAnchor = aboveAnchor == null || ( line.boundingBox.top < aboveAnchor.boundingBox.top && + line.boundingBox.left >= aboveAnchor.boundingBox.left && line.boundingBox.left < aboveAnchor.boundingBox.right ) filters.all { it(line) } && isBelowAnchor && isAboveAnchor diff --git a/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrReader.kt b/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrReader.kt index 71a7c92560..b52dba060b 100644 --- a/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrReader.kt +++ 
b/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrReader.kt @@ -8,14 +8,14 @@ package com.simprints.feature.externalcredential.screens.scanocr.reader * val model = OcrModelBuilder.build(mlKitText) * val reader = OcrReader(model) * - * val membershipNumber = reader.query { + * val membershipNumber = reader.find { * matchesPattern(Regex("\\d{8}")) * isBelow { containsText("membership number") } * isAbove { containsText("expiry date") } * } * ``` */ -class OcrReader( +internal class OcrReader( val model: OcrText, ) { /** diff --git a/feature/external-credential/src/test/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrReaderTest.kt b/feature/external-credential/src/test/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrReaderTest.kt new file mode 100644 index 0000000000..09d76d37e3 --- /dev/null +++ b/feature/external-credential/src/test/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrReaderTest.kt @@ -0,0 +1,214 @@ +package com.simprints.feature.externalcredential.screens.scanocr.reader + +import com.google.common.truth.Truth.assertThat +import com.simprints.feature.externalcredential.model.BoundingBox +import io.mockk.MockKAnnotations +import org.junit.Before +import org.junit.Test + +internal class OcrReaderTest { + private lateinit var reader: OcrReader + + private lateinit var labelMembership: OcrLine + private lateinit var membershipValue: OcrLine + private lateinit var labelIssueDate: OcrLine + private lateinit var issueDateValue: OcrLine + private lateinit var labelExpiryDate: OcrLine + private lateinit var expiryDateValue: OcrLine + + @Before + fun setUp() { + MockKAnnotations.init(this, relaxed = true) + + labelMembership = createLine(id = 0, text = "membership number", top = 100) + membershipValue = createLine(id = 1, text = "12345678", top = 140) + labelIssueDate = createLine(id = 2, text = "issue date", top = 200) + 
issueDateValue = createLine(id = 3, text = "03/03", top = 240) + labelExpiryDate = createLine(id = 4, text = "expiry date", top = 200, right = 200) + expiryDateValue = createLine(id = 5, text = "11/11", top = 240, right = 200) + + reader = OcrReader( + OcrText( + blocks = emptyList(), + allLines = listOf(labelMembership, membershipValue, labelExpiryDate, expiryDateValue, labelIssueDate, issueDateValue), + ), + ) + } + + @Test + fun `find returns null when no lines match`() { + val result = reader.find { containsText("$labelMembership extra") } + assertThat(result).isNull() + } + + @Test + fun `find returns first line in iteration order when multiple lines match`() { + val result = reader.find { matchesPattern(Regex("\\d+")) } + assertThat(result).isEqualTo(membershipValue) + } + + @Test + fun `containsText returns first matching line`() { + val result = reader.find { containsText("member") } + assertThat(result).isEqualTo(labelMembership) + } + + @Test + fun `containsText is case-insensitive`() { + val result = reader.find { containsText("MEMBER") } + assertThat(result).isEqualTo(labelMembership) + } + + @Test + fun `hasExactText matches full string only`() { + val result = reader.find { hasExactText(labelExpiryDate.text) } + assertThat(result).isEqualTo(labelExpiryDate) + } + + @Test + fun `hasExactText returns null for partial match`() { + val result = reader.find { hasExactText("expiry") } + assertThat(result).isNull() + } + + @Test + fun `hasExactText is case-insensitive`() { + val result = reader.find { hasExactText(labelExpiryDate.text.uppercase()) } + assertThat(result).isEqualTo(labelExpiryDate) + } + + @Test + fun `matchesPattern finds 8-digit number`() { + val result = reader.find { matchesPattern(Regex("^\\d{8}$")) } + assertThat(result).isEqualTo(membershipValue) + } + + @Test + fun `matchesPattern finds date format`() { + val result = reader.find { matchesPattern(Regex("^\\d{2}/\\d{2}$")) } + assertThat(result).isEqualTo(expiryDateValue) + } + + @Test 
+ fun `hasId finds line by id`() { + val result = reader.find { hasId(labelExpiryDate.id) } + assertThat(result).isEqualTo(labelExpiryDate) + } + + @Test + fun `hasId returns null for unknown id`() { + val result = reader.find { hasId(labelExpiryDate.id + 99) } + assertThat(result).isNull() + } + + @Test + fun `isBelow direct OcrLine returns first line below anchor`() { + val result = reader.find { isBelow(labelMembership) } + assertThat(result).isEqualTo(membershipValue) + } + + @Test + fun `isBelow block resolves anchor via text containment`() { + val result = reader.find { + matchesPattern(Regex("^\\d{8}$")) + isBelow { containsText("membership") } + } + assertThat(result).isEqualTo(membershipValue) + } + + @Test + fun `isBelow block resolves anchor via pattern`() { + val result = reader.find { + matchesPattern(Regex("^\\d{8}$")) + isBelow { matchesPattern(Regex("membership")) } + } + assertThat(result).isEqualTo(membershipValue) + } + + @Test + fun `isBelow block resolves anchor via line id`() { + val result = reader.find { + matchesPattern(Regex("^\\d{8}$")) + isBelow { hasId(labelMembership.id) } + } + assertThat(result).isEqualTo(membershipValue) + } + + @Test + fun `isBelow returns null when anchor cannot be resolved`() { + val result = reader.find { isBelow { containsText("$labelMembership extra") } } + assertThat(result).isNull() + } + + @Test + fun `isAbove direct OcrLine returns first line above anchor`() { + val result = reader.find { isAbove(labelExpiryDate) } + assertThat(result).isEqualTo(labelMembership) + } + + @Test + fun `isAbove block resolves anchor via text containment`() { + val result = reader.find { + matchesPattern(Regex("^\\d{8}$")) + isAbove { containsText("expiry date") } + } + assertThat(result).isEqualTo(membershipValue) + } + + @Test + fun `isAbove block resolves anchor via pattern`() { + val result = reader.find { + matchesPattern(Regex("^\\d{8}$")) + isAbove { matchesPattern(Regex("expiry")) } + } + 
assertThat(result).isEqualTo(membershipValue) + } + + @Test + fun `isAbove block resolves anchor via line id`() { + val result = reader.find { + matchesPattern(Regex("^\\d{8}$")) + isAbove { hasId(labelIssueDate.id) } + } + assertThat(result).isEqualTo(membershipValue) + } + + @Test + fun `isAbove returns null when anchor cannot be resolved`() { + val result = reader.find { isAbove { containsText("nonexistent") } } + assertThat(result).isNull() + } + + @Test + fun `isBelow and isAbove combined finds value sandwiched between labels`() { + val result = reader.find { + matchesPattern(Regex("^\\d{8}$")) + isBelow { containsText(labelMembership.text) } + isAbove { containsText(labelIssueDate.text) } + } + assertThat(result).isEqualTo(membershipValue) + } + + @Test + fun `isBelow and isAbove returns null when no line fits between anchors`() { + val result = reader.find { + isBelow(labelExpiryDate) + isAbove(membershipValue) + } + assertThat(result).isNull() + } + + private fun createLine( + id: Int, + text: String, + top: Int, + left: Int = 0, + right: Int = 100, + ) = OcrLine( + id = id, + text = text, + boundingBox = BoundingBox(left = left, top = top, right = right, bottom = top + 30), + blockBoundingBox = BoundingBox(left = left, top = top, right = right, bottom = top + 30), + confidence = 1f, + ) +} diff --git a/feature/external-credential/src/test/java/com/simprints/feature/externalcredential/screens/scanocr/usecase/GhanaIdCardOcrSelectorUseCaseTest.kt b/feature/external-credential/src/test/java/com/simprints/feature/externalcredential/screens/scanocr/usecase/GhanaIdCardOcrSelectorUseCaseTest.kt index 121662b35b..eddbcb4acc 100644 --- a/feature/external-credential/src/test/java/com/simprints/feature/externalcredential/screens/scanocr/usecase/GhanaIdCardOcrSelectorUseCaseTest.kt +++ b/feature/external-credential/src/test/java/com/simprints/feature/externalcredential/screens/scanocr/usecase/GhanaIdCardOcrSelectorUseCaseTest.kt @@ -1,12 +1,34 @@ package 
com.simprints.feature.externalcredential.screens.scanocr.usecase import com.google.common.truth.Truth.assertThat +import com.simprints.feature.externalcredential.model.BoundingBox +import com.simprints.feature.externalcredential.screens.scanocr.reader.OcrLine +import com.simprints.feature.externalcredential.screens.scanocr.reader.OcrReader +import com.simprints.feature.externalcredential.screens.scanocr.reader.OcrText import io.mockk.MockKAnnotations import org.junit.Before import org.junit.Test internal class GhanaIdCardOcrSelectorUseCaseTest { private lateinit var useCase: GhanaIdCardOcrSelectorUseCase + private val label = "Ghana Card Number" + private val validIds = listOf( + "GHA-123456789-0", + "GHA-987654321-5", + "GHA-000000000-9", + ) + private val invalidIds = listOf( + "GHB-123456789-0", + "GHA123456789-0", + "GHA-1234567890", + "GHA-12345678-0", + "GHA-1234567890-0", + "GHA-12345678A-0", + "GHA-123456789-A", + "GHA-123456789-01", + "", + "GHA-123456789-0 ", + ) @Before fun setUp() { @@ -15,35 +37,41 @@ internal class GhanaIdCardOcrSelectorUseCaseTest { } @Test - fun `Returns true for valid Ghana ID formats`() { - val validIds = listOf( - "GHA-123456789-0", - "GHA-987654321-5", - "GHA-000000000-9", - ) - - validIds.forEach { id -> - assertThat(useCase(id)).isTrue() + fun `returns matching line for valid Ghana ID formats`() { + validIds.forEachIndexed { id, ghanaId -> + val nonMatching = line(id = id, text = label, top = 100) + val expected = line(id = id, text = ghanaId, top = 140) + val reader = buildReader(nonMatching, expected) + + assertThat(useCase(reader)).isEqualTo(expected) } } @Test - fun `Returns false for invalid Ghana ID formats`() { - val invalidIds = listOf( - "GHB-123456789-0", - "GHA123456789-0", - "GHA-1234567890", - "GHA-12345678-0", - "GHA-1234567890-0", - "GHA-12345678A-0", - "GHA-123456789-A", - "GHA-123456789-01", - "", - "GHA-123456789-0 ", - ) - - invalidIds.forEach { id -> - assertThat(useCase(id)).isFalse() + fun `returns null 
for invalid Ghana ID formats`() { + invalidIds.forEachIndexed { id, ghanaId -> + val reader = buildReader( + line(id = id, text = label, top = 100), + line(id = id, text = ghanaId, top = 140), + ) + + assertThat(useCase(reader)).isNull() } } + + private fun buildReader(vararg lines: OcrLine) = OcrReader( + OcrText(blocks = emptyList(), allLines = lines.toList()), + ) + + private fun line( + id: Int, + text: String, + top: Int, + ) = OcrLine( + id = id, + text = text, + boundingBox = BoundingBox(left = 0, top = top, right = 200, bottom = top + 30), + blockBoundingBox = BoundingBox(left = 0, top = top, right = 200, bottom = top + 30), + confidence = 1f, + ) } diff --git a/feature/external-credential/src/test/java/com/simprints/feature/externalcredential/screens/scanocr/usecase/GhanaNhisCardOcrSelectorUseCaseTest.kt b/feature/external-credential/src/test/java/com/simprints/feature/externalcredential/screens/scanocr/usecase/GhanaNhisCardOcrSelectorUseCaseTest.kt index 52816d93ec..618222615b 100644 --- a/feature/external-credential/src/test/java/com/simprints/feature/externalcredential/screens/scanocr/usecase/GhanaNhisCardOcrSelectorUseCaseTest.kt +++ b/feature/external-credential/src/test/java/com/simprints/feature/externalcredential/screens/scanocr/usecase/GhanaNhisCardOcrSelectorUseCaseTest.kt @@ -1,12 +1,29 @@ package com.simprints.feature.externalcredential.screens.scanocr.usecase import com.google.common.truth.Truth.assertThat +import com.simprints.feature.externalcredential.model.BoundingBox +import com.simprints.feature.externalcredential.screens.scanocr.reader.OcrLine +import com.simprints.feature.externalcredential.screens.scanocr.reader.OcrReader +import com.simprints.feature.externalcredential.screens.scanocr.reader.OcrText import io.mockk.MockKAnnotations import org.junit.Before import org.junit.Test internal class GhanaNhisCardOcrSelectorUseCaseTest { private lateinit var useCase: GhanaNhisCardOcrSelectorUseCase + private val label = "membership number" + 
private val validNumbers = listOf( + "12345678", + "98765432", + "00000000", + ) + private val invalidNumbers = listOf( + "1234567", + "123456789", + "1234567A", + "12345-78", + "", + ) @Before fun setUp() { @@ -15,30 +32,41 @@ internal class GhanaNhisCardOcrSelectorUseCaseTest { } @Test - fun `Returns true for valid NHIS membership numbers`() { - val validNumbers = listOf( - "12345678", - "98765432", - "00000000", - ) - - validNumbers.forEach { number -> - assertThat(useCase(number)).isTrue() + fun `returns matching line for valid NHIS membership numbers`() { + validNumbers.forEachIndexed { id, number -> + val label = line(id = id, text = label, top = 100) + val expected = line(id = id, text = number, top = 140) + val reader = buildReader(label, expected) + + assertThat(useCase(reader)).isEqualTo(expected) } } @Test - fun `Returns false for invalid NHIS membership numbers`() { - val invalidNumbers = listOf( - "1234567", - "123456789", - "1234567A", - "12345-78", - "", - ) - - invalidNumbers.forEach { number -> - assertThat(useCase(number)).isFalse() + fun `returns null for invalid NHIS membership numbers`() { + invalidNumbers.forEachIndexed { id, number -> + val reader = buildReader( + line(id = id, text = "membership number", top = 100), + line(id = id, text = number, top = 140), + ) + + assertThat(useCase(reader)).isNull() } } + + private fun buildReader(vararg lines: OcrLine) = OcrReader( + OcrText(blocks = emptyList(), allLines = lines.toList()), + ) + + private fun line( + id: Int, + text: String, + top: Int, + ) = OcrLine( + id = id, + text = text, + boundingBox = BoundingBox(left = 0, top = top, right = 200, bottom = top + 30), + blockBoundingBox = BoundingBox(left = 0, top = top, right = 200, bottom = top + 30), + confidence = 1f, + ) } From ac66a5287bf5bea94ff27678fa10a551fc2248dc Mon Sep 17 00:00:00 2001 From: alex Date: Wed, 29 Apr 2026 11:44:38 +0300 Subject: [PATCH 04/10] [MS-1421] Separating OcrQuery from OcrReader --- 
.../screens/scanocr/reader/OcrQuery.kt | 55 ++---- .../screens/scanocr/reader/OcrReader.kt | 26 ++- .../scanocr/reader/OcrQueryScopeTest.kt | 165 ++++++++++++++++++ 3 files changed, 206 insertions(+), 40 deletions(-) create mode 100644 feature/external-credential/src/test/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrQueryScopeTest.kt diff --git a/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrQuery.kt b/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrQuery.kt index 24bf609619..24b4db3f21 100644 --- a/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrQuery.kt +++ b/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrQuery.kt @@ -1,66 +1,45 @@ package com.simprints.feature.externalcredential.screens.scanocr.reader /** - * Defines the search criteria for locating a single line of text within document scanned with OCR. + * Defines the search criteria for locating a single line of text within a scanned document. + * Used as the receiver of the [OcrReader.find] block. */ -internal class OcrQueryScope internal constructor( - internal val allLines: List, +internal class OcrQuery( + private val subQuery: (OcrQuery) -> OcrLine?, ) { - private val filters = mutableListOf<(OcrLine) -> Boolean>() - private var belowResolver: (() -> OcrLine?)? = null - private var aboveResolver: (() -> OcrLine?)? = null + internal val filters = mutableListOf<(OcrLine) -> Boolean>() + internal var belowResolver: (() -> OcrLine?)? = null + internal var aboveResolver: (() -> OcrLine?)? 
= null - fun matchesPattern(regex: Regex): OcrQueryScope = apply { + fun matchesPattern(regex: Regex): OcrQuery = apply { filters += { line -> regex.containsMatchIn(line.text) } } - fun containsText(text: String): OcrQueryScope = apply { + fun containsText(text: String): OcrQuery = apply { filters += { line -> line.text.contains(text, ignoreCase = true) } } - fun hasExactText(text: String): OcrQueryScope = apply { + fun hasExactText(text: String): OcrQuery = apply { filters += { line -> line.text.equals(text, ignoreCase = true) } } - fun hasId(id: Int): OcrQueryScope = apply { + fun hasId(id: Int): OcrQuery = apply { filters += { line -> line.id == id } } - fun isBelow(resolveAnchor: OcrQueryScope.() -> Unit): OcrQueryScope = apply { - belowResolver = { OcrQueryScope(allLines).apply(resolveAnchor).find() } + fun isBelow(resolveAnchor: OcrQuery.() -> Unit): OcrQuery = apply { + belowResolver = { subQuery(OcrQuery(subQuery).apply(resolveAnchor)) } } - fun isBelow(anchor: OcrLine): OcrQueryScope = apply { + fun isBelow(anchor: OcrLine): OcrQuery = apply { belowResolver = { anchor } } - fun isAbove(resolveAnchor: OcrQueryScope.() -> Unit): OcrQueryScope = apply { - aboveResolver = { OcrQueryScope(allLines).apply(resolveAnchor).find() } + fun isAbove(resolveAnchor: OcrQuery.() -> Unit): OcrQuery = apply { + aboveResolver = { subQuery(OcrQuery(subQuery).apply(resolveAnchor)) } } - fun isAbove(anchor: OcrLine): OcrQueryScope = apply { + fun isAbove(anchor: OcrLine): OcrQuery = apply { aboveResolver = { anchor } } - - internal fun find(): OcrLine? 
{ - val belowAnchor = belowResolver?.invoke() - val aboveAnchor = aboveResolver?.invoke() - - if (belowResolver != null && belowAnchor == null) return null - if (aboveResolver != null && aboveAnchor == null) return null - - return allLines.firstOrNull { line -> - val isBelowAnchor = belowAnchor == null || ( - line.boundingBox.top > belowAnchor.boundingBox.top && - line.boundingBox.left >= belowAnchor.boundingBox.left && - line.boundingBox.left < belowAnchor.boundingBox.right - ) - val isAboveAnchor = aboveAnchor == null || ( - line.boundingBox.top < aboveAnchor.boundingBox.top && - line.boundingBox.left >= aboveAnchor.boundingBox.left && - line.boundingBox.left < aboveAnchor.boundingBox.right - ) - filters.all { it(line) } && isBelowAnchor && isAboveAnchor - } - } } diff --git a/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrReader.kt b/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrReader.kt index b52dba060b..9de2af36f5 100644 --- a/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrReader.kt +++ b/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrReader.kt @@ -20,8 +20,30 @@ internal class OcrReader( ) { /** * Executes the query defined in [block] and returns the first matching [OcrLine], or null. - * The [block] receives an [OcrQueryScope] as its receiver — call filter methods directly + * The [block] receives an [OcrQuery] as its receiver — call filter methods directly * without any chaining or terminal call. */ - fun find(block: OcrQueryScope.() -> Unit): OcrLine? = OcrQueryScope(model.allLines).apply(block).find() + fun find(block: OcrQuery.() -> Unit): OcrLine? = runQuery(OcrQuery(::runQuery).apply(block)) + + private fun runQuery(scope: OcrQuery): OcrLine? 
{ + val belowAnchor = scope.belowResolver?.invoke() + val aboveAnchor = scope.aboveResolver?.invoke() + + if (scope.belowResolver != null && belowAnchor == null) return null + if (scope.aboveResolver != null && aboveAnchor == null) return null + + return model.allLines.firstOrNull { line -> + val isBelowAnchor = belowAnchor == null || ( + line.boundingBox.top > belowAnchor.boundingBox.top && + line.boundingBox.left >= belowAnchor.boundingBox.left && + line.boundingBox.left < belowAnchor.boundingBox.right + ) + val isAboveAnchor = aboveAnchor == null || ( + line.boundingBox.top < aboveAnchor.boundingBox.top && + line.boundingBox.left >= aboveAnchor.boundingBox.left && + line.boundingBox.left < aboveAnchor.boundingBox.right + ) + scope.filters.all { it(line) } && isBelowAnchor && isAboveAnchor + } + } } diff --git a/feature/external-credential/src/test/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrQueryScopeTest.kt b/feature/external-credential/src/test/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrQueryScopeTest.kt new file mode 100644 index 0000000000..2a3c240268 --- /dev/null +++ b/feature/external-credential/src/test/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrQueryScopeTest.kt @@ -0,0 +1,165 @@ +package com.simprints.feature.externalcredential.screens.scanocr.reader + +import com.google.common.truth.Truth.assertThat +import com.simprints.feature.externalcredential.model.BoundingBox +import io.mockk.MockKAnnotations +import io.mockk.mockk +import org.junit.Before +import org.junit.Test + +internal class OcrQueryTest { + private val noOpSubQuery: (OcrQuery) -> OcrLine? 
= { null } + + private lateinit var query: OcrQuery + + @Before + fun setUp() { + MockKAnnotations.init(this, relaxed = true) + query = OcrQuery(noOpSubQuery) + } + + // ── Filter registration ─────────────────────────────────────────────────── + + @Test + fun `matchesPattern registers a filter`() { + query.matchesPattern(Regex("\\d+")) + assertThat(query.filters).hasSize(1) + } + + @Test + fun `containsText registers a filter`() { + query.containsText("membership") + assertThat(query.filters).hasSize(1) + } + + @Test + fun `hasExactText registers a filter`() { + query.hasExactText("membership number") + assertThat(query.filters).hasSize(1) + } + + @Test + fun `hasId registers a filter`() { + query.hasId(1) + assertThat(query.filters).hasSize(1) + } + + @Test + fun `multiple filters are all registered`() { + query.matchesPattern(Regex("\\d+")) + query.containsText("membership") + query.hasId(1) + assertThat(query.filters).hasSize(3) + } + + // ── Filter correctness ──────────────────────────────────────────────────── + + @Test + fun `matchesPattern filter passes matching line`() { + query.matchesPattern(Regex("^\\d{8}$")) + assertThat(query.filters.all { it(line(text = "12345678")) }).isTrue() + } + + @Test + fun `matchesPattern filter rejects non-matching line`() { + query.matchesPattern(Regex("^\\d{8}$")) + assertThat(query.filters.all { it(line(text = "random string")) }).isFalse() + } + + @Test + fun `containsText filter passes line containing text`() { + query.containsText("member") + assertThat(query.filters.all { it(line(text = "membership number")) }).isTrue() + } + + @Test + fun `containsText filter is case-insensitive`() { + query.containsText("MEMBER") + assertThat(query.filters.all { it(line(text = "membership number")) }).isTrue() + } + + @Test + fun `containsText filter rejects line not containing text`() { + query.containsText("expiry") + assertThat(query.filters.all { it(line(text = "membership number")) }).isFalse() + } + + @Test + fun 
`hasExactText filter passes line with exact text`() { + query.hasExactText("expiry date") + assertThat(query.filters.all { it(line(text = "expiry date")) }).isTrue() + } + + @Test + fun `hasExactText filter is case-insensitive`() { + query.hasExactText("EXPIRY DATE") + assertThat(query.filters.all { it(line(text = "expiry date")) }).isTrue() + } + + @Test + fun `hasExactText filter rejects partial match`() { + query.hasExactText("expiry date") + assertThat(query.filters.all { it(line(text = "expiry")) }).isFalse() + } + + @Test + fun `hasId filter passes line with matching id`() { + query.hasId(2) + assertThat(query.filters.all { it(line(id = 2)) }).isTrue() + } + + @Test + fun `hasId filter rejects line with different id`() { + query.hasId(2) + assertThat(query.filters.all { it(line(id = 99)) }).isFalse() + } + + @Test + fun `isBelow with block registers belowResolver`() { + query.isBelow { containsText("membership") } + assertThat(query.belowResolver).isNotNull() + } + + @Test + fun `isBelow with OcrLine registers belowResolver`() { + query.isBelow(mockk(relaxed = true)) + assertThat(query.belowResolver).isNotNull() + } + + @Test + fun `isAbove with block registers aboveResolver`() { + query.isAbove { containsText("expiry") } + assertThat(query.aboveResolver).isNotNull() + } + + @Test + fun `isAbove with OcrLine registers aboveResolver`() { + query.isAbove(mockk(relaxed = true)) + assertThat(query.aboveResolver).isNotNull() + } + + @Test + fun `isBelow with direct OcrLine resolver returns that line`() { + val anchor = line(id = 0, text = "anchor") + query.isBelow(anchor) + assertThat(query.belowResolver?.invoke()).isEqualTo(anchor) + } + + @Test + fun `isAbove with direct OcrLine resolver returns that line`() { + val anchor = line(id = 0, text = "anchor") + query.isAbove(anchor) + assertThat(query.aboveResolver?.invoke()).isEqualTo(anchor) + } + + private fun line( + id: Int = 0, + text: String = "", + ) = OcrLine( + id = id, + text = text, + boundingBox = 
BoundingBox(left = 0, top = 0, right = 100, bottom = 30), + blockBoundingBox = BoundingBox(left = 0, top = 0, right = 100, bottom = 30), + confidence = 1f, + ) +} From d4483712a0042156095b958e0f642f52ff444fef Mon Sep 17 00:00:00 2001 From: alex Date: Wed, 29 Apr 2026 12:13:04 +0300 Subject: [PATCH 05/10] [MS-1421] Removing Ocrbuilder in favor of ReadTextFromImageUseCase. This allows to isolate ML kit dependencies in tests --- .../screens/scanocr/reader/OcrBuilder.kt | 41 ----- .../GetCredentialCoordinatesUseCase.kt | 38 +--- .../usecase/ReadTextFromImageUseCase.kt | 56 ++++++ .../{OcrQueryScopeTest.kt => OcrQueryTest.kt} | 0 .../GetCredentialCoordinatesUseCaseTest.kt | 172 ++++++++++++++++++ 5 files changed, 233 insertions(+), 74 deletions(-) delete mode 100644 feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrBuilder.kt create mode 100644 feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/usecase/ReadTextFromImageUseCase.kt rename feature/external-credential/src/test/java/com/simprints/feature/externalcredential/screens/scanocr/reader/{OcrQueryScopeTest.kt => OcrQueryTest.kt} (100%) create mode 100644 feature/external-credential/src/test/java/com/simprints/feature/externalcredential/screens/scanocr/usecase/GetCredentialCoordinatesUseCaseTest.kt diff --git a/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrBuilder.kt b/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrBuilder.kt deleted file mode 100644 index 52c60a5457..0000000000 --- a/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrBuilder.kt +++ /dev/null @@ -1,41 +0,0 @@ -package com.simprints.feature.externalcredential.screens.scanocr.reader - -import com.google.mlkit.vision.text.Text -import 
com.simprints.feature.externalcredential.model.toBoundingBox - -/** - * Converts a ML Kit [Text] result into custom [OcrText]. - * - * Each lines is assigned an ID in the order they are encountered in blocks - * The resulting [OcrText.allLines] are sorted by bounding box top (ascending). - */ -internal object OcrModelBuilder { - fun build(mlKitText: Text): OcrText { - var nextLineId = 0 - - val blocks = mlKitText.textBlocks.map { block -> - val lines = block.lines.map { line -> - OcrLine( - id = nextLineId++, - text = line.text.trim().replace(" ", ""), - boundingBox = line.boundingBox.toBoundingBox(), - blockBoundingBox = block.boundingBox.toBoundingBox(), - confidence = line.confidence, - ) - } - OcrBlock( - boundingBox = block.boundingBox.toBoundingBox(), - lines = lines, - ) - } - - val allLinesSorted = blocks - .flatMap { it.lines } - .sortedBy { it.boundingBox.top } - - return OcrText( - blocks = blocks, - allLines = allLinesSorted, - ) - } -} diff --git a/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/usecase/GetCredentialCoordinatesUseCase.kt b/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/usecase/GetCredentialCoordinatesUseCase.kt index 8692dfd488..5fa0df2651 100644 --- a/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/usecase/GetCredentialCoordinatesUseCase.kt +++ b/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/usecase/GetCredentialCoordinatesUseCase.kt @@ -1,14 +1,8 @@ package com.simprints.feature.externalcredential.screens.scanocr.usecase import android.graphics.Bitmap -import com.google.android.gms.tasks.Tasks -import com.google.mlkit.vision.common.InputImage -import com.google.mlkit.vision.text.TextRecognition -import com.google.mlkit.vision.text.latin.TextRecognizerOptions -import com.simprints.core.ExcludedFromGeneratedTestCoverageReports 
import com.simprints.feature.externalcredential.screens.scanocr.model.DetectedOcrBlock import com.simprints.feature.externalcredential.screens.scanocr.model.OcrDocumentType -import com.simprints.feature.externalcredential.screens.scanocr.reader.OcrModelBuilder import com.simprints.feature.externalcredential.screens.scanocr.reader.OcrReader import com.simprints.infra.credential.store.CredentialImageRepository import com.simprints.infra.credential.store.model.CredentialScanImageType.FullDocument @@ -18,48 +12,26 @@ import javax.inject.Inject import javax.inject.Singleton @Singleton -@ExcludedFromGeneratedTestCoverageReports("Unable to mock Google ML Kit") internal class GetCredentialCoordinatesUseCase @Inject constructor( + private val readTextFromImage: ReadTextFromImageUseCase, private val ghanaNhisCardOcrSelectorUseCase: GhanaNhisCardOcrSelectorUseCase, private val ghanaIdCardOcrSelectorUseCase: GhanaIdCardOcrSelectorUseCase, private val credentialImageRepository: CredentialImageRepository, ) { - private val recognizer = TextRecognition.getClient(TextRecognizerOptions.DEFAULT_OPTIONS) - - /** - * OCR uses Google ML kit. It has a following hierarchy: - * - Block. A contiguous set of text lines, such as a paragraph or column, - * - Line. A contiguous set of words on the same axis. There can be multiple Lines in the Block - * - Element. A contiguous set of alphanumeric characters ("word") on the same axis. There can be Elements in one Line - * - Symbol. A single alphanumeric character in an Element. - * - * This method returns a [DetectedOcrBlock] class if the OCR managed to find a line that satisfies the given [documentType] pattern. - * If such Line is found, then it is returned in a [DetectedOcrBlock] alongside its parent block, and a normalized value. - * - * Lines are used instead of Elements because the OCR might mistakenly read an extra white space in a Line, resulting in multiple - * Elements. 
Since Lines are geometrically in one plane, we just take the concatenation of all underlying child Elements, and analyze - * them it as a single string. - * - * @param bitmap bitmap to run OCR on - * @param documentType type of the document - * - * @return [DetectedOcrBlock] if any Line satisfies the [documentType] pattern, or null if none. - */ suspend operator fun invoke( bitmap: Bitmap, documentType: OcrDocumentType, ): DetectedOcrBlock? { - val image = InputImage.fromBitmap(bitmap, 0) return try { - val result = Tasks.await(recognizer.process(image)) ?: return null - val ocrReader = OcrReader(OcrModelBuilder.build(result)) + val ocrText = readTextFromImage(bitmap) ?: return null + val ocrReader = OcrReader(ocrText) val credentialOcrLine = when (documentType) { OcrDocumentType.NhisCard -> ghanaNhisCardOcrSelectorUseCase(ocrReader) OcrDocumentType.GhanaIdCard -> ghanaIdCardOcrSelectorUseCase(ocrReader) } if (credentialOcrLine != null) { val savedImagePath = credentialImageRepository.saveCredentialScan(bitmap, imageType = FullDocument) - return DetectedOcrBlock( + DetectedOcrBlock( imagePath = savedImagePath, documentType = documentType, blockBoundingBox = credentialOcrLine.blockBoundingBox, @@ -67,7 +39,7 @@ internal class GetCredentialCoordinatesUseCase @Inject constructor( readoutValue = credentialOcrLine.text, ) } else { - return null + null } } catch (e: Exception) { Simber.e("OCR failed for $documentType", e, tag = MULTI_FACTOR_ID) diff --git a/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/usecase/ReadTextFromImageUseCase.kt b/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/usecase/ReadTextFromImageUseCase.kt new file mode 100644 index 0000000000..6fa3c335e7 --- /dev/null +++ b/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/usecase/ReadTextFromImageUseCase.kt @@ -0,0 +1,56 @@ +package 
com.simprints.feature.externalcredential.screens.scanocr.usecase + +import android.graphics.Bitmap +import com.google.android.gms.tasks.Tasks +import com.google.mlkit.vision.common.InputImage +import com.google.mlkit.vision.text.Text +import com.google.mlkit.vision.text.TextRecognition +import com.google.mlkit.vision.text.latin.TextRecognizerOptions +import com.simprints.core.ExcludedFromGeneratedTestCoverageReports +import com.simprints.feature.externalcredential.model.toBoundingBox +import com.simprints.feature.externalcredential.screens.scanocr.reader.OcrBlock +import com.simprints.feature.externalcredential.screens.scanocr.reader.OcrLine +import com.simprints.feature.externalcredential.screens.scanocr.reader.OcrText +import javax.inject.Inject +import javax.inject.Singleton + +@Singleton +@ExcludedFromGeneratedTestCoverageReports("Unable to mock Google ML Kit") +internal class ReadTextFromImageUseCase @Inject constructor() { + private val recognizer = TextRecognition.getClient(TextRecognizerOptions.DEFAULT_OPTIONS) + + operator fun invoke(bitmap: Bitmap): OcrText? 
{ + val image = InputImage.fromBitmap(bitmap, 0) + val result = Tasks.await(recognizer.process(image)) ?: return null + return build(result) + } + + private fun build(mlKitText: Text): OcrText { + var nextLineId = 0 + + val blocks = mlKitText.textBlocks.map { block -> + val lines = block.lines.map { line -> + OcrLine( + id = nextLineId++, + text = line.text.trim().replace(" ", ""), + boundingBox = line.boundingBox.toBoundingBox(), + blockBoundingBox = block.boundingBox.toBoundingBox(), + confidence = line.confidence, + ) + } + OcrBlock( + boundingBox = block.boundingBox.toBoundingBox(), + lines = lines, + ) + } + + val allLinesSorted = blocks + .flatMap { it.lines } + .sortedBy { it.boundingBox.top } + + return OcrText( + blocks = blocks, + allLines = allLinesSorted, + ) + } +} diff --git a/feature/external-credential/src/test/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrQueryScopeTest.kt b/feature/external-credential/src/test/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrQueryTest.kt similarity index 100% rename from feature/external-credential/src/test/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrQueryScopeTest.kt rename to feature/external-credential/src/test/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrQueryTest.kt diff --git a/feature/external-credential/src/test/java/com/simprints/feature/externalcredential/screens/scanocr/usecase/GetCredentialCoordinatesUseCaseTest.kt b/feature/external-credential/src/test/java/com/simprints/feature/externalcredential/screens/scanocr/usecase/GetCredentialCoordinatesUseCaseTest.kt new file mode 100644 index 0000000000..8803479d93 --- /dev/null +++ b/feature/external-credential/src/test/java/com/simprints/feature/externalcredential/screens/scanocr/usecase/GetCredentialCoordinatesUseCaseTest.kt @@ -0,0 +1,172 @@ +package com.simprints.feature.externalcredential.screens.scanocr.usecase + +import android.graphics.Bitmap 
+import com.google.common.truth.Truth.assertThat +import com.simprints.feature.externalcredential.model.BoundingBox +import com.simprints.feature.externalcredential.screens.scanocr.model.OcrDocumentType +import com.simprints.feature.externalcredential.screens.scanocr.reader.OcrLine +import com.simprints.infra.credential.store.CredentialImageRepository +import io.mockk.MockKAnnotations +import io.mockk.coEvery +import io.mockk.coVerify +import io.mockk.every +import io.mockk.impl.annotations.MockK +import io.mockk.mockk +import kotlinx.coroutines.test.runTest +import org.junit.Before +import org.junit.Test + +internal class GetCredentialCoordinatesUseCaseTest { + @MockK + private lateinit var readTextFromImage: ReadTextFromImageUseCase + + @MockK + private lateinit var ghanaNhisCardOcrSelectorUseCase: GhanaNhisCardOcrSelectorUseCase + + @MockK + private lateinit var ghanaIdCardOcrSelectorUseCase: GhanaIdCardOcrSelectorUseCase + + @MockK + private lateinit var credentialImageRepository: CredentialImageRepository + + private lateinit var useCase: GetCredentialCoordinatesUseCase + + private val bitmap = mockk(relaxed = true) + private val savedImagePath = "path/to/image.jpg" + + @Before + fun setUp() { + MockKAnnotations.init(this, relaxed = true) + useCase = GetCredentialCoordinatesUseCase( + readTextFromImage = readTextFromImage, + ghanaNhisCardOcrSelectorUseCase = ghanaNhisCardOcrSelectorUseCase, + ghanaIdCardOcrSelectorUseCase = ghanaIdCardOcrSelectorUseCase, + credentialImageRepository = credentialImageRepository, + ) + coEvery { credentialImageRepository.saveCredentialScan(any(), any()) } returns savedImagePath + } + + @Test + fun `returns null when readTextFromImage returns null`() = runTest { + every { readTextFromImage(bitmap) } returns null + + val result = useCase(bitmap, OcrDocumentType.NhisCard) + + assertThat(result).isNull() + } + + @Test + fun `returns null when selector finds no matching line for NhisCard`() = runTest { + every { 
readTextFromImage(bitmap) } returns mockk(relaxed = true) + every { ghanaNhisCardOcrSelectorUseCase(any()) } returns null + + val result = useCase(bitmap, OcrDocumentType.NhisCard) + + assertThat(result).isNull() + } + + @Test + fun `returns null when selector finds no matching line for GhanaIdCard`() = runTest { + every { readTextFromImage(bitmap) } returns mockk(relaxed = true) + every { ghanaIdCardOcrSelectorUseCase(any()) } returns null + + val result = useCase(bitmap, OcrDocumentType.GhanaIdCard) + + assertThat(result).isNull() + } + + @Test + fun `returns DetectedOcrBlock for NhisCard when line is found`() = runTest { + val ocrLine = ocrLine(text = "12345678") + every { readTextFromImage(bitmap) } returns mockk(relaxed = true) + every { ghanaNhisCardOcrSelectorUseCase(any()) } returns ocrLine + + val result = useCase(bitmap, OcrDocumentType.NhisCard) + + assertThat(result).isNotNull() + assertThat(result?.readoutValue).isEqualTo(ocrLine.text) + assertThat(result?.lineBoundingBox).isEqualTo(ocrLine.boundingBox) + assertThat(result?.blockBoundingBox).isEqualTo(ocrLine.blockBoundingBox) + assertThat(result?.documentType).isEqualTo(OcrDocumentType.NhisCard) + } + + @Test + fun `returns DetectedOcrBlock for GhanaIdCard when line is found`() = runTest { + val ocrLine = ocrLine(text = "GHA-123456789-0") + every { readTextFromImage(bitmap) } returns mockk(relaxed = true) + every { ghanaIdCardOcrSelectorUseCase(any()) } returns ocrLine + + val result = useCase(bitmap, OcrDocumentType.GhanaIdCard) + + assertThat(result).isNotNull() + assertThat(result?.readoutValue).isEqualTo(ocrLine.text) + assertThat(result?.documentType).isEqualTo(OcrDocumentType.GhanaIdCard) + } + + @Test + fun `saves image when line is found`() = runTest { + every { readTextFromImage(bitmap) } returns mockk(relaxed = true) + every { ghanaNhisCardOcrSelectorUseCase(any()) } returns ocrLine() + + useCase(bitmap, OcrDocumentType.NhisCard) + + coVerify(exactly = 1) { 
credentialImageRepository.saveCredentialScan(bitmap, any()) } + } + + @Test + fun `does not save image when no line is found`() = runTest { + every { readTextFromImage(bitmap) } returns mockk(relaxed = true) + every { ghanaNhisCardOcrSelectorUseCase(any()) } returns null + + useCase(bitmap, OcrDocumentType.NhisCard) + + coVerify(exactly = 0) { credentialImageRepository.saveCredentialScan(any(), any()) } + } + + @Test + fun `saved image path is set in result`() = runTest { + every { readTextFromImage(bitmap) } returns mockk(relaxed = true) + every { ghanaNhisCardOcrSelectorUseCase(any()) } returns ocrLine() + + val result = useCase(bitmap, OcrDocumentType.NhisCard) + + assertThat(result?.imagePath).isEqualTo(savedImagePath) + } + + @Test + fun `returns null when exception is thrown`() = runTest { + every { readTextFromImage(bitmap) } throws RuntimeException("OCR failed") + + val result = useCase(bitmap, OcrDocumentType.NhisCard) + + assertThat(result).isNull() + } + + @Test + fun `delegates NhisCard to nhis selector`() = runTest { + every { readTextFromImage(bitmap) } returns mockk(relaxed = true) + + useCase(bitmap, OcrDocumentType.NhisCard) + + coVerify(exactly = 1) { ghanaNhisCardOcrSelectorUseCase(any()) } + coVerify(exactly = 0) { ghanaIdCardOcrSelectorUseCase(any()) } + } + + @Test + fun `delegates GhanaIdCard to ghana id selector`() = runTest { + every { readTextFromImage(bitmap) } returns mockk(relaxed = true) + + useCase(bitmap, OcrDocumentType.GhanaIdCard) + + coVerify(exactly = 1) { ghanaIdCardOcrSelectorUseCase(any()) } + coVerify(exactly = 0) { ghanaNhisCardOcrSelectorUseCase(any()) } + } + + private fun ocrLine(text: String = "12345678") = OcrLine( + id = 0, + text = text, + boundingBox = BoundingBox(left = 0, top = 100, right = 200, bottom = 130), + blockBoundingBox = BoundingBox(left = 0, top = 90, right = 200, bottom = 140), + confidence = 1f, + ) +} From ce6929ad16d6fa4367534f08bce0932ecab6a7ab Mon Sep 17 00:00:00 2001 From: alex Date: Thu, 30 Apr 
2026 14:54:10 +0300 Subject: [PATCH 06/10] [MS-1421] KDoc updates and test fixes --- .../externalcredential/screens/scanocr/reader/OcrModel.kt | 4 ++-- .../externalcredential/screens/scanocr/reader/OcrQuery.kt | 2 +- .../externalcredential/screens/scanocr/reader/OcrReader.kt | 7 +++---- .../screens/scanocr/usecase/ReadTextFromImageUseCase.kt | 2 +- .../scanocr/usecase/GhanaIdCardOcrSelectorUseCaseTest.kt | 5 ++--- .../scanocr/usecase/GhanaNhisCardOcrSelectorUseCaseTest.kt | 4 ++-- 6 files changed, 11 insertions(+), 13 deletions(-) diff --git a/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrModel.kt b/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrModel.kt index 9ddf0af011..7c0f82fae9 100644 --- a/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrModel.kt +++ b/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrModel.kt @@ -7,7 +7,7 @@ import com.simprints.feature.externalcredential.model.BoundingBox * Wrapper for all scanned text after the OCR * * @param blocks blocks of text, each containing multiple [OcrLine] - * @param allLines all lines from blocks sorted by top-right bounding box coordinates ascending + * @param allLines all lines from blocks sorted by bounding box top coordinate ascending */ @ExcludedFromGeneratedTestCoverageReports("Data class") internal data class OcrText( @@ -16,7 +16,7 @@ internal data class OcrText( ) /** - * Representation of a single block detected by the OCR kit. A block can contain multiple lines that he OCR kit labeled as belonging to the + * Representation of a single block detected by the OCR kit. A block can contain multiple lines that the OCR kit labeled as belonging to the * same paragraph of text. 
* * @param boundingBox coordinates of the block diff --git a/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrQuery.kt b/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrQuery.kt index 24b4db3f21..6fd1a0475e 100644 --- a/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrQuery.kt +++ b/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrQuery.kt @@ -12,7 +12,7 @@ internal class OcrQuery( internal var aboveResolver: (() -> OcrLine?)? = null fun matchesPattern(regex: Regex): OcrQuery = apply { - filters += { line -> regex.containsMatchIn(line.text) } + filters += { line -> regex.matches(line.text) } } fun containsText(text: String): OcrQuery = apply { diff --git a/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrReader.kt b/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrReader.kt index 9de2af36f5..7568fe8337 100644 --- a/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrReader.kt +++ b/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrReader.kt @@ -5,8 +5,7 @@ package com.simprints.feature.externalcredential.screens.scanocr.reader * * Usage: * ``` - * val model = OcrModelBuilder.build(mlKitText) - * val reader = OcrReader(model) + * val reader = OcrReader(ocrText) * * val membershipNumber = reader.find { * matchesPattern(Regex("\\d{8}")) @@ -16,7 +15,7 @@ package com.simprints.feature.externalcredential.screens.scanocr.reader * ``` */ internal class OcrReader( - val model: OcrText, + val ocrText: OcrText, ) { /** * Executes the query defined in [block] and returns the first matching [OcrLine], or 
null. @@ -32,7 +31,7 @@ internal class OcrReader( if (scope.belowResolver != null && belowAnchor == null) return null if (scope.aboveResolver != null && aboveAnchor == null) return null - return model.allLines.firstOrNull { line -> + return ocrText.allLines.firstOrNull { line -> val isBelowAnchor = belowAnchor == null || ( line.boundingBox.top > belowAnchor.boundingBox.top && line.boundingBox.left >= belowAnchor.boundingBox.left && diff --git a/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/usecase/ReadTextFromImageUseCase.kt b/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/usecase/ReadTextFromImageUseCase.kt index 6fa3c335e7..a6634d6269 100644 --- a/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/usecase/ReadTextFromImageUseCase.kt +++ b/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/usecase/ReadTextFromImageUseCase.kt @@ -32,7 +32,7 @@ internal class ReadTextFromImageUseCase @Inject constructor() { val lines = block.lines.map { line -> OcrLine( id = nextLineId++, - text = line.text.trim().replace(" ", ""), + text = line.text.trim(), boundingBox = line.boundingBox.toBoundingBox(), blockBoundingBox = block.boundingBox.toBoundingBox(), confidence = line.confidence, diff --git a/feature/external-credential/src/test/java/com/simprints/feature/externalcredential/screens/scanocr/usecase/GhanaIdCardOcrSelectorUseCaseTest.kt b/feature/external-credential/src/test/java/com/simprints/feature/externalcredential/screens/scanocr/usecase/GhanaIdCardOcrSelectorUseCaseTest.kt index eddbcb4acc..869657f1a3 100644 --- a/feature/external-credential/src/test/java/com/simprints/feature/externalcredential/screens/scanocr/usecase/GhanaIdCardOcrSelectorUseCaseTest.kt +++ 
b/feature/external-credential/src/test/java/com/simprints/feature/externalcredential/screens/scanocr/usecase/GhanaIdCardOcrSelectorUseCaseTest.kt @@ -27,7 +27,6 @@ internal class GhanaIdCardOcrSelectorUseCaseTest { "GHA-123456789-A", "GHA-123456789-01", "", - "GHA-123456789-0 ", ) @Before @@ -40,7 +39,7 @@ internal class GhanaIdCardOcrSelectorUseCaseTest { fun `returns matching line for valid Ghana ID formats`() { validIds.forEachIndexed { id, ghanaId -> val nonMatching = line(id = id, text = label, top = 100) - val expected = line(id = id, text = ghanaId, top = 140) + val expected = line(id = id + 1, text = ghanaId, top = 140) val reader = buildReader(nonMatching, expected) assertThat(useCase(reader)).isEqualTo(expected) @@ -52,7 +51,7 @@ internal class GhanaIdCardOcrSelectorUseCaseTest { invalidIds.forEachIndexed { id, ghanaId -> val reader = buildReader( line(id = id, text = label, top = 100), - line(id = id, text = ghanaId, top = 140), + line(id = id + 1, text = ghanaId, top = 140), ) assertThat(useCase(reader)).isNull() diff --git a/feature/external-credential/src/test/java/com/simprints/feature/externalcredential/screens/scanocr/usecase/GhanaNhisCardOcrSelectorUseCaseTest.kt b/feature/external-credential/src/test/java/com/simprints/feature/externalcredential/screens/scanocr/usecase/GhanaNhisCardOcrSelectorUseCaseTest.kt index 618222615b..44e23b80a9 100644 --- a/feature/external-credential/src/test/java/com/simprints/feature/externalcredential/screens/scanocr/usecase/GhanaNhisCardOcrSelectorUseCaseTest.kt +++ b/feature/external-credential/src/test/java/com/simprints/feature/externalcredential/screens/scanocr/usecase/GhanaNhisCardOcrSelectorUseCaseTest.kt @@ -35,7 +35,7 @@ internal class GhanaNhisCardOcrSelectorUseCaseTest { fun `returns matching line for valid NHIS membership numbers`() { validNumbers.forEachIndexed { id, number -> val label = line(id = id, text = label, top = 100) - val expected = line(id = id, text = number, top = 140) + val expected = 
line(id = id + 1, text = number, top = 140) val reader = buildReader(label, expected) assertThat(useCase(reader)).isEqualTo(expected) @@ -47,7 +47,7 @@ internal class GhanaNhisCardOcrSelectorUseCaseTest { invalidNumbers.forEachIndexed { id, number -> val reader = buildReader( line(id = id, text = "membership number", top = 100), - line(id = id, text = number, top = 140), + line(id = id + 1, text = number, top = 140), ) assertThat(useCase(reader)).isNull() From 53e64f7befa4f8bb225bb0ddd9d69c701de7e56e Mon Sep 17 00:00:00 2001 From: alex Date: Thu, 30 Apr 2026 16:35:49 +0300 Subject: [PATCH 07/10] [MS-1426] adding ::containsPattern method to OcrQuery to be specfic in query methods --- .../screens/scanocr/reader/OcrQuery.kt | 4 ++ .../screens/scanocr/reader/OcrQueryTest.kt | 56 ++++++++++++++----- .../screens/scanocr/reader/OcrReaderTest.kt | 12 ++-- 3 files changed, 51 insertions(+), 21 deletions(-) diff --git a/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrQuery.kt b/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrQuery.kt index 6fd1a0475e..c14cac88f1 100644 --- a/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrQuery.kt +++ b/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrQuery.kt @@ -15,6 +15,10 @@ internal class OcrQuery( filters += { line -> regex.matches(line.text) } } + fun containsPattern(regex: Regex): OcrQuery = apply { + filters += { line -> regex.containsMatchIn(line.text) } + } + fun containsText(text: String): OcrQuery = apply { filters += { line -> line.text.contains(text, ignoreCase = true) } } diff --git a/feature/external-credential/src/test/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrQueryTest.kt 
b/feature/external-credential/src/test/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrQueryTest.kt index 2a3c240268..773b3eaf87 100644 --- a/feature/external-credential/src/test/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrQueryTest.kt +++ b/feature/external-credential/src/test/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrQueryTest.kt @@ -18,14 +18,18 @@ internal class OcrQueryTest { query = OcrQuery(noOpSubQuery) } - // ── Filter registration ─────────────────────────────────────────────────── - @Test fun `matchesPattern registers a filter`() { query.matchesPattern(Regex("\\d+")) assertThat(query.filters).hasSize(1) } + @Test + fun `containsPattern registers a filter`() { + query.containsPattern(Regex("\\d+")) + assertThat(query.filters).hasSize(1) + } + @Test fun `containsText registers a filter`() { query.containsText("membership") @@ -52,64 +56,86 @@ internal class OcrQueryTest { assertThat(query.filters).hasSize(3) } - // ── Filter correctness ──────────────────────────────────────────────────── - @Test - fun `matchesPattern filter passes matching line`() { + fun `matchesPattern passes line whose full text matches pattern`() { query.matchesPattern(Regex("^\\d{8}$")) assertThat(query.filters.all { it(line(text = "12345678")) }).isTrue() } @Test - fun `matchesPattern filter rejects non-matching line`() { + fun `matchesPattern rejects line where pattern matches only a substring`() { + query.matchesPattern(Regex("\\d{8}")) + assertThat(query.filters.all { it(line(text = "ID:12345678")) }).isFalse() + } + + @Test + fun `matchesPattern rejects non-matching line`() { query.matchesPattern(Regex("^\\d{8}$")) - assertThat(query.filters.all { it(line(text = "random string")) }).isFalse() + assertThat(query.filters.all { it(line(text = "abcdefgh")) }).isFalse() + } + + @Test + fun `containsPattern passes line containing a partial match`() { + query.containsPattern(Regex("membership")) + 
assertThat(query.filters.all { it(line(text = "membership number")) }).isTrue() + } + + @Test + fun `containsPattern passes line where pattern matches full text`() { + query.containsPattern(Regex("membership")) + assertThat(query.filters.all { it(line(text = "membership")) }).isTrue() + } + + @Test + fun `containsPattern rejects line with no match`() { + query.containsPattern(Regex("expiry")) + assertThat(query.filters.all { it(line(text = "membership number")) }).isFalse() } @Test - fun `containsText filter passes line containing text`() { + fun `containsText passes line containing text`() { query.containsText("member") assertThat(query.filters.all { it(line(text = "membership number")) }).isTrue() } @Test - fun `containsText filter is case-insensitive`() { + fun `containsText is case-insensitive`() { query.containsText("MEMBER") assertThat(query.filters.all { it(line(text = "membership number")) }).isTrue() } @Test - fun `containsText filter rejects line not containing text`() { + fun `containsText rejects line not containing text`() { query.containsText("expiry") assertThat(query.filters.all { it(line(text = "membership number")) }).isFalse() } @Test - fun `hasExactText filter passes line with exact text`() { + fun `hasExactText passes line with exact text`() { query.hasExactText("expiry date") assertThat(query.filters.all { it(line(text = "expiry date")) }).isTrue() } @Test - fun `hasExactText filter is case-insensitive`() { + fun `hasExactText is case-insensitive`() { query.hasExactText("EXPIRY DATE") assertThat(query.filters.all { it(line(text = "expiry date")) }).isTrue() } @Test - fun `hasExactText filter rejects partial match`() { + fun `hasExactText rejects partial match`() { query.hasExactText("expiry date") assertThat(query.filters.all { it(line(text = "expiry")) }).isFalse() } @Test - fun `hasId filter passes line with matching id`() { + fun `hasId passes line with matching id`() { query.hasId(2) assertThat(query.filters.all { it(line(id = 2)) 
}).isTrue() } @Test - fun `hasId filter rejects line with different id`() { + fun `hasId rejects line with different id`() { query.hasId(2) assertThat(query.filters.all { it(line(id = 99)) }).isFalse() } diff --git a/feature/external-credential/src/test/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrReaderTest.kt b/feature/external-credential/src/test/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrReaderTest.kt index 09d76d37e3..15c351b190 100644 --- a/feature/external-credential/src/test/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrReaderTest.kt +++ b/feature/external-credential/src/test/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrReaderTest.kt @@ -30,7 +30,7 @@ internal class OcrReaderTest { reader = OcrReader( OcrText( blocks = emptyList(), - allLines = listOf(labelMembership, membershipValue, labelExpiryDate, expiryDateValue, labelIssueDate, issueDateValue), + allLines = listOf(labelMembership, membershipValue, labelIssueDate, issueDateValue, labelExpiryDate, expiryDateValue), ), ) } @@ -111,16 +111,16 @@ internal class OcrReaderTest { fun `isBelow block resolves anchor via text containment`() { val result = reader.find { matchesPattern(Regex("^\\d{8}$")) - isBelow { containsText("membership") } + isBelow { containsText("membership number") } } assertThat(result).isEqualTo(membershipValue) } @Test - fun `isBelow block resolves anchor via pattern`() { + fun `isBelow block resolves anchor via containsPattern`() { val result = reader.find { matchesPattern(Regex("^\\d{8}$")) - isBelow { matchesPattern(Regex("membership")) } + isBelow { containsPattern(Regex("membership")) } } assertThat(result).isEqualTo(membershipValue) } @@ -156,10 +156,10 @@ internal class OcrReaderTest { } @Test - fun `isAbove block resolves anchor via pattern`() { + fun `isAbove block resolves anchor via containsPattern`() { val result = reader.find { matchesPattern(Regex("^\\d{8}$")) - 
isAbove { matchesPattern(Regex("expiry")) } + isAbove { containsPattern(Regex("expiry")) } } assertThat(result).isEqualTo(membershipValue) } From c49df3a74b624781bc758a63021e27aca15da927 Mon Sep 17 00:00:00 2001 From: alex Date: Thu, 30 Apr 2026 17:26:16 +0300 Subject: [PATCH 08/10] [MS-1426] Fixing tests --- .../externalcredential/screens/scanocr/reader/OcrReaderTest.kt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/feature/external-credential/src/test/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrReaderTest.kt b/feature/external-credential/src/test/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrReaderTest.kt index 15c351b190..6e5ff6f7ec 100644 --- a/feature/external-credential/src/test/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrReaderTest.kt +++ b/feature/external-credential/src/test/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrReaderTest.kt @@ -86,7 +86,7 @@ internal class OcrReaderTest { @Test fun `matchesPattern finds date format`() { val result = reader.find { matchesPattern(Regex("^\\d{2}/\\d{2}$")) } - assertThat(result).isEqualTo(expiryDateValue) + assertThat(result).isEqualTo(issueDateValue) } @Test From 3c5ef917017e530d7417758a0bbedd5335ba6324 Mon Sep 17 00:00:00 2001 From: alex Date: Tue, 5 May 2026 14:05:45 +0300 Subject: [PATCH 09/10] [MS-1421] Decoupling OcrQuery execution from construction by moving resolution logic into OcrReader --- .../screens/scanocr/reader/OcrQuery.kt | 16 ++++----- .../screens/scanocr/reader/OcrReader.kt | 14 ++++---- .../screens/scanocr/reader/OcrQueryTest.kt | 36 ++++++++++--------- 3 files changed, 33 insertions(+), 33 deletions(-) diff --git a/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrQuery.kt b/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrQuery.kt index 
c14cac88f1..e5785fdeed 100644 --- a/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrQuery.kt +++ b/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrQuery.kt @@ -4,12 +4,10 @@ package com.simprints.feature.externalcredential.screens.scanocr.reader * Defines the search criteria for locating a single line of text within a scanned document. * Used as the receiver of the [OcrReader.find] block. */ -internal class OcrQuery( - private val subQuery: (OcrQuery) -> OcrLine?, -) { +internal class OcrQuery { internal val filters = mutableListOf<(OcrLine) -> Boolean>() - internal var belowResolver: (() -> OcrLine?)? = null - internal var aboveResolver: (() -> OcrLine?)? = null + internal var belowAnchor: OcrQuery? = null + internal var aboveAnchor: OcrQuery? = null fun matchesPattern(regex: Regex): OcrQuery = apply { filters += { line -> regex.matches(line.text) } @@ -32,18 +30,18 @@ internal class OcrQuery( } fun isBelow(resolveAnchor: OcrQuery.() -> Unit): OcrQuery = apply { - belowResolver = { subQuery(OcrQuery(subQuery).apply(resolveAnchor)) } + belowAnchor = OcrQuery().apply(resolveAnchor) } fun isBelow(anchor: OcrLine): OcrQuery = apply { - belowResolver = { anchor } + belowAnchor = OcrQuery().apply { filters += { line -> line.id == anchor.id } } } fun isAbove(resolveAnchor: OcrQuery.() -> Unit): OcrQuery = apply { - aboveResolver = { subQuery(OcrQuery(subQuery).apply(resolveAnchor)) } + aboveAnchor = OcrQuery().apply(resolveAnchor) } fun isAbove(anchor: OcrLine): OcrQuery = apply { - aboveResolver = { anchor } + aboveAnchor = OcrQuery().apply { filters += { line -> line.id == anchor.id } } } } diff --git a/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrReader.kt b/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrReader.kt index 
7568fe8337..19035a99fa 100644 --- a/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrReader.kt +++ b/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrReader.kt @@ -22,14 +22,14 @@ internal class OcrReader( * The [block] receives an [OcrQuery] as its receiver — call filter methods directly * without any chaining or terminal call. */ - fun find(block: OcrQuery.() -> Unit): OcrLine? = runQuery(OcrQuery(::runQuery).apply(block)) + fun find(block: OcrQuery.() -> Unit): OcrLine? = runQuery(OcrQuery().apply(block)) - private fun runQuery(scope: OcrQuery): OcrLine? { - val belowAnchor = scope.belowResolver?.invoke() - val aboveAnchor = scope.aboveResolver?.invoke() + private fun runQuery(query: OcrQuery): OcrLine? { + val belowAnchor = query.belowAnchor?.let { runQuery(it) } + val aboveAnchor = query.aboveAnchor?.let { runQuery(it) } - if (scope.belowResolver != null && belowAnchor == null) return null - if (scope.aboveResolver != null && aboveAnchor == null) return null + if (query.belowAnchor != null && belowAnchor == null) return null + if (query.aboveAnchor != null && aboveAnchor == null) return null return ocrText.allLines.firstOrNull { line -> val isBelowAnchor = belowAnchor == null || ( @@ -42,7 +42,7 @@ internal class OcrReader( line.boundingBox.left >= aboveAnchor.boundingBox.left && line.boundingBox.left < aboveAnchor.boundingBox.right ) - scope.filters.all { it(line) } && isBelowAnchor && isAboveAnchor + query.filters.all { it(line) } && isBelowAnchor && isAboveAnchor } } } diff --git a/feature/external-credential/src/test/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrQueryTest.kt b/feature/external-credential/src/test/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrQueryTest.kt index 773b3eaf87..f1c70b3479 100644 --- 
a/feature/external-credential/src/test/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrQueryTest.kt +++ b/feature/external-credential/src/test/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrQueryTest.kt @@ -8,14 +8,12 @@ import org.junit.Before import org.junit.Test internal class OcrQueryTest { - private val noOpSubQuery: (OcrQuery) -> OcrLine? = { null } - private lateinit var query: OcrQuery @Before fun setUp() { MockKAnnotations.init(this, relaxed = true) - query = OcrQuery(noOpSubQuery) + query = OcrQuery() } @Test @@ -141,41 +139,45 @@ internal class OcrQueryTest { } @Test - fun `isBelow with block registers belowResolver`() { + fun `isBelow with block registers belowAnchor`() { query.isBelow { containsText("membership") } - assertThat(query.belowResolver).isNotNull() + assertThat(query.belowAnchor).isNotNull() } @Test - fun `isBelow with OcrLine registers belowResolver`() { + fun `isBelow with OcrLine registers belowAnchor`() { query.isBelow(mockk(relaxed = true)) - assertThat(query.belowResolver).isNotNull() + assertThat(query.belowAnchor).isNotNull() } @Test - fun `isAbove with block registers aboveResolver`() { + fun `isAbove with block registers aboveAnchor`() { query.isAbove { containsText("expiry") } - assertThat(query.aboveResolver).isNotNull() + assertThat(query.aboveAnchor).isNotNull() } @Test - fun `isAbove with OcrLine registers aboveResolver`() { + fun `isAbove with OcrLine registers aboveAnchor`() { query.isAbove(mockk(relaxed = true)) - assertThat(query.aboveResolver).isNotNull() + assertThat(query.aboveAnchor).isNotNull() } @Test - fun `isBelow with direct OcrLine resolver returns that line`() { - val anchor = line(id = 0, text = "anchor") + fun `isBelow with direct OcrLine stores id filter in anchor query`() { + val targetId = 17 + val anchor = line(id = targetId, text = "anchor") query.isBelow(anchor) - assertThat(query.belowResolver?.invoke()).isEqualTo(anchor) + 
assertThat(query.belowAnchor?.filters?.all { it(line(id = targetId)) }).isTrue() + assertThat(query.belowAnchor?.filters?.all { it(line(id = 99)) }).isFalse() } @Test - fun `isAbove with direct OcrLine resolver returns that line`() { - val anchor = line(id = 0, text = "anchor") + fun `isAbove with direct OcrLine stores id filter in anchor query`() { + val targetId = 17 + val anchor = line(id = targetId, text = "anchor") query.isAbove(anchor) - assertThat(query.aboveResolver?.invoke()).isEqualTo(anchor) + assertThat(query.aboveAnchor?.filters?.all { it(line(id = targetId)) }).isTrue() + assertThat(query.aboveAnchor?.filters?.all { it(line(id = 99)) }).isFalse() } private fun line( From e9b6a5348708f2a46081903d2c8c35e83afb7656 Mon Sep 17 00:00:00 2001 From: alex Date: Tue, 5 May 2026 17:01:06 +0300 Subject: [PATCH 10/10] [MS-1421] Clearing the redundant code, removing OcrBlock class, adding KDoc explaining the nesting limits of the OcrReader queries --- .../screens/scanocr/reader/OcrModel.kt | 15 -------- .../screens/scanocr/reader/OcrQuery.kt | 18 ++++----- .../screens/scanocr/reader/OcrReader.kt | 22 +++++++++++ .../usecase/ReadTextFromImageUseCase.kt | 37 +++++++------------ .../screens/scanocr/reader/OcrReaderTest.kt | 1 - .../GhanaIdCardOcrSelectorUseCaseTest.kt | 2 +- .../GhanaNhisCardOcrSelectorUseCaseTest.kt | 2 +- 7 files changed, 46 insertions(+), 51 deletions(-) diff --git a/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrModel.kt b/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrModel.kt index 7c0f82fae9..0ef455af6e 100644 --- a/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrModel.kt +++ b/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrModel.kt @@ -6,28 +6,13 @@ import 
com.simprints.feature.externalcredential.model.BoundingBox /** * Wrapper for all scanned text after the OCR * - * @param blocks blocks of text, each containing multiple [OcrLine] * @param allLines all lines from blocks sorted by bounding box top coordinate ascending */ @ExcludedFromGeneratedTestCoverageReports("Data class") internal data class OcrText( - val blocks: List, val allLines: List, ) -/** - * Representation of a single block detected by the OCR kit. A block can contain multiple lines that the OCR kit labeled as belonging to the - * same paragraph of text. - * - * @param boundingBox coordinates of the block - * @param lines nested lines of the block - */ -@ExcludedFromGeneratedTestCoverageReports("Data class") -internal data class OcrBlock( - val boundingBox: BoundingBox, - val lines: List, -) - /** * A single line of text detected by the OCR kit. * diff --git a/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrQuery.kt b/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrQuery.kt index e5785fdeed..3616adf803 100644 --- a/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrQuery.kt +++ b/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrQuery.kt @@ -9,39 +9,39 @@ internal class OcrQuery { internal var belowAnchor: OcrQuery? = null internal var aboveAnchor: OcrQuery? 
= null - fun matchesPattern(regex: Regex): OcrQuery = apply { + fun matchesPattern(regex: Regex) { filters += { line -> regex.matches(line.text) } } - fun containsPattern(regex: Regex): OcrQuery = apply { + fun containsPattern(regex: Regex) { filters += { line -> regex.containsMatchIn(line.text) } } - fun containsText(text: String): OcrQuery = apply { + fun containsText(text: String) { filters += { line -> line.text.contains(text, ignoreCase = true) } } - fun hasExactText(text: String): OcrQuery = apply { + fun hasExactText(text: String) { filters += { line -> line.text.equals(text, ignoreCase = true) } } - fun hasId(id: Int): OcrQuery = apply { + fun hasId(id: Int) { filters += { line -> line.id == id } } - fun isBelow(resolveAnchor: OcrQuery.() -> Unit): OcrQuery = apply { + fun isBelow(resolveAnchor: OcrQuery.() -> Unit) { belowAnchor = OcrQuery().apply(resolveAnchor) } - fun isBelow(anchor: OcrLine): OcrQuery = apply { + fun isBelow(anchor: OcrLine) { belowAnchor = OcrQuery().apply { filters += { line -> line.id == anchor.id } } } - fun isAbove(resolveAnchor: OcrQuery.() -> Unit): OcrQuery = apply { + fun isAbove(resolveAnchor: OcrQuery.() -> Unit) { aboveAnchor = OcrQuery().apply(resolveAnchor) } - fun isAbove(anchor: OcrLine): OcrQuery = apply { + fun isAbove(anchor: OcrLine) { aboveAnchor = OcrQuery().apply { filters += { line -> line.id == anchor.id } } } } diff --git a/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrReader.kt b/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrReader.kt index 19035a99fa..8360083064 100644 --- a/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrReader.kt +++ b/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrReader.kt @@ -21,6 +21,28 @@ internal class OcrReader( * Executes the query 
defined in [block] and returns the first matching [OcrLine], or null. * The [block] receives an [OcrQuery] as its receiver — call filter methods directly * without any chaining or terminal call. + * + * Usage: + * ``` + * val reader = OcrReader(ocrText) + * + * val membershipNumber = reader.find { + * matchesPattern(Regex("\\d{8}")) + * isBelow { containsText("membership number") } + * isAbove { containsText("expiry date") } + * } + * ``` + * + * Only one level of nesting is supported. Spatial filters inside an anchor block are silently ignored. + * The following is NOT supported: + * ``` + * reader.find { + * isBelow { + * isBelow { containsText("some major title") } // ignored — has no effect + * containsText("some subtitle") + * } + * } + * ``` */ fun find(block: OcrQuery.() -> Unit): OcrLine? = runQuery(OcrQuery().apply(block)) diff --git a/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/usecase/ReadTextFromImageUseCase.kt b/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/usecase/ReadTextFromImageUseCase.kt index a6634d6269..5f2f5ffd1a 100644 --- a/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/usecase/ReadTextFromImageUseCase.kt +++ b/feature/external-credential/src/main/java/com/simprints/feature/externalcredential/screens/scanocr/usecase/ReadTextFromImageUseCase.kt @@ -8,7 +8,6 @@ import com.google.mlkit.vision.text.TextRecognition import com.google.mlkit.vision.text.latin.TextRecognizerOptions import com.simprints.core.ExcludedFromGeneratedTestCoverageReports import com.simprints.feature.externalcredential.model.toBoundingBox -import com.simprints.feature.externalcredential.screens.scanocr.reader.OcrBlock import com.simprints.feature.externalcredential.screens.scanocr.reader.OcrLine import com.simprints.feature.externalcredential.screens.scanocr.reader.OcrText import javax.inject.Inject @@ -28,29 +27,19 @@ 
internal class ReadTextFromImageUseCase @Inject constructor() { private fun build(mlKitText: Text): OcrText { var nextLineId = 0 - val blocks = mlKitText.textBlocks.map { block -> - val lines = block.lines.map { line -> - OcrLine( - id = nextLineId++, - text = line.text.trim(), - boundingBox = line.boundingBox.toBoundingBox(), - blockBoundingBox = block.boundingBox.toBoundingBox(), - confidence = line.confidence, - ) - } - OcrBlock( - boundingBox = block.boundingBox.toBoundingBox(), - lines = lines, - ) - } + val allLinesSorted = mlKitText.textBlocks + .flatMap { block -> + block.lines.map { line -> + OcrLine( + id = nextLineId++, + text = line.text.trim(), + boundingBox = line.boundingBox.toBoundingBox(), + blockBoundingBox = block.boundingBox.toBoundingBox(), + confidence = line.confidence, + ) + } + }.sortedBy { it.boundingBox.top } - val allLinesSorted = blocks - .flatMap { it.lines } - .sortedBy { it.boundingBox.top } - - return OcrText( - blocks = blocks, - allLines = allLinesSorted, - ) + return OcrText(allLines = allLinesSorted) } } diff --git a/feature/external-credential/src/test/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrReaderTest.kt b/feature/external-credential/src/test/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrReaderTest.kt index 6e5ff6f7ec..42f1acc880 100644 --- a/feature/external-credential/src/test/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrReaderTest.kt +++ b/feature/external-credential/src/test/java/com/simprints/feature/externalcredential/screens/scanocr/reader/OcrReaderTest.kt @@ -29,7 +29,6 @@ internal class OcrReaderTest { reader = OcrReader( OcrText( - blocks = emptyList(), allLines = listOf(labelMembership, membershipValue, labelIssueDate, issueDateValue, labelExpiryDate, expiryDateValue), ), ) diff --git a/feature/external-credential/src/test/java/com/simprints/feature/externalcredential/screens/scanocr/usecase/GhanaIdCardOcrSelectorUseCaseTest.kt 
b/feature/external-credential/src/test/java/com/simprints/feature/externalcredential/screens/scanocr/usecase/GhanaIdCardOcrSelectorUseCaseTest.kt index 869657f1a3..356f10d58e 100644 --- a/feature/external-credential/src/test/java/com/simprints/feature/externalcredential/screens/scanocr/usecase/GhanaIdCardOcrSelectorUseCaseTest.kt +++ b/feature/external-credential/src/test/java/com/simprints/feature/externalcredential/screens/scanocr/usecase/GhanaIdCardOcrSelectorUseCaseTest.kt @@ -59,7 +59,7 @@ internal class GhanaIdCardOcrSelectorUseCaseTest { } private fun buildReader(vararg lines: OcrLine) = OcrReader( - OcrText(blocks = emptyList(), allLines = lines.toList()), + OcrText(allLines = lines.toList()), ) private fun line( diff --git a/feature/external-credential/src/test/java/com/simprints/feature/externalcredential/screens/scanocr/usecase/GhanaNhisCardOcrSelectorUseCaseTest.kt b/feature/external-credential/src/test/java/com/simprints/feature/externalcredential/screens/scanocr/usecase/GhanaNhisCardOcrSelectorUseCaseTest.kt index 44e23b80a9..3db7f26b91 100644 --- a/feature/external-credential/src/test/java/com/simprints/feature/externalcredential/screens/scanocr/usecase/GhanaNhisCardOcrSelectorUseCaseTest.kt +++ b/feature/external-credential/src/test/java/com/simprints/feature/externalcredential/screens/scanocr/usecase/GhanaNhisCardOcrSelectorUseCaseTest.kt @@ -55,7 +55,7 @@ internal class GhanaNhisCardOcrSelectorUseCaseTest { } private fun buildReader(vararg lines: OcrLine) = OcrReader( - OcrText(blocks = emptyList(), allLines = lines.toList()), + OcrText(allLines = lines.toList()), ) private fun line(