Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,6 @@ data class BoundingBox(
val bottom: Int,
) : JavaSerializable

fun Rect.toBoundingBox(): BoundingBox = BoundingBox(left, top, right, bottom)
/**
 * Converts a nullable [Rect] into a [BoundingBox].
 *
 * A `null` receiver yields an all-zero box; otherwise the four edges are copied as-is.
 */
fun Rect?.toBoundingBox(): BoundingBox {
    val rect = this ?: return BoundingBox(0, 0, 0, 0)
    return BoundingBox(rect.left, rect.top, rect.right, rect.bottom)
}
Comment thread
alexandr-simprints marked this conversation as resolved.

/** Builds a [Rect] carrying the same left, top, right and bottom values as this [BoundingBox]. */
fun BoundingBox.toRect(): Rect {
    return Rect(left, top, right, bottom)
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
package com.simprints.feature.externalcredential.screens.scanocr.reader

import com.simprints.core.ExcludedFromGeneratedTestCoverageReports
import com.simprints.feature.externalcredential.model.BoundingBox

/**
 * Wrapper for all the text produced by a single OCR pass.
 *
 * @property allLines every detected line, flattened across all blocks and expected to be ordered by
 * bounding box top coordinate ascending (topmost first). Code that picks the "first" matching line
 * depends on this ordering.
 */
@ExcludedFromGeneratedTestCoverageReports("Data class")
internal data class OcrText(
val allLines: List<OcrLine>,
)

/**
 * A single line of text detected by the OCR kit.
 *
 * @property id unique id of the line within a single OCR scan
 * @property text text of the line. NOTE(review): described as normalized (extra spaces removed), but the
 * visible producer (`ReadTextFromImageUseCase`) only trims leading/trailing whitespace — confirm whether
 * inner whitespace is expected to be collapsed as well.
 * @property boundingBox coordinates of the line
 * @property blockBoundingBox coordinates of the parent block the line belongs to
 * @property confidence overall confidence of the text value as reported by the OCR kit for this line;
 * presumably derived from the confidence of each element in [text] — verify against the OCR kit docs
 */
@ExcludedFromGeneratedTestCoverageReports("Data class")
internal data class OcrLine(
val id: Int,
val text: String,
val boundingBox: BoundingBox,
val blockBoundingBox: BoundingBox,
val confidence: Float,
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
package com.simprints.feature.externalcredential.screens.scanocr.reader

/**
 * Collects the criteria used to locate a single line of text within a scanned document.
 * Serves as the receiver of the [OcrReader.find] block.
 *
 * Every text/id criterion is stored as a predicate in [filters]; a line has to satisfy all of them.
 * Spatial criteria are stored as nested queries in [belowAnchor] / [aboveAnchor]; declaring the same
 * spatial criterion twice replaces the previously declared anchor.
 */
internal class OcrQuery {
    internal val filters = mutableListOf<(OcrLine) -> Boolean>()
    internal var belowAnchor: OcrQuery? = null
    internal var aboveAnchor: OcrQuery? = null

    /** Requires the whole line text to match [regex]. */
    fun matchesPattern(regex: Regex) = addFilter { candidate -> regex.matches(candidate.text) }

    /** Requires [regex] to match at least one fragment of the line text. */
    fun containsPattern(regex: Regex) = addFilter { candidate -> regex.containsMatchIn(candidate.text) }

    /** Requires the line text to contain [text], ignoring case. */
    fun containsText(text: String) = addFilter { candidate -> candidate.text.contains(text, ignoreCase = true) }

    /** Requires the line text to be equal to [text], ignoring case. */
    fun hasExactText(text: String) = addFilter { candidate -> candidate.text.equals(text, ignoreCase = true) }

    /** Requires the line to have the given [id]. */
    fun hasId(id: Int) = addFilter { candidate -> candidate.id == id }

    /** Requires the line to be below the line described by the [resolveAnchor] query. */
    fun isBelow(resolveAnchor: OcrQuery.() -> Unit) {
        belowAnchor = OcrQuery().also { anchorQuery -> anchorQuery.resolveAnchor() }
    }

    /** Requires the line to be below the already known [anchor] line, identified by its id. */
    fun isBelow(anchor: OcrLine) {
        belowAnchor = queryForLine(anchor)
    }

    /** Requires the line to be above the line described by the [resolveAnchor] query. */
    fun isAbove(resolveAnchor: OcrQuery.() -> Unit) {
        aboveAnchor = OcrQuery().also { anchorQuery -> anchorQuery.resolveAnchor() }
    }

    /** Requires the line to be above the already known [anchor] line, identified by its id. */
    fun isAbove(anchor: OcrLine) {
        aboveAnchor = queryForLine(anchor)
    }

    // Single place where predicates get registered, so every criterion is stored the same way.
    private fun addFilter(predicate: (OcrLine) -> Boolean) {
        filters += predicate
    }

    // Anchor query whose only criterion is the id of an already resolved line.
    private fun queryForLine(line: OcrLine): OcrQuery = OcrQuery().apply { hasId(line.id) }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
package com.simprints.feature.externalcredential.screens.scanocr.reader

/**
 * Entry point for querying an [OcrText].
 *
 * Usage:
 * ```
 * val reader = OcrReader(ocrText)
 *
 * val membershipNumber = reader.find {
 *     matchesPattern(Regex("\\d{8}"))
 *     isBelow { containsText("membership number") }
 *     isAbove { containsText("expiry date") }
 * }
 * ```
 */
internal class OcrReader(
    val ocrText: OcrText,
) {
    /**
     * Executes the query defined in [block] and returns the first matching [OcrLine], or null.
     * The [block] receives an [OcrQuery] as its receiver — call filter methods directly
     * without any chaining or terminal call.
     *
     * "First" means first in [OcrText.allLines] order, so the result depends on how that list is sorted.
     *
     * Spatial filters:
     * - `isBelow` keeps lines whose top edge is strictly greater than the anchor's top edge;
     * - `isAbove` keeps lines whose top edge is strictly smaller than the anchor's top edge;
     * - in both cases the left edge of the line must fall within the anchor's horizontal span
     *   (`anchor.left <= line.left < anchor.right`).
     *
     * Each anchor block is itself resolved to the first line matching it. If an anchor was declared
     * but no line matches it, the whole query returns null.
     *
     * Anchor blocks are resolved recursively, so spatial filters declared inside an anchor block
     * are applied when locating that anchor:
     * ```
     * reader.find {
     *     isBelow {
     *         isBelow { containsText("some major title") } // narrows down which subtitle is the anchor
     *         containsText("some subtitle")
     *     }
     * }
     * ```
     *
     * @return the first line satisfying every filter and spatial constraint, or null if there is none
     */
    fun find(block: OcrQuery.() -> Unit): OcrLine? = runQuery(OcrQuery().apply(block))

    private fun runQuery(query: OcrQuery): OcrLine? {
        // A declared anchor that cannot be resolved makes the whole query unsatisfiable.
        val belowAnchor = query.belowAnchor?.let { anchorQuery -> runQuery(anchorQuery) ?: return null }
        val aboveAnchor = query.aboveAnchor?.let { anchorQuery -> runQuery(anchorQuery) ?: return null }

        return ocrText.allLines.firstOrNull { line ->
            query.filters.all { matches -> matches(line) } &&
                (belowAnchor == null || line.isBelow(belowAnchor)) &&
                (aboveAnchor == null || line.isAbove(aboveAnchor))
        }
    }

    // Compares top edges only; the anchor itself can never qualify because the comparison is strict.
    private fun OcrLine.isBelow(anchor: OcrLine): Boolean =
        boundingBox.top > anchor.boundingBox.top && startsWithinHorizontalSpanOf(anchor)

    private fun OcrLine.isAbove(anchor: OcrLine): Boolean =
        boundingBox.top < anchor.boundingBox.top && startsWithinHorizontalSpanOf(anchor)

    // The line's left edge must lie inside the anchor's [left, right) range.
    private fun OcrLine.startsWithinHorizontalSpanOf(anchor: OcrLine): Boolean =
        boundingBox.left >= anchor.boundingBox.left && boundingBox.left < anchor.boundingBox.right
}
Original file line number Diff line number Diff line change
@@ -1,14 +1,9 @@
package com.simprints.feature.externalcredential.screens.scanocr.usecase

import android.graphics.Bitmap
import com.google.android.gms.tasks.Tasks
import com.google.mlkit.vision.common.InputImage
import com.google.mlkit.vision.text.TextRecognition
import com.google.mlkit.vision.text.latin.TextRecognizerOptions
import com.simprints.core.ExcludedFromGeneratedTestCoverageReports
import com.simprints.feature.externalcredential.model.toBoundingBox
import com.simprints.feature.externalcredential.screens.scanocr.model.DetectedOcrBlock
import com.simprints.feature.externalcredential.screens.scanocr.model.OcrDocumentType
import com.simprints.feature.externalcredential.screens.scanocr.reader.OcrReader
import com.simprints.infra.credential.store.CredentialImageRepository
import com.simprints.infra.credential.store.model.CredentialScanImageType.FullDocument
import com.simprints.infra.logging.LoggingConstants.CrashReportTag.MULTI_FACTOR_ID
Expand All @@ -17,63 +12,34 @@ import javax.inject.Inject
import javax.inject.Singleton

@Singleton
@ExcludedFromGeneratedTestCoverageReports("Unable to mock Google ML Kit")
internal class GetCredentialCoordinatesUseCase @Inject constructor(
private val readTextFromImage: ReadTextFromImageUseCase,
private val ghanaNhisCardOcrSelectorUseCase: GhanaNhisCardOcrSelectorUseCase,
private val ghanaIdCardOcrSelectorUseCase: GhanaIdCardOcrSelectorUseCase,
private val credentialImageRepository: CredentialImageRepository,
) {
private val recognizer = TextRecognition.getClient(TextRecognizerOptions.DEFAULT_OPTIONS)

/**
* OCR uses Google ML kit. It has a following hierarchy:
* - Block. A contiguous set of text lines, such as a paragraph or column,
* - Line. A contiguous set of words on the same axis. There can be multiple Lines in the Block
* - Element. A contiguous set of alphanumeric characters ("word") on the same axis. There can be Elements in one Line
* - Symbol. A single alphanumeric character in an Element.
*
* This method returns a [DetectedOcrBlock] class if the OCR managed to find a line that satisfies the given [documentType] pattern.
* If such Line is found, then it is returned in a [DetectedOcrBlock] alongside its parent block, and a normalized value.
*
* Lines are used instead of Elements because the OCR might mistakenly read an extra white space in a Line, resulting in multiple
* Elements. Since Lines are geometrically in one plane, we just take the concatenation of all underlying child Elements, and analyze
* them it as a single string.
*
* @param bitmap bitmap to run OCR on
* @param documentType type of the document
*
* @return [DetectedOcrBlock] if any Line satisfies the [documentType] pattern, or null if none.
*/
suspend operator fun invoke(
bitmap: Bitmap,
documentType: OcrDocumentType,
): DetectedOcrBlock? {
val image = InputImage.fromBitmap(bitmap, 0)
return try {
val result = Tasks.await(recognizer.process(image)) ?: return null
return result.textBlocks.firstNotNullOfOrNull { textBlock ->
textBlock.lines.firstNotNullOfOrNull { textLine ->
// Getting text from the entire line readout, and normalizing to avoid any extra spaces
val lineReadout = textLine.text.trim().replace(" ", "")
val isValid = when (documentType) {
OcrDocumentType.NhisCard -> ghanaNhisCardOcrSelectorUseCase(lineReadout)
OcrDocumentType.GhanaIdCard -> ghanaIdCardOcrSelectorUseCase(lineReadout)
}
if (isValid) {
val blockBoundingRect = textBlock.boundingBox ?: return@firstNotNullOfOrNull null
val lineBoundingRect = textLine.boundingBox ?: return@firstNotNullOfOrNull null
val savedImagePath = credentialImageRepository.saveCredentialScan(bitmap, imageType = FullDocument)
return@firstNotNullOfOrNull DetectedOcrBlock(
imagePath = savedImagePath,
documentType = documentType,
blockBoundingBox = blockBoundingRect.toBoundingBox(),
lineBoundingBox = lineBoundingRect.toBoundingBox(),
readoutValue = lineReadout,
)
} else {
return@firstNotNullOfOrNull null
}
}
val ocrText = readTextFromImage(bitmap) ?: return null
val ocrReader = OcrReader(ocrText)
val credentialOcrLine = when (documentType) {
OcrDocumentType.NhisCard -> ghanaNhisCardOcrSelectorUseCase(ocrReader)
OcrDocumentType.GhanaIdCard -> ghanaIdCardOcrSelectorUseCase(ocrReader)
}
if (credentialOcrLine != null) {
val savedImagePath = credentialImageRepository.saveCredentialScan(bitmap, imageType = FullDocument)
DetectedOcrBlock(
imagePath = savedImagePath,
documentType = documentType,
blockBoundingBox = credentialOcrLine.blockBoundingBox,
lineBoundingBox = credentialOcrLine.boundingBox,
readoutValue = credentialOcrLine.text,
)
} else {
null
}
} catch (e: Exception) {
Simber.e("OCR failed for $documentType", e, tag = MULTI_FACTOR_ID)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
package com.simprints.feature.externalcredential.screens.scanocr.usecase

import com.simprints.feature.externalcredential.screens.scanocr.reader.OcrLine
import com.simprints.feature.externalcredential.screens.scanocr.reader.OcrReader
import javax.inject.Inject

internal class GhanaIdCardOcrSelectorUseCase @Inject constructor() {
operator fun invoke(readoutValue: String): Boolean = GHANA_ID_PATTERN.matches(readoutValue)
operator fun invoke(ocrReader: OcrReader): OcrLine? = ocrReader.find { matchesPattern(GHANA_ID_PATTERN) }

Comment thread
alexandr-simprints marked this conversation as resolved.
companion object {
// Ghana ID card number pattern is "GHA-12345789-0"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
package com.simprints.feature.externalcredential.screens.scanocr.usecase

import com.simprints.feature.externalcredential.screens.scanocr.reader.OcrLine
import com.simprints.feature.externalcredential.screens.scanocr.reader.OcrReader
import javax.inject.Inject

internal class GhanaNhisCardOcrSelectorUseCase @Inject constructor() {
operator fun invoke(readoutValue: String): Boolean = NHIS_PATTERN.matches(readoutValue)
operator fun invoke(ocrReader: OcrReader): OcrLine? = ocrReader.find { matchesPattern(NHIS_PATTERN) }

Comment thread
alexandr-simprints marked this conversation as resolved.
companion object {
// NHIS Card membership is 8 digits long
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
package com.simprints.feature.externalcredential.screens.scanocr.usecase

import android.graphics.Bitmap
import com.google.android.gms.tasks.Tasks
import com.google.mlkit.vision.common.InputImage
import com.google.mlkit.vision.text.Text
import com.google.mlkit.vision.text.TextRecognition
import com.google.mlkit.vision.text.latin.TextRecognizerOptions
import com.simprints.core.ExcludedFromGeneratedTestCoverageReports
import com.simprints.feature.externalcredential.model.toBoundingBox
import com.simprints.feature.externalcredential.screens.scanocr.reader.OcrLine
import com.simprints.feature.externalcredential.screens.scanocr.reader.OcrText
import javax.inject.Inject
import javax.inject.Singleton

/**
 * Runs the ML Kit text recognizer on a bitmap and converts the outcome into an [OcrText].
 *
 * The recognition result is awaited with [Tasks.await], i.e. the call blocks the calling thread
 * until ML Kit is done.
 */
@Singleton
@ExcludedFromGeneratedTestCoverageReports("Unable to mock Google ML Kit")
internal class ReadTextFromImageUseCase @Inject constructor() {
    private val recognizer = TextRecognition.getClient(TextRecognizerOptions.DEFAULT_OPTIONS)

    /**
     * @param bitmap image to run the OCR on; passed to ML Kit with a rotation of 0 degrees
     * @return recognized text, or null when the recognizer yields no result
     */
    operator fun invoke(bitmap: Bitmap): OcrText? {
        val image = InputImage.fromBitmap(bitmap, 0)
        val recognizedText = Tasks.await(recognizer.process(image))
        return if (recognizedText == null) null else build(recognizedText)
    }

    /**
     * Flattens the lines of every ML Kit block into [OcrLine]s and orders them by top coordinate.
     * Ids are handed out in ML Kit traversal order (block by block, line by line) before sorting.
     */
    private fun build(mlKitText: Text): OcrText {
        val collectedLines = mutableListOf<OcrLine>()

        for (block in mlKitText.textBlocks) {
            for (line in block.lines) {
                collectedLines += OcrLine(
                    // Number of lines collected so far doubles as the next sequential id.
                    id = collectedLines.size,
                    text = line.text.trim(),
                    boundingBox = line.boundingBox.toBoundingBox(),
                    blockBoundingBox = block.boundingBox.toBoundingBox(),
                    confidence = line.confidence,
                )
            }
        }

        val topToBottom = collectedLines.sortedBy { ocrLine -> ocrLine.boundingBox.top }
        return OcrText(allLines = topToBottom)
    }
}
Loading
Loading