From 65ef375c33bd453362901d9b6d3ca8b39ded0f03 Mon Sep 17 00:00:00 2001 From: tkadziolka Date: Wed, 30 Jul 2025 20:53:22 +0200 Subject: [PATCH 1/3] Fixed keyword highlight when appeared on end --- .../dev/snipme/highlights/internal/Extensions.kt | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/commonMain/kotlin/dev/snipme/highlights/internal/Extensions.kt b/src/commonMain/kotlin/dev/snipme/highlights/internal/Extensions.kt index a03aa3c..b9a3534 100644 --- a/src/commonMain/kotlin/dev/snipme/highlights/internal/Extensions.kt +++ b/src/commonMain/kotlin/dev/snipme/highlights/internal/Extensions.kt @@ -2,9 +2,9 @@ package dev.snipme.highlights.internal import dev.snipme.highlights.model.CodeHighlight import dev.snipme.highlights.model.PhraseLocation +import kotlinx.coroutines.Job import kotlinx.serialization.encodeToString import kotlinx.serialization.json.Json -import kotlinx.coroutines.Job import kotlin.coroutines.cancellation.CancellationException fun List.toJson(): String { @@ -74,15 +74,21 @@ fun String.isIndependentPhrase( if (index == code.lastIndex) return true if (code.length == this.length) return true - val charBefore = code[maxOf(index - 1, 0)] + // Token is at start of the code val charAfter = code[minOf(index + this.length, code.lastIndex)] - if (index == 0) { return charAfter.isDigit().not() && charAfter.isLetter().not() } + // Token is at end of the code + val charBefore = code[maxOf(index - 1, 0)] + if (index + this.length == code.length) { + return charBefore.isLetter().not() + } + + // Token is in the middle of the code return charBefore.isLetter().not() && - charAfter.isDigit().not() && (charAfter == code.last() || charAfter.isLetter().not()) + charAfter.isDigit().not() && charAfter.isLetter().not() } fun Set.toRangeSet(): Set = From 2e35bcccd07d7daaf365762b8ff775e6eb72fb44 Mon Sep 17 00:00:00 2001 From: tkadziolka Date: Sat, 16 Aug 2025 23:19:57 +0200 Subject: [PATCH 2/3] Added test of code from issue --- .../snipme/highlights/internal/CodeSamples.kt | 23 +++++++++++++++++++ .../internal/language/PythonTest.kt | 21 +++++++++++++++++ 2 files changed, 44 insertions(+) create mode 100644 src/commonTest/kotlin/dev/snipme/highlights/internal/language/PythonTest.kt diff --git a/src/commonTest/kotlin/dev/snipme/highlights/internal/CodeSamples.kt b/src/commonTest/kotlin/dev/snipme/highlights/internal/CodeSamples.kt index fd89b17..a97828b 100644 --- a/src/commonTest/kotlin/dev/snipme/highlights/internal/CodeSamples.kt +++ b/src/commonTest/kotlin/dev/snipme/highlights/internal/CodeSamples.kt @@ -1312,3 +1312,26 @@ val longJavaCode = """ } } """.trimIndent() + +val longPythonCode = """ + import polars as pl + + def transform_dataframe(df: pl.DataFrame, max_seq_len: int, end_action_value: int) -> pl.DataFrame: + end_val = pl.lit(end_action_value) + max_len = pl.lit(max_seq_len) + df_with_ends = df.with_columns( + end_indices=pl.col("actionType").list.eval( + pl.int_range(0, pl.len()).filter(pl.element() == end_val) + ) + ) + df_exploded = df_with_ends.explode("end_indices") + start_offset = (pl.col("end_indices") + 1 - max_len).clip(lower_bound=0) + slice_len = pl.col("end_indices") - start_offset + 1 + list_cols = ["actionType", "engagementTimeMs", "actionTweetIds"] + for col in list_cols: + df_exploded = df_exploded.with_columns( + pl.col(col).list.slice(start_offset, slice_len).alias(col) + ) + result = df_exploded.select(["userId", "actionType", "engagementTimeMs", "actionTweetIds"]) + return result +""".trimIndent() \ No newline at end of file diff --git a/src/commonTest/kotlin/dev/snipme/highlights/internal/language/PythonTest.kt b/src/commonTest/kotlin/dev/snipme/highlights/internal/language/PythonTest.kt new file mode 100644 index 0000000..bd692b9 --- /dev/null +++ b/src/commonTest/kotlin/dev/snipme/highlights/internal/language/PythonTest.kt @@ -0,0 +1,21 @@ +package dev.snipme.highlights.internal.language + +import dev.snipme.highlights.Highlights +import dev.snipme.highlights.internal.longPythonCode +import dev.snipme.highlights.model.SyntaxLanguage +import kotlin.test.Test +import kotlin.test.assertEquals + +class PythonTest { + + @Test + fun test() { + val result = Highlights.Builder() + .code(longPythonCode) + .language(SyntaxLanguage.PYTHON) + .build() + .getCodeStructure() + + assertEquals(6, result.keywords.size) + } +} \ No newline at end of file From dbcf4988eda05b3ad883ac539c662d6ce31b7721 Mon Sep 17 00:00:00 2001 From: tkadziolka Date: Sun, 17 Aug 2025 12:49:00 +0200 Subject: [PATCH 3/3] Covered other keyword locating cases --- .../snipme/highlights/internal/Extensions.kt | 67 +++++++++++++++++-- .../internal/locator/KeywordLocator.kt | 14 +--- .../internal/locator/NumericLiteralLocator.kt | 58 +++++++++++----- .../highlights/internal/ExtensionsKtTest.kt | 26 ++++++- .../internal/locator/KeywordLocatorTest.kt | 18 +++-- .../locator/NumericLiteralLocatorTest.kt | 9 +++ 6 files changed, 151 insertions(+), 41 deletions(-) diff --git a/src/commonMain/kotlin/dev/snipme/highlights/internal/Extensions.kt b/src/commonMain/kotlin/dev/snipme/highlights/internal/Extensions.kt index b9a3534..692691a 100644 --- a/src/commonMain/kotlin/dev/snipme/highlights/internal/Extensions.kt +++ b/src/commonMain/kotlin/dev/snipme/highlights/internal/Extensions.kt @@ -1,5 +1,8 @@ package dev.snipme.highlights.internal +import dev.snipme.highlights.internal.locator.NUMBER_TYPE_CHARACTERS +import dev.snipme.highlights.internal.SyntaxTokens.MARK_CHARACTERS +import dev.snipme.highlights.internal.SyntaxTokens.PUNCTUATION_CHARACTERS import dev.snipme.highlights.model.CodeHighlight import dev.snipme.highlights.model.PhraseLocation import kotlinx.coroutines.Job @@ -15,7 +18,7 @@ fun String.phraseLocationSetFromJson(): Set { return Json.decodeFromString(this) } -inline operator fun Set.get(i: Int): E? { +operator fun Set.get(i: Int): E? { this.forEachIndexed { index, t -> if (i == index) return t } @@ -77,18 +80,72 @@ fun String.isIndependentPhrase( // Token is at start of the code val charAfter = code[minOf(index + this.length, code.lastIndex)] if (index == 0) { - return charAfter.isDigit().not() && charAfter.isLetter().not() + return charAfter.isDigit().not() && charAfter.isLetter().not() && charAfter != '_' } // Token is at end of the code val charBefore = code[maxOf(index - 1, 0)] if (index + this.length == code.length) { - return charBefore.isLetter().not() + return (charBefore.isLetter().not() && charBefore != '_') || isAfterNumericSuffix(code, index) } // Token is in the middle of the code - return charBefore.isLetter().not() && - charAfter.isDigit().not() && charAfter.isLetter().not() + return ((charBefore.isLetter().not() || isAfterNumericSuffix(code, index)) && + charAfter.isDigit().not() && charAfter.isLetter().not() && charAfter != '_') +} + +private fun String.isAfterNumericSuffix(code: String, keywordIndex: Int): Boolean { + if (keywordIndex == 0) return false + + val charBefore = code[keywordIndex - 1] + + // Check if the character before is a valid numeric suffix + val validSuffixes = NUMBER_TYPE_CHARACTERS + if (charBefore !in validSuffixes) return false + + // Walk backwards to validate the number structure + var i = keywordIndex - 2 + var hasDigit = false + var hasDot = false + + while (i >= 0) { + val char = code[i] + + when { + char.isDigit() -> { + hasDigit = true + i-- + } + char == '.' -> { + if (hasDot) return false + hasDot = true + i-- + } + char == '-' -> { + if (i == 0) { + break + } else { + val prevChar = code[i - 1] + if (prevChar.isLetterOrDigit() || prevChar == '_') { + return false + } else { + break + } + } + } + char == '_' -> { + i-- + } + char.isLetter() -> { + return false + } + else -> { + break + } + } + } + + return hasDigit } fun Set.toRangeSet(): Set = diff --git a/src/commonMain/kotlin/dev/snipme/highlights/internal/locator/KeywordLocator.kt b/src/commonMain/kotlin/dev/snipme/highlights/internal/locator/KeywordLocator.kt index 4a5b3a1..41cba51 100644 --- a/src/commonMain/kotlin/dev/snipme/highlights/internal/locator/KeywordLocator.kt +++ b/src/commonMain/kotlin/dev/snipme/highlights/internal/locator/KeywordLocator.kt @@ -13,9 +13,8 @@ internal object KeywordLocator { ignoreRanges: Set = emptySet(), ): Set { val locations = mutableSetOf() - val foundKeywords = findKeywords(code, keywords) - foundKeywords.forEach { keyword -> + keywords.forEach { keyword -> val indices = code .indicesOf(keyword) .filterNot { index -> ignoreRanges.any { index in it } } @@ -28,15 +27,4 @@ internal object KeywordLocator { return locations } - - private fun findKeywords(code: String, keywords: Set): Set = - TOKEN_DELIMITERS.toTypedArray().let { delimiters -> - code.split(*delimiters, ignoreCase = true) // Split into words - .asSequence() // Reduce amount of operations - .filter { it.isNotBlank() } // Remove empty - .map { it.trim() } // Remove whitespaces from phrase - .map { it.lowercase() } // Standardize - .filter { it in keywords } // Get supported - .toSet() // Filter duplicates - } } \ No newline at end of file diff --git a/src/commonMain/kotlin/dev/snipme/highlights/internal/locator/NumericLiteralLocator.kt b/src/commonMain/kotlin/dev/snipme/highlights/internal/locator/NumericLiteralLocator.kt index 572bc56..056772d 100644 --- a/src/commonMain/kotlin/dev/snipme/highlights/internal/locator/NumericLiteralLocator.kt +++ b/src/commonMain/kotlin/dev/snipme/highlights/internal/locator/NumericLiteralLocator.kt @@ -5,9 +5,9 @@ import dev.snipme.highlights.internal.indicesOf import dev.snipme.highlights.model.PhraseLocation private val NUMBER_START_CHARACTERS = listOf('-', '.') -private val NUMBER_TYPE_CHARACTERS = listOf('e', 'u', 'f', 'l') private val HEX_NUMBER_CHARACTERS = listOf('a', 'b', 'c', 'd', 'e', 'f') private val NUMBER_SPECIAL_CHARACTERS = listOf('_') +public val NUMBER_TYPE_CHARACTERS = listOf('e', 'u', 'f', 'l') internal object NumericLiteralLocator { @@ -67,8 +67,6 @@ internal object NumericLiteralLocator { } private fun calculateNumberLength(number: String): Int { - val letters = number.filter { it.isLetter() } - if (number.startsWith("0x")) { return getLengthOfSubstringFor(number) { it.isDigit() || HEX_NUMBER_CHARACTERS.contains(it) @@ -81,23 +79,47 @@ internal object NumericLiteralLocator { } } - // Highlight only 4f when e.g. number is like 4fff - if (NUMBER_TYPE_CHARACTERS.any { letters.contains(it) }) { - var length = 1 // Single letter - length += number.count { it.isDigit() } - length += number.count { NUMBER_START_CHARACTERS.contains(it) } - length += number.count { NUMBER_SPECIAL_CHARACTERS.contains(it) } - if ("e+" in number) length++ - return length + var length = 0 + var foundE = false + var foundSignAfterE = false + var foundDot = false + var suffixCount = 0 + val maxSuffixes = 1 + + for (i in number.indices) { + val char = number[i] + when { + char.isDigit() -> { + length++ + } + ((char == '-' && i == 0) || char == '_') -> { + length++ + } + char == '.' && !foundDot -> { + foundDot = true + length++ + } + (char.lowercaseChar() == 'e' && !foundE) -> { + foundE = true + length++ + } + ((char == '+' || char == '-') && foundE && !foundSignAfterE) -> { + foundSignAfterE = true + length++ + } + NUMBER_TYPE_CHARACTERS.contains(char) -> { + if (suffixCount < maxSuffixes) { + length++ + suffixCount++ + } else { + break + } + } + else -> break + } } - return number.filter { - it.isDigit() || - NUMBER_START_CHARACTERS.contains(it) || - NUMBER_TYPE_CHARACTERS.contains(it) || - NUMBER_SPECIAL_CHARACTERS.contains(it) - - }.length + return length } private fun getLengthOfSubstringFor(number: String, condition: (Char) -> Boolean): Int { diff --git a/src/commonTest/kotlin/dev/snipme/highlights/internal/ExtensionsKtTest.kt b/src/commonTest/kotlin/dev/snipme/highlights/internal/ExtensionsKtTest.kt index 49f054e..efa606d 100644 --- a/src/commonTest/kotlin/dev/snipme/highlights/internal/ExtensionsKtTest.kt +++ b/src/commonTest/kotlin/dev/snipme/highlights/internal/ExtensionsKtTest.kt @@ -238,8 +238,32 @@ internal class ExtensionsKtTest { val index = 1 val result = "class".isIndependentPhrase(code, index) - assertEquals(true, result) } + + @Test + fun `Returns false for phrase after underscore`() { + val code = "int_class" + val index = 4 + + val result = "class".isIndependentPhrase(code, index) + + assertEquals(false, result) + } + + @Test + fun `Returns true for phrase after number with suffix`() { + val testCases = listOf( + Triple("9eclass", 2, "class"), + Triple("9uclass", 2, "class"), + Triple("9fclass", 2, "class"), + Triple("9lclass", 2, "class") + ) + + testCases.forEach { (code, keywordIndex, keyword) -> + val result = keyword.isIndependentPhrase(code, keywordIndex) + assertEquals(true, result, "Failed for: '$code' at index $keywordIndex for keyword '$keyword'") + } + } } } \ No newline at end of file diff --git a/src/commonTest/kotlin/dev/snipme/highlights/internal/locator/KeywordLocatorTest.kt b/src/commonTest/kotlin/dev/snipme/highlights/internal/locator/KeywordLocatorTest.kt index f2413fb..2e37a1c 100644 --- a/src/commonTest/kotlin/dev/snipme/highlights/internal/locator/KeywordLocatorTest.kt +++ b/src/commonTest/kotlin/dev/snipme/highlights/internal/locator/KeywordLocatorTest.kt @@ -44,7 +44,7 @@ internal class KeywordLocatorTest { @Test fun `Returns location of all keyword next to each other`() { val testCode = "this.class.abcd ) new" - val keywords = setOf("this", "new", "class") + val keywords = setOf("this", "class", "new") val result = KeywordLocator.locate(testCode, keywords) @@ -75,15 +75,15 @@ internal class KeywordLocatorTest { static class Example2 {} } """.trimIndent() - val keywords = setOf("static", "class", "extends") + val keywords = setOf("class", "static", "extends") val result = KeywordLocator.locate(testCode, keywords) assertEquals(4, result.size) assertEquals(PhraseLocation(0, 5), result[0]) assertEquals(PhraseLocation(42, 47), result[1]) - assertEquals(PhraseLocation(14, 21), result[2]) - assertEquals(PhraseLocation(35, 41), result[3]) + assertEquals(PhraseLocation(35, 41), result[2]) + assertEquals(PhraseLocation(14, 21), result[3]) } @Test @@ -147,4 +147,14 @@ internal class KeywordLocatorTest { assertEquals(0, result.size) } + + @Test + fun `Finds keywords after numeric literals`() { + val testCode = "9class" + val keywords = setOf("class") + val result = KeywordLocator.locate(testCode, keywords) + + assertEquals(1, result.size) + assertEquals(PhraseLocation(1, 6), result.first()) + } } \ No newline at end of file diff --git a/src/commonTest/kotlin/dev/snipme/highlights/internal/locator/NumericLiteralLocatorTest.kt b/src/commonTest/kotlin/dev/snipme/highlights/internal/locator/NumericLiteralLocatorTest.kt index bbdfb10..71aa221 100644 --- a/src/commonTest/kotlin/dev/snipme/highlights/internal/locator/NumericLiteralLocatorTest.kt +++ b/src/commonTest/kotlin/dev/snipme/highlights/internal/locator/NumericLiteralLocatorTest.kt @@ -189,4 +189,13 @@ internal class NumericLiteralLocatorTest { assertEquals(PhraseLocation(31, 38), result[5]) assertEquals(PhraseLocation(39, 45), result[6]) } + + @Test + fun `Returns location of the number with keyword after`() { + val testCode = "9class" + + val result = NumericLiteralLocator.locate(testCode) + + assertEquals(PhraseLocation(0, 1), result[0]) + } } \ No newline at end of file