Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 70 additions & 7 deletions src/commonMain/kotlin/dev/snipme/highlights/internal/Extensions.kt
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
package dev.snipme.highlights.internal

import dev.snipme.highlights.internal.locator.NUMBER_TYPE_CHARACTERS
import dev.snipme.highlights.internal.SyntaxTokens.MARK_CHARACTERS
import dev.snipme.highlights.internal.SyntaxTokens.PUNCTUATION_CHARACTERS
import dev.snipme.highlights.model.CodeHighlight
import dev.snipme.highlights.model.PhraseLocation
import kotlinx.coroutines.Job
import kotlinx.serialization.encodeToString
import kotlinx.serialization.json.Json
import kotlinx.coroutines.Job
import kotlin.coroutines.cancellation.CancellationException

fun List<CodeHighlight>.toJson(): String {
Expand All @@ -15,7 +18,7 @@ fun String.phraseLocationSetFromJson(): Set<PhraseLocation> {
return Json.decodeFromString(this)
}

inline operator fun <E> Set<E>.get(i: Int): E? {
operator fun <E> Set<E>.get(i: Int): E? {
this.forEachIndexed { index, t ->
if (i == index) return t
}
Expand Down Expand Up @@ -74,15 +77,75 @@ fun String.isIndependentPhrase(
if (index == code.lastIndex) return true
if (code.length == this.length) return true

val charBefore = code[maxOf(index - 1, 0)]
// Token is at start of the code
val charAfter = code[minOf(index + this.length, code.lastIndex)]

if (index == 0) {
return charAfter.isDigit().not() && charAfter.isLetter().not()
return charAfter.isDigit().not() && charAfter.isLetter().not() && charAfter != '_'
}

// Token is at end of the code
val charBefore = code[maxOf(index - 1, 0)]
if (index + this.length == code.length) {
return (charBefore.isLetter().not() && charBefore != '_') || isAfterNumericSuffix(code, index)
}

// Token is in the middle of the code
return ((charBefore.isLetter().not() || isAfterNumericSuffix(code, index)) &&
charAfter.isDigit().not() && charAfter.isLetter().not() && charAfter != '_')
}

/**
 * Tells whether the character right before [keywordIndex] in [code] is the type
 * suffix of a numeric literal (one of [NUMBER_TYPE_CHARACTERS]), e.g. the `f` in `9fclass`.
 *
 * The text preceding the suffix is scanned right to left and must look like a number:
 * digits, `_` separators and at most one `.`. A `-` ends the scan and is accepted only
 * when it is not glued to a preceding letter, digit or underscore. Any other letter
 * rejects the candidate, and any other character simply ends the scan.
 *
 * The receiver string is not consulted; only [code] and [keywordIndex] are read.
 *
 * @param code full source text being analysed
 * @param keywordIndex index in [code] where the keyword candidate starts
 * @return `true` when a suffix character is present and at least one digit precedes it;
 * `false` otherwise, including when [keywordIndex] leaves no character before it in [code]
 */
private fun String.isAfterNumericSuffix(code: String, keywordIndex: Int): Boolean {
    // There must be a character directly before the keyword to inspect.
    if (keywordIndex <= 0 || keywordIndex > code.length) return false

    val suffix = code[keywordIndex - 1]
    if (suffix !in NUMBER_TYPE_CHARACTERS) return false

    var hasDigit = false
    var hasDot = false
    var i = keywordIndex - 2

    // Walk backwards over the would-be number that precedes the suffix.
    while (i >= 0) {
        val char = code[i]

        when {
            char.isDigit() -> {
                hasDigit = true
            }
            char == '.' -> {
                // A second decimal point means this is not a single number.
                if (hasDot) return false
                hasDot = true
            }
            char == '-' -> {
                // The minus sign ends the scan; it must not be attached to an identifier or number.
                if (i > 0) {
                    val prevChar = code[i - 1]
                    if (prevChar.isLetterOrDigit() || prevChar == '_') return false
                }
                break
            }
            char == '_' -> {
                // Digit-group separator: skipped without counting as a digit.
            }
            char.isLetter() -> {
                // A letter inside the run makes it an identifier, not a number.
                return false
            }
            else -> break
        }
        i--
    }

    // The suffix only counts when at least one digit was seen before it.
    return hasDigit
}

fun Set<PhraseLocation>.toRangeSet(): Set<IntRange> =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,8 @@ internal object KeywordLocator {
ignoreRanges: Set<IntRange> = emptySet(),
): Set<PhraseLocation> {
val locations = mutableSetOf<PhraseLocation>()
val foundKeywords = findKeywords(code, keywords)

foundKeywords.forEach { keyword ->
keywords.forEach { keyword ->
val indices = code
.indicesOf(keyword)
.filterNot { index -> ignoreRanges.any { index in it } }
Expand All @@ -28,15 +27,4 @@ internal object KeywordLocator {

return locations
}

/**
 * Collects the distinct keywords that occur in [code] as whole delimiter-separated words.
 *
 * The code is split on every entry of TOKEN_DELIMITERS, blank pieces are dropped,
 * and the remaining words are trimmed and lower-cased before being matched against [keywords].
 */
private fun findKeywords(code: String, keywords: Set<String>): Set<String> {
    val delimiters = TOKEN_DELIMITERS.toTypedArray()

    return code.split(*delimiters, ignoreCase = true)
        .asSequence() // stay lazy across the chained steps
        .filter(String::isNotBlank)
        .map { word -> word.trim().lowercase() }
        .filter { word -> word in keywords }
        .toSet()
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@ import dev.snipme.highlights.internal.indicesOf
import dev.snipme.highlights.model.PhraseLocation

private val NUMBER_START_CHARACTERS = listOf('-', '.')
private val NUMBER_TYPE_CHARACTERS = listOf('e', 'u', 'f', 'l')
private val HEX_NUMBER_CHARACTERS = listOf('a', 'b', 'c', 'd', 'e', 'f')
private val NUMBER_SPECIAL_CHARACTERS = listOf('_')
public val NUMBER_TYPE_CHARACTERS = listOf('e', 'u', 'f', 'l')

internal object NumericLiteralLocator {

Expand Down Expand Up @@ -67,8 +67,6 @@ internal object NumericLiteralLocator {
}

private fun calculateNumberLength(number: String): Int {
val letters = number.filter { it.isLetter() }

if (number.startsWith("0x")) {
return getLengthOfSubstringFor(number) {
it.isDigit() || HEX_NUMBER_CHARACTERS.contains(it)
Expand All @@ -81,23 +79,47 @@ internal object NumericLiteralLocator {
}
}

// Highlight only 4f when e.g. number is like 4fff
if (NUMBER_TYPE_CHARACTERS.any { letters.contains(it) }) {
var length = 1 // Single letter
length += number.count { it.isDigit() }
length += number.count { NUMBER_START_CHARACTERS.contains(it) }
length += number.count { NUMBER_SPECIAL_CHARACTERS.contains(it) }
if ("e+" in number) length++
return length
var length = 0
var foundE = false
var foundSignAfterE = false
var foundDot = false
var suffixCount = 0
val maxSuffixes = 1

for (i in number.indices) {
val char = number[i]
when {
char.isDigit() -> {
length++
}
((char == '-' && i == 0) || char == '_') -> {
length++
}
char == '.' && !foundDot -> {
foundDot = true
length++
}
(char.lowercaseChar() == 'e' && !foundE) -> {
foundE = true
length++
}
((char == '+' || char == '-') && foundE && !foundSignAfterE) -> {
foundSignAfterE = true
length++
}
NUMBER_TYPE_CHARACTERS.contains(char) -> {
if (suffixCount < maxSuffixes) {
length++
suffixCount++
} else {
break
}
}
else -> break
}
}

return number.filter {
it.isDigit() ||
NUMBER_START_CHARACTERS.contains(it) ||
NUMBER_TYPE_CHARACTERS.contains(it) ||
NUMBER_SPECIAL_CHARACTERS.contains(it)

}.length
return length
}

private fun getLengthOfSubstringFor(number: String, condition: (Char) -> Boolean): Int {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1312,3 +1312,26 @@ val longJavaCode = """
}
}
""".trimIndent()

/**
 * Python fixture: a polars-based `transform_dataframe` function containing an import,
 * a function definition, a `for` loop and a `return`.
 *
 * Stored as a raw string and normalised with `trimIndent()`.
 */
val longPythonCode = """
import polars as pl

def transform_dataframe(df: pl.DataFrame, max_seq_len: int, end_action_value: int) -> pl.DataFrame:
end_val = pl.lit(end_action_value)
max_len = pl.lit(max_seq_len)
df_with_ends = df.with_columns(
end_indices=pl.col("actionType").list.eval(
pl.int_range(0, pl.len()).filter(pl.element() == end_val)
)
)
df_exploded = df_with_ends.explode("end_indices")
start_offset = (pl.col("end_indices") + 1 - max_len).clip(lower_bound=0)
slice_len = pl.col("end_indices") - start_offset + 1
list_cols = ["actionType", "engagementTimeMs", "actionTweetIds"]
for col in list_cols:
df_exploded = df_exploded.with_columns(
pl.col(col).list.slice(start_offset, slice_len).alias(col)
)
result = df_exploded.select(["userId", "actionType", "engagementTimeMs", "actionTweetIds"])
return result
""".trimIndent()
Original file line number Diff line number Diff line change
Expand Up @@ -238,8 +238,32 @@ internal class ExtensionsKtTest {
val index = 1

val result = "class".isIndependentPhrase(code, index)

assertEquals(true, result)
}

@Test
fun `Returns false for phrase after underscore`() {
    // The keyword starts at offset 4, immediately after the underscore of "int_".
    val snippet = "int_class"
    val keywordStart = 4

    val independent = "class".isIndependentPhrase(snippet, keywordStart)

    assertEquals(false, independent)
}

@Test
fun `Returns true for phrase after number with suffix`() {
    // One case per numeric type suffix: 9e, 9u, 9f, 9l, each directly followed by the keyword.
    val testCases = listOf('e', 'u', 'f', 'l').map { suffix ->
        Triple("9${suffix}class", 2, "class")
    }

    for ((code, keywordIndex, keyword) in testCases) {
        val independent = keyword.isIndependentPhrase(code, keywordIndex)

        assertEquals(
            true,
            independent,
            "Failed for: '$code' at index $keywordIndex for keyword '$keyword'",
        )
    }
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
package dev.snipme.highlights.internal.language

import dev.snipme.highlights.Highlights
import dev.snipme.highlights.internal.longPythonCode
import dev.snipme.highlights.model.SyntaxLanguage
import kotlin.test.Test
import kotlin.test.assertEquals

/**
 * Runs the highlighter over [longPythonCode] as Python and checks the number of keywords found.
 */
class PythonTest {

    @Test
    fun test() {
        val highlights = Highlights.Builder()
            .code(longPythonCode)
            .language(SyntaxLanguage.PYTHON)
            .build()

        val structure = highlights.getCodeStructure()

        assertEquals(6, structure.keywords.size)
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ internal class KeywordLocatorTest {
@Test
fun `Returns location of all keyword next to each other`() {
val testCode = "this.class.abcd ) new"
val keywords = setOf("this", "new", "class")
val keywords = setOf("this", "class", "new")

val result = KeywordLocator.locate(testCode, keywords)

Expand Down Expand Up @@ -75,15 +75,15 @@ internal class KeywordLocatorTest {
static class Example2 {}
}
""".trimIndent()
val keywords = setOf("static", "class", "extends")
val keywords = setOf("class", "static", "extends")

val result = KeywordLocator.locate(testCode, keywords)

assertEquals(4, result.size)
assertEquals(PhraseLocation(0, 5), result[0])
assertEquals(PhraseLocation(42, 47), result[1])
assertEquals(PhraseLocation(14, 21), result[2])
assertEquals(PhraseLocation(35, 41), result[3])
assertEquals(PhraseLocation(35, 41), result[2])
assertEquals(PhraseLocation(14, 21), result[3])
}

@Test
Expand Down Expand Up @@ -147,4 +147,14 @@ internal class KeywordLocatorTest {

assertEquals(0, result.size)
}

@Test
fun `Finds keywords after numeric literals`() {
    // The keyword directly follows a digit, with no delimiter in between.
    val located = KeywordLocator.locate("9class", setOf("class"))

    assertEquals(1, located.size)
    assertEquals(PhraseLocation(1, 6), located.first())
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -189,4 +189,13 @@ internal class NumericLiteralLocatorTest {
assertEquals(PhraseLocation(31, 38), result[5])
assertEquals(PhraseLocation(39, 45), result[6])
}

@Test
fun `Returns location of the number with keyword after`() {
    // Only the leading digit should be reported as the numeric literal.
    val locations = NumericLiteralLocator.locate("9class")

    assertEquals(PhraseLocation(0, 1), locations[0])
}
}