Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
b07ed78
fix: Align emoji background rendering with text positioning
kshivang Dec 21, 2025
2557506
fix: Improve emoji rendering with explicit font and correct width cla…
kshivang Dec 23, 2025
17de181
fix: Add grapheme-aware selection rendering for emoji
kshivang Dec 23, 2025
dc57570
refactor: Address code review feedback for emoji selection
kshivang Dec 23, 2025
6c647cf
fix: Address code review - gender symbol ZWJ check and docs
kshivang Dec 23, 2025
f92209b
refactor: Extract shared CharacterAnalysis helper to eliminate duplic…
kshivang Dec 23, 2025
b4cc6d0
fix: Extend emoji coverage and extract skip logic
kshivang Dec 23, 2025
6d4c31c
fix: Remove media control symbols from 2-cell emoji list
kshivang Dec 23, 2025
560473a
fix: Align renderer emoji classification with buffer
kshivang Dec 23, 2025
88b4622
perf: Cache character analysis to avoid redundant computation
kshivang Dec 23, 2025
eac56b9
feat: Add support for flag emoji (Regional Indicator sequences)
kshivang Dec 23, 2025
1399b6a
refactor: Deduplicate emoji classification logic
kshivang Dec 23, 2025
dbb6cd9
fix: Prevent index out of bounds in Regional Indicator handling
kshivang Dec 23, 2025
84d1da3
refactor: Deduplicate skip logic in renderBackgrounds()
kshivang Dec 23, 2025
6d77c4c
refactor: Extract Unicode constants to shared UnicodeConstants.kt
kshivang Dec 23, 2025
9097cd3
refactor: Add combining character ranges to UnicodeConstants
kshivang Dec 23, 2025
70f23fb
refactor: Remove unused isGraphemeExtender(Int) overload
kshivang Dec 23, 2025
e1070dc
refactor: Add emoji presentation constants to UnicodeConstants
kshivang Dec 23, 2025
abbc8f8
refactor: Return exact column count from checkRegionalIndicatorSequence
kshivang Dec 23, 2025
5a8d48a
docs: Add KDoc for character analysis edge cases and width properties
kshivang Dec 23, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
package ai.rever.bossterm.terminal.util

import ai.rever.bossterm.terminal.model.TerminalLine

/**
 * Utility functions for converting between buffer columns and visual columns.
 *
 * Buffer columns index the raw cells of a [TerminalLine]; they include DWC markers,
 * both halves of surrogate pairs, variation selectors, ZWJ, etc.
 * Visual columns represent what the user sees on screen.
 */
object ColumnConversionUtils {

    /**
     * Result of checking if a character should be skipped during column iteration.
     *
     * @param shouldSkip True if the character should be skipped
     * @param colsToAdvance Number of buffer columns to advance when [shouldSkip] is true
     *   (1 for a single char, 2 for a surrogate pair). [shouldSkipChar] reports 0 here when
     *   [shouldSkip] is false: the caller measures the character and advances on its own.
     */
    data class SkipResult(val shouldSkip: Boolean, val colsToAdvance: Int = 1)

    /**
     * Check if character at given column should be skipped (doesn't consume visual space).
     * This encapsulates the common skip logic used by column conversion and rendering.
     *
     * Characters that don't consume visual space:
     * - DWC markers (placeholder for second cell of double-width char)
     * - Variation selectors (FE0E, FE0F)
     * - Zero-Width Joiner (ZWJ)
     * - Low surrogates (part of previous high surrogate)
     * - Skin tone modifiers (a surrogate pair; both cells are skipped at once)
     * - Gender symbols (only when the previous cell is a ZWJ)
     *
     * The cell at [col] is read without a bounds check, so callers must pass a valid column.
     *
     * @param line The terminal line to inspect
     * @param col The buffer column to classify
     * @param width The terminal width (max columns); bounds the look-ahead to `col + 1`
     * @return A [SkipResult]; `SkipResult(false, 0)` when the cell consumes visual space
     */
    fun shouldSkipChar(line: TerminalLine, col: Int, width: Int): SkipResult {
        val char = line.charAt(col)

        // Skip DWC markers (they don't add visual width)
        if (char == CharUtils.DWC) {
            return SkipResult(true, 1)
        }

        // Skip variation selectors (FE0E, FE0F)
        if (UnicodeConstants.isVariationSelector(char)) {
            return SkipResult(true, 1)
        }

        // Skip ZWJ (U+200D)
        if (char.code == UnicodeConstants.ZWJ) {
            return SkipResult(true, 1)
        }

        // Skip low surrogates (they're part of previous high surrogate)
        if (Character.isLowSurrogate(char)) {
            return SkipResult(true, 1)
        }

        // Skip skin tone modifiers (U+1F3FB-U+1F3FF, encoded as surrogate pairs)
        if (isSkinToneModifierAt(line, col, width)) {
            return SkipResult(true, 2)
        }

        // Skip gender symbols only when preceded by ZWJ (part of ZWJ sequences)
        if (UnicodeConstants.isGenderSymbol(char.code) &&
            col > 0 &&
            line.charAt(col - 1).code == UnicodeConstants.ZWJ
        ) {
            return SkipResult(true, 1)
        }

        // Not skipped: colsToAdvance stays 0 because the caller advances after measuring.
        return SkipResult(false, 0)
    }

    /**
     * Convert buffer column to visual column.
     * Accounts for DWC markers, surrogate pairs, ZWJ sequences, and other grapheme extenders
     * that don't consume visual space.
     *
     * @param line The terminal line to analyze
     * @param bufferCol The buffer column to convert
     * @param width The terminal width (max columns)
     * @return The visual column corresponding to the buffer column (0 when [bufferCol] <= 0)
     */
    fun bufferColToVisualCol(line: TerminalLine, bufferCol: Int, width: Int): Int {
        if (bufferCol <= 0) return 0

        var visualCol = 0
        var col = 0

        while (col < bufferCol && col < width) {
            val skipResult = shouldSkipChar(line, col, width)
            if (skipResult.shouldSkip) {
                col += skipResult.colsToAdvance
                continue
            }

            // Regular character - count visual width (1 or 2 for double-width)
            visualCol += getCharacterVisualWidth(line, col, width)
            col++
        }
        return visualCol
    }

    /**
     * Convert visual column to buffer column.
     * Returns the buffer column at the START of the grapheme at the given visual position.
     * This is used for click handling to snap to grapheme boundaries.
     *
     * When [visualCol] lies inside a double-width grapheme, the column of that grapheme's
     * first cell is returned. When [visualCol] is exactly the right edge of a grapheme, the
     * zero-width cells trailing it (DWC marker, low surrogate, variation selector, ...) are
     * stepped over so the result is the first cell of the NEXT grapheme rather than a
     * marker cell. The result may equal [width] when the position is past the last grapheme.
     *
     * @param line The terminal line to analyze
     * @param visualCol The visual column to convert
     * @param width The terminal width (max columns)
     * @return The buffer column corresponding to the visual column (0 when [visualCol] <= 0)
     */
    fun visualColToBufferCol(line: TerminalLine, visualCol: Int, width: Int): Int {
        if (visualCol <= 0) return 0

        var currentVisualCol = 0
        var col = 0

        while (col < width && currentVisualCol < visualCol) {
            val skipResult = shouldSkipChar(line, col, width)
            if (skipResult.shouldSkip) {
                col += skipResult.colsToAdvance
                continue
            }

            // Regular character - count visual width
            val charWidth = getCharacterVisualWidth(line, col, width)

            // Check if visualCol falls within this character's visual range
            if (visualCol < currentVisualCol + charWidth) {
                return col // Snap to start of this grapheme
            }

            currentVisualCol += charWidth
            col++
        }

        // The loop above stops right after the base cell of the last counted grapheme, so
        // `col` may still point at that grapheme's own zero-width cells. Step over them so
        // the returned column is a real grapheme start.
        while (col < width) {
            val trailing = shouldSkipChar(line, col, width)
            if (!trailing.shouldSkip) break
            col += trailing.colsToAdvance
        }
        return col
    }

    /**
     * Get visual width of character at buffer position (1 or 2 cells).
     *
     * Detection strategy: Look ahead through grapheme extenders to find DWC marker.
     * If DWC follows, character is double-width.
     *
     * The forward scan only runs when the cell at [col] is a high surrogate, and it stops
     * at the first cell that is neither a DWC marker nor one of the recognised extenders
     * (low surrogate, variation selector, ZWJ, gender symbol, skin tone modifier pair).
     *
     * @param line The terminal line
     * @param col The buffer column
     * @param width The terminal width
     * @return 1 for single-width, 2 for double-width characters (1 when [col] >= [width])
     */
    fun getCharacterVisualWidth(line: TerminalLine, col: Int, width: Int): Int {
        if (col >= width) return 1

        val char = line.charAt(col)

        // Simple case: next char is DWC (single BMP double-width char like CJK)
        if (col + 1 < width && line.charAt(col + 1) == CharUtils.DWC) {
            return 2
        }

        // For surrogate pairs and complex graphemes, scan forward through extenders to find DWC
        if (Character.isHighSurrogate(char)) {
            var nextCol = col + 1
            while (nextCol < width) {
                val nextChar = line.charAt(nextCol)
                // Found DWC marker - this grapheme is double-width
                if (nextChar == CharUtils.DWC) return 2
                // Continue through single-cell grapheme extenders
                if (Character.isLowSurrogate(nextChar) ||
                    UnicodeConstants.isVariationSelector(nextChar) ||
                    nextChar.code == UnicodeConstants.ZWJ ||
                    UnicodeConstants.isGenderSymbol(nextChar.code)
                ) {
                    nextCol++
                    continue
                }
                // Skin tone modifier occupies two cells (high + low surrogate)
                if (isSkinToneModifierAt(line, nextCol, width)) {
                    nextCol += 2
                    continue
                }
                break
            }
        }

        return 1 // Default: single width
    }

    /**
     * True when the cells at [col] and `col + 1` form a surrogate pair whose code point is
     * a skin tone modifier according to [UnicodeConstants.isSkinToneModifier].
     * Returns false when `col + 1` is not below [width].
     */
    private fun isSkinToneModifierAt(line: TerminalLine, col: Int, width: Int): Boolean {
        if (col + 1 >= width) return false
        val high = line.charAt(col)
        if (!Character.isHighSurrogate(high)) return false
        val low = line.charAt(col + 1)
        return Character.isLowSurrogate(low) &&
            UnicodeConstants.isSkinToneModifier(Character.toCodePoint(high, low))
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -78,9 +78,9 @@ object GraphemeBoundaryUtils {
private fun needsGraphemeAnalysis(c: Char): Boolean {
    // True for any UTF-16 unit that can take part in a multi-unit grapheme: either half of
    // a surrogate pair, ZWJ, a variation selector, a combining diacritic, or anything
    // GraphemeUtils.isGraphemeExtender reports. The hard-coded 0x200D / 0xFE0E / 0xFE0F /
    // 0x0300..0x036F comparisons were dropped: they repeated the UnicodeConstants checks
    // below and are also matched by GraphemeUtils.isGraphemeExtender.
    return c.isHighSurrogate() ||
        c.isLowSurrogate() ||
        c.code == UnicodeConstants.ZWJ ||
        UnicodeConstants.isVariationSelector(c) ||
        UnicodeConstants.isCombiningDiacritic(c.code) ||
        GraphemeUtils.isGraphemeExtender(c)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -35,20 +35,8 @@ data class GraphemeCluster(
val isEmoji: Boolean
get() {
if (codePoints.isEmpty()) return false
val first = codePoints[0]
return when {
// Emoji & Pictographs (U+1F300-U+1F9FF)
first in 0x1F300..0x1F9FF -> true
// Emoticons (U+1F600-U+1F64F)
first in 0x1F600..0x1F64F -> true
// Transport & Map Symbols (U+1F680-U+1F6FF)
first in 0x1F680..0x1F6FF -> true
// Supplemental Symbols (U+1F900-U+1F9FF)
first in 0x1F900..0x1F9FF -> true
// Misc Symbols with emoji presentation
hasVariationSelector(0xFE0F) -> true
else -> false
}
return GraphemeUtils.isEmojiPresentation(codePoints[0]) ||
hasVariationSelector(UnicodeConstants.VARIATION_SELECTOR_EMOJI)
}

/**
Expand All @@ -58,22 +46,22 @@ data class GraphemeCluster(
return if (selector != null) {
codePoints.contains(selector)
} else {
codePoints.contains(0xFE0E) || codePoints.contains(0xFE0F)
codePoints.any { UnicodeConstants.isVariationSelector(it) }
}
}

/**
* Checks if this grapheme contains a skin tone modifier (U+1F3FB-U+1F3FF).
*/
val hasSkinTone: Boolean
get() = codePoints.any { it in 0x1F3FB..0x1F3FF }
get() = codePoints.any { UnicodeConstants.isSkinToneModifier(it) }

/**
* Checks if this grapheme contains a Zero-Width Joiner (U+200D).
* ZWJ is used to join multiple emoji into a single visual unit.
*/
val hasZWJ: Boolean
get() = codePoints.contains(0x200D)
get() = codePoints.contains(UnicodeConstants.ZWJ)

/**
* Checks if this grapheme is a surrogate pair (outside BMP, U+10000+).
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -129,9 +129,9 @@ class GraphemeMetadata private constructor(
val c = text[i]
// Check for surrogates, emoji, combining characters
if (c.isHighSurrogate() || c.isLowSurrogate() ||
c.code in 0x0300..0x036F || // Combining diacritics
c.code == 0x200D || // ZWJ
c.code == 0xFE0E || c.code == 0xFE0F // Variation selectors
UnicodeConstants.isCombiningDiacritic(c.code) ||
c.code == UnicodeConstants.ZWJ ||
UnicodeConstants.isVariationSelector(c)
) {
needsMetadata = true
break
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,12 @@ object GraphemeUtils {

// Fast path: single BMP character
if (grapheme.length == 1) {
return CharUtils.mk_wcwidth(grapheme[0].code, ambiguousIsDWC).coerceAtLeast(0)
val codePoint = grapheme[0].code
// Check for emoji with Emoji_Presentation=Yes (should be 2 cells by default)
if (isEmojiPresentation(codePoint)) {
return 2
}
return CharUtils.mk_wcwidth(codePoint, ambiguousIsDWC).coerceAtLeast(0)
}

// Check cache
Expand Down Expand Up @@ -147,16 +152,22 @@ object GraphemeUtils {
}

// Check for ZWJ sequence (multiple emoji joined)
if (codePoints.contains(0x200D)) {
if (codePoints.contains(UnicodeConstants.ZWJ)) {
// ZWJ sequence: treat as single emoji (width 2)
return 2
}

// Check for Regional Indicator sequence (flag emoji)
// Two consecutive Regional Indicators form a flag (e.g., 🇺🇸 = U+1F1FA + U+1F1F8)
if (codePoints.size >= 2 && codePoints.all { UnicodeConstants.isRegionalIndicator(it) }) {
return 2 // Flag emoji
}

// Check for variation selector
val hasVariationSelector = codePoints.contains(0xFE0E) || codePoints.contains(0xFE0F)
val hasVariationSelector = codePoints.any { UnicodeConstants.isVariationSelector(it) }

// Check for skin tone modifier
val hasSkinTone = codePoints.any { it in 0x1F3FB..0x1F3FF }
val hasSkinTone = codePoints.any { UnicodeConstants.isSkinToneModifier(it) }

// For emoji with variation selector or skin tone, calculate base emoji width only
if (hasVariationSelector || hasSkinTone) {
Expand All @@ -167,12 +178,18 @@ object GraphemeUtils {
// Emoji are typically width 2
return when {
baseWidth == 2 -> 2
baseWidth == 1 && (hasVariationSelector || hasSkinTone) -> 2 // Emoji presentation
baseWidth == 1 -> 2 // Emoji presentation
baseWidth <= 0 -> 0
else -> baseWidth
}
}

// Check for single emoji with Emoji_Presentation=Yes (e.g., ✅, ⭐)
// These should be 2 cells even without variation selector
if (codePoints.size == 1 && isEmojiPresentation(codePoints.first())) {
return 2
}

// For combining character sequences, only count base character
var totalWidth = 0
var isFirst = true
Expand All @@ -197,6 +214,27 @@ object GraphemeUtils {
return totalWidth
}

/**
 * Tells whether [codePoint] is treated as an emoji that occupies 2 cells by default.
 *
 * The answer is the union of two [UnicodeConstants] predicates:
 * - [UnicodeConstants.isSupplementaryPlaneEmoji], checked first
 * - [UnicodeConstants.isBmpEmoji], checked only when the first one does not match
 *
 * Design note carried over from the original documentation: BMP symbols that are commonly
 * used as 1-cell text in TUI applications (▶◀⏹⏺ etc.) are intentionally left out of the
 * BMP set, so they become 2-cell emoji only when followed by variation selector FE0F.
 *
 * @param codePoint The Unicode code point to check
 * @return True if this character should render as 2 cells by default
 */
fun isEmojiPresentation(codePoint: Int): Boolean =
    UnicodeConstants.isSupplementaryPlaneEmoji(codePoint) || UnicodeConstants.isBmpEmoji(codePoint)

/**
* Checks if a character is a grapheme extender (ZWJ, variation selector, skin tone, combining).
* Used for incremental grapheme boundary detection in streaming scenarios.
Expand All @@ -206,30 +244,13 @@ object GraphemeUtils {
*/
fun isGraphemeExtender(c: Char): Boolean {
return when (c.code) {
0x200D -> true // Zero-Width Joiner
0xFE0E, 0xFE0F -> true // Variation selectors
in 0x0300..0x036F -> true // Combining diacritics
in 0x1F3FB..0x1F3FF -> true // Skin tone modifiers (requires surrogate pair check)
in 0x20D0..0x20FF -> true // Combining marks for symbols
in 0x0591..0x05BD -> true // Hebrew combining marks
in 0x0610..0x061A -> true // Arabic combining marks
else -> false
}
}

/**
* Checks if a code point is a grapheme extender.
* More accurate than the Char version for supplementary plane characters.
*/
fun isGraphemeExtender(codePoint: Int): Boolean {
return when (codePoint) {
0x200D -> true // ZWJ
0xFE0E, 0xFE0F -> true // Variation selectors
in 0x0300..0x036F -> true // Combining diacritics
in 0x1F3FB..0x1F3FF -> true // Skin tone modifiers
in 0x20D0..0x20FF -> true // Combining marks for symbols
in 0x0591..0x05BD -> true // Hebrew combining marks
in 0x0610..0x061A -> true // Arabic combining marks
UnicodeConstants.ZWJ -> true // Zero-Width Joiner
UnicodeConstants.VARIATION_SELECTOR_TEXT, UnicodeConstants.VARIATION_SELECTOR_EMOJI -> true // Variation selectors
in UnicodeConstants.COMBINING_DIACRITICS_RANGE -> true // Combining diacritics
in UnicodeConstants.SKIN_TONE_RANGE -> true // Skin tone modifiers (requires surrogate pair check)
in UnicodeConstants.COMBINING_MARKS_FOR_SYMBOLS_RANGE -> true // Combining marks for symbols
in UnicodeConstants.HEBREW_COMBINING_MARKS_RANGE -> true // Hebrew combining marks
in UnicodeConstants.ARABIC_COMBINING_MARKS_RANGE -> true // Arabic combining marks
else -> false
}
}
Expand Down
Loading