Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 63 additions & 2 deletions src/chat_sdk/shared/streaming_markdown.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,29 @@ def _strip_fenced_code(text: str) -> str:
return "\n".join(result_lines)


def _is_excluded_asterisk(stripped: str, run_start: int, run_len: int, ch: str) -> bool:
"""Whether a single-``*`` run at *run_start* should be excluded from the
emphasis tally per CommonMark's flanking rules.

A ``*`` flanked by whitespace (space, tab, newline, or text boundary)
on both sides is not a valid emphasis delimiter, and this same check
covers line-leading bullet list markers (``* item``). Mirrors
``shouldSkipAsterisk`` in upstream remend's ``emphasis-handlers.ts``.

Only single-``*`` runs are subject to this check -- ``**`` / ``***``
runs are evaluated by the bold pairing logic and have their own
word-boundary semantics. ``_``-emphasis isn't ambiguous with list
markers, so it's not affected here either.
"""
if ch != "*" or run_len != 1:
return False
prev_char = stripped[run_start - 1] if run_start > 0 else None
next_char = stripped[run_start + run_len] if run_start + run_len < len(stripped) else None
prev_ws = prev_char is None or prev_char in (" ", "\t", "\n")
next_ws = next_char is None or next_char in (" ", "\t", "\n")
return prev_ws and next_ws


def _close_emphasis(result: str, stripped: str, ch: str) -> str:
"""Close unclosed bold/italic emphasis for a single marker character.

Expand All @@ -66,6 +89,11 @@ def _close_emphasis(result: str, stripped: str, ch: str) -> str:
level: a run of 2+ characters opens/closes bold first, then any
remaining single character opens/closes italic.

Whitespace-flanked single ``*`` runs are excluded via
:func:`_is_excluded_asterisk` -- this covers line-leading bullets
(``* item``) and any other non-delimiter asterisks per CommonMark's
flanking rules (issue #69).

To guarantee idempotency the suffix is separated from any trailing
marker run by a zero-width space so it cannot merge with existing
characters and create new openers on re-scan.
Expand All @@ -78,10 +106,13 @@ def _close_emphasis(result: str, stripped: str, ch: str) -> str:
i += 2
continue
if stripped[i] == ch:
run_start = i
run_len = 0
while i < len(stripped) and stripped[i] == ch:
run_len += 1
i += 1
if _is_excluded_asterisk(stripped, run_start, run_len, ch):
continue
runs.append(run_len)
else:
i += 1
Expand Down Expand Up @@ -231,6 +262,7 @@ def _get_committable_prefix(text: str) -> str:
# Walk backward to find consecutive table-like lines at the end
held_count = 0
separator_found = False
separator_idx = -1

for i in range(len(lines) - 1, -1, -1):
trimmed = lines[i].strip()
Expand All @@ -241,17 +273,46 @@ def _get_committable_prefix(text: str) -> str:

if TABLE_SEPARATOR_RE.match(trimmed):
separator_found = True
separator_idx = i
break

if TABLE_ROW_RE.match(trimmed):
held_count += 1
else:
break

if separator_found or held_count == 0:
if separator_found and held_count == 0:
# Separator present but no body row beneath it yet. Append-only
# consumers (Slack `chat.appendStream`, etc.) parse each delta
# independently; emitting a header+separator without rows produces
# broken syntax. Hold the entire pre-separator table block until a
# row arrives (issue #69).
#
# The backward walk must also stop at empty lines (end of the
# current block) and at prior separators -- prior separators mean
# the candidate "new header" rows above them were already
# committed as body rows of a previously confirmed table.
# Rolling them back would violate the monotonic append-only
# contract; in that case fall through to "commit everything"
# instead (PR #99 review finding).
table_start = separator_idx
rolled_back = False
for i in range(separator_idx - 1, -1, -1):
trimmed = lines[i].strip()
if trimmed == "" or TABLE_ROW_RE.match(trimmed) is None:
break
if TABLE_SEPARATOR_RE.match(trimmed):
rolled_back = True
break
table_start = i
Comment on lines +300 to +307
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Stop backward table hold at earlier separator lines

When separator_found and held_count == 0, the backward scan uses TABLE_ROW_RE to extend table_start, but separator lines also match TABLE_ROW_RE. In a stream like |A|B|\n|---|---|\n|1|2|\n|C|D|\n|---|---|\n, this makes table_start walk into the already-committed block and _get_committable_prefix collapses from the previous committed prefix to "", violating the append-only monotonic contract of get_committable_text(). Adapters that compute deltas from prior committed length can then emit incorrect/no deltas for that turn; the scan should stop when it encounters an earlier separator (or otherwise avoid capturing a previously confirmed table block).

Useful? React with 👍 / 👎.

if rolled_back:
return text
commit_line_count = table_start
elif separator_found or held_count == 0:
return text
else:
commit_line_count = len(lines) - held_count

commit_line_count = len(lines) - held_count
committed_lines = lines[:commit_line_count]

result = "\n".join(committed_lines)
Expand Down
201 changes: 190 additions & 11 deletions tests/test_streaming_markdown.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,15 +122,25 @@ def test_should_hold_back_trailing_table_header_lines(self):
assert "| A | B |" not in result
assert "Text" in result

def test_should_confirm_table_when_separator_arrives(self):
def test_should_confirm_table_when_separator_and_body_row_arrive(self):
r = StreamingMarkdownRenderer()
r.push("Text\n\n| A | B |\n")
assert "| A | B |" not in r.render()

# Separator alone with no body row is still held back: append-only
# consumers (e.g. Slack chat.appendStream) parse each delta
# independently and a header+separator emission with zero rows is
# broken syntax. See issue #69.
r.push("|---|---|\n")
assert "| A | B |" not in r.render()
assert "|---|---|" not in r.render()

# First body row releases the entire confirmed table atomically.
r.push("| 1 | 2 |\n")
result = r.render()
assert "| A | B |" in result
assert "|---|---|" in result
assert "| 1 | 2 |" in result

def test_should_release_held_lines_when_next_line_is_not_a_table_row(self):
r = StreamingMarkdownRenderer()
Expand Down Expand Up @@ -240,20 +250,34 @@ def test_should_confirm_table_with_alignment_markers_in_separator(self):
r.push("| Left | Center | Right |\n")
assert "| Left |" not in r.render()

# Separator alone is held; first body row releases the table.
r.push("|:---|:---:|---:|\n")
assert "| Left |" not in r.render()

r.push("| 1 | 2 | 3 |\n")
result = r.render()
assert "| Left | Center | Right |" in result
assert "|:---|:---:|---:|" in result
assert "| 1 | 2 | 3 |" in result

def test_should_not_hold_data_rows_after_confirmed_separator(self):
r = StreamingMarkdownRenderer()
# Header+separator alone is held until a body row arrives (issue #69).
r.push("| A | B |\n|---|---|\n")
assert "|---|---|" in r.render()
assert "|---|---|" not in r.render()

# First body row releases the entire table block atomically.
r.push("| 1 | 2 |\n")
result = r.render()
assert "| A | B |" in result
assert "|---|---|" in result
assert "| 1 | 2 |" in result

# Subsequent rows commit immediately.
r.push("| 3 | 4 |\n")
result = r.render()
assert "| 3 | 4 |" in result

def test_should_handle_multiple_push_calls_before_single_render(self):
r = StreamingMarkdownRenderer()
r.push("| A ")
Expand All @@ -273,7 +297,12 @@ def test_should_handle_table_header_split_across_chunks(self):
r.push(" | B |\n")
assert "| A | B |" not in r.render()

# Separator alone keeps the table held (issue #69).
r.push("|---|---|\n")
assert "| A | B |" not in r.render()

# First body row releases the assembled table.
r.push("| 1 | 2 |\n")
assert "| A | B |" in r.render()


Expand Down Expand Up @@ -450,22 +479,23 @@ def test_getcommittabletext_delta_should_stream_table_in_code_fence(self):
delta = committable[len(last_appended) :]
assert delta == ""

# Push separator -- table confirmed
# Push separator -- still held; header+separator without a body row
# would be broken markup for append-only consumers (issue #69).
r.push("|---|---|\n")
committable = r.get_committable_text()
delta = committable[len(last_appended) :]
assert "```" in delta
assert "| A | B |" in delta
assert "|---|---|" in delta
last_appended = committable
assert delta == ""

# Push data row
# First data row releases header+separator+row atomically.
r.push("| 1 | 2 |\n")
committable = r.get_committable_text()
delta = committable[len(last_appended) :]
assert "```" in delta
assert "| A | B |" in delta
assert "|---|---|" in delta
assert "| 1 | 2 |" in delta
# Should NOT have a closing ```
assert "```" not in delta
assert "```\n```" not in delta
last_appended = committable

# Blank line ends table -- closes code fence
Expand Down Expand Up @@ -740,13 +770,17 @@ def test_should_render_realworld_table_with_singledash_separators_progressively(
assert "| ID |" not in result
assert "Here's a table" in result

# Separator alone is held -- need a body row to confirm (issue #69).
r.push("| - | - | - | - | - | - | - | - |\n")
result = r.render()
assert "| ID |" in result
assert "| - |" in result
assert "| ID |" not in result
assert "| - |" not in result

# First body row releases header+separator+row atomically.
r.push("| 1 | Sarah Johnson | Engineering | 32 | $95,000 | Seattle | 2019-03-15 | Active |\n")
result = r.render()
assert "| ID |" in result
assert "| - |" in result
assert "Sarah Johnson" in result

r.push("| 2 | Michael")
Expand Down Expand Up @@ -910,3 +944,148 @@ def test_getcommittabletext_is_always_clean_remend_would_not_add_markers(self):
f'("{self.COMPLEX_MARKDOWN[: i + 1][-20:]}") has unclosed markers: '
f'"{committable[-40:]}"'
)


# ============================================================================
# Issue #69 regressions: list-marker awareness + table chunk-boundary
# ============================================================================


class TestIssue69Regressions:
"""Pin the three production bugs from issue #69 (comment 4514752058).

The hand-rolled ``_remend`` previously confused line-leading bullet
markers with italic openers, and ``_get_committable_prefix`` emitted
table header+separator without a body row -- both produced visible
corruption in Slack streaming.
"""

def test_remend_does_not_treat_line_leading_star_as_italic(self):
# Single bullet item -- a literal `* item one\n` is unchanged.
assert _remend("* item one\n") == "* item one\n"

def test_remend_does_not_close_italic_on_multi_bullet_list(self):
# Three bullets, odd count: previously appended a stray `*`.
text = "* item one\n* item two\n* item three\n"
assert _remend(text) == text

def test_finish_on_odd_count_bullet_list_does_not_corrupt(self):
r = StreamingMarkdownRenderer(wrap_tables_for_append=False)
r.push("* item one\n* item two\n* item three\n")
assert r.finish() == "* item one\n* item two\n* item three\n"

def test_remend_still_closes_genuine_italic(self):
# *hello (no following whitespace) is genuine emphasis, not a list.
assert _remend("*hello") == "*hello*"

def test_remend_still_closes_genuine_bold(self):
assert _remend("**bold") == "**bold**"

def test_remend_handles_bold_inside_list_item(self):
# Bullet marker is skipped; bold inside the item is balanced.
text = "* **important** item\n"
assert _remend(text) == text

def test_remend_closes_unclosed_bold_inside_list_item(self):
# Bullet skipped, then an unclosed `**` should still get closed.
assert _remend("* **important") == "* **important**"

def test_remend_handles_indented_bullet(self):
# Up to leading whitespace before the bullet -- still a list marker.
assert _remend(" * nested item\n") == " * nested item\n"

def test_remend_skips_whitespace_flanked_asterisk_mid_line(self):
# CommonMark: `*` flanked by whitespace on both sides isn't a valid
# delimiter. Previously counted as an italic opener.
assert _remend("use the * operator") == "use the * operator"

def test_remend_skips_trailing_asterisk_at_end_of_line(self):
# `*\n` -- next char is whitespace, prev is whitespace -- not a delimiter.
assert _remend("trailing star *\nmore text") == "trailing star *\nmore text"

def test_remend_skips_bare_asterisk_at_end_of_buffer(self):
# `*` at end of stream with whitespace before -- not a delimiter yet.
assert _remend("partial *") == "partial *"

def test_table_header_plus_separator_alone_is_held(self):
# Header+separator without a body row would be emitted as broken
# markup to append-only consumers. Hold the whole block.
r = StreamingMarkdownRenderer(wrap_tables_for_append=False)
r.push("| ID | Status |\n|---|---|\n")
assert r.get_committable_text() == ""

def test_table_chunk_boundary_emits_atomic_header_separator_row(self):
# Reproduces the exact chunk sequence from issue #69 comment.
r = StreamingMarkdownRenderer(wrap_tables_for_append=False)
chunks = [
"Header:\n\n",
"| ID",
" | Status |\n",
"|---|---|\n",
"| 1 | Open |\n",
"| 2 | Closed |\n",
]
last = ""
deltas: list[str] = []
for c in chunks:
r.push(c)
cur = r.get_committable_text()
deltas.append(cur[len(last) :])
last = cur

assert deltas[0] == "Header:\n\n"
# Header line, separator line, and split-header chunk all held.
assert deltas[1] == ""
assert deltas[2] == ""
assert deltas[3] == ""
# First body row releases the assembled table atomically.
assert deltas[4] == "| ID | Status |\n|---|---|\n| 1 | Open |\n"
# Subsequent rows commit immediately.
assert deltas[5] == "| 2 | Closed |\n"

def test_table_with_preceding_text_holds_only_table_block(self):
# Prose above an unconfirmed table commits; only the table holds.
r = StreamingMarkdownRenderer(wrap_tables_for_append=False)
r.push("Intro paragraph.\n\n| H |\n|---|\n")
assert r.get_committable_text() == "Intro paragraph.\n\n"

def test_table_held_block_flushes_on_finish_even_without_body_row(self):
# finish() is the terminal flush -- even an incomplete table is emitted.
r = StreamingMarkdownRenderer(wrap_tables_for_append=False)
r.push("| H |\n|---|\n")
final = r.finish()
assert "| H |" in final
assert "|---|" in final

def test_back_to_back_tables_keep_committable_monotonic(self):
# PR #99 review #2: when a second table's separator arrives in a
# stream that already had one confirmed table (and no blank line
# between them), the backward "hold pre-separator block" walk
# would roll back into the previously-committed first-table
# body. Verify get_committable_text() never shrinks.
r = StreamingMarkdownRenderer(wrap_tables_for_append=False)
chunks = [
"|A|B|\n|---|---|\n|1|2|\n", # full first table
"|C|D|\n", # second table's header (committed as body of first)
"|---|---|\n", # second table's separator -- must not roll back
]
last = ""
for chunk in chunks:
r.push(chunk)
cur = r.get_committable_text()
assert cur.startswith(last), (
f"monotonicity violated: prior committed prefix={last!r} not a prefix of new committed={cur!r}"
)
last = cur

def test_second_table_after_blank_line_still_holds_header(self):
# The fix above must NOT regress the well-formed multi-table
# case where tables are separated by a blank line.
r = StreamingMarkdownRenderer(wrap_tables_for_append=False)
r.push("| A | B |\n|---|---|\n| 1 | 2 |\n")
r.push("\n| X | Y |\n") # blank line then new header
r.push("|---|---|\n") # new separator
# `| X | Y |` is still held -- second table has no body row yet.
committable = r.get_committable_text()
assert "| X | Y |" not in committable
assert "| 1 | 2 |" in committable
Loading