From adf939c1723af505e40a550b16daf5889ecc5078 Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 22 May 2026 03:47:49 +0000 Subject: [PATCH 1/3] fix(streaming-markdown): list-marker awareness + table chunk-boundary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three production Slack streaming bugs from issue #69 comment: 1. `_remend("* item one\n")` appended a stray `*`. `_close_emphasis` counted the line-leading bullet as an italic opener. Now skips single-`*` runs preceded by line-leading whitespace and followed by a space/tab (list-marker shape), matching upstream `remend`'s awareness of list markers. 2. `StreamingMarkdownRenderer.finish()` on an odd-count bullet list produced `...item three\n*` — same root cause as #1; the fix covers both. 3. `_get_committable_prefix` released a confirmed table the moment a separator arrived, even with zero body rows. Slack `chat.appendStream` saw header+separator alone as broken syntax and then bare body rows in subsequent appends, never reconnecting them as a single table. Now the header+separator block is held until at least one body row arrives, then released atomically. The original tests pinned the buggy "commit on separator" behavior; updated to require a body row before committing the table block, and added a TestIssue69Regressions class with the exact reproducers from the issue comment plus invariants for the new contract. Closes part of #69 (the streaming bugs). The broader hand-rolled parser question (Options A/B/C in the issue) remains open. --- src/chat_sdk/shared/streaming_markdown.py | 51 ++++++- tests/test_streaming_markdown.py | 155 ++++++++++++++++++++-- 2 files changed, 193 insertions(+), 13 deletions(-) diff --git a/src/chat_sdk/shared/streaming_markdown.py b/src/chat_sdk/shared/streaming_markdown.py index ede4b0e..4c315d1 100644 --- a/src/chat_sdk/shared/streaming_markdown.py +++ b/src/chat_sdk/shared/streaming_markdown.py @@ -54,6 +54,30 @@ def _strip_fenced_code(text: str) -> str: return "\n".join(result_lines) +def _is_list_marker(stripped: str, run_start: int, run_len: int, ch: str) -> bool: + """Whether a run of *ch* at *run_start* is a list marker, not emphasis. + + Only ``*`` is ambiguous -- it's both an inline emphasis marker and a + bullet-list marker. A list marker is a single character preceded only + by horizontal whitespace (or start of text) since the previous newline, + and followed by a horizontal whitespace character. + """ + if ch != "*" or run_len != 1: + return False + j = run_start - 1 + while j >= 0: + c = stripped[j] + if c == "\n": + break + if c not in (" ", "\t"): + return False + j -= 1 + after = run_start + run_len + if after >= len(stripped): + return False + return stripped[after] in (" ", "\t") + + def _close_emphasis(result: str, stripped: str, ch: str) -> str: """Close unclosed bold/italic emphasis for a single marker character. @@ -66,6 +90,10 @@ def _close_emphasis(result: str, stripped: str, ch: str) -> str: level: a run of 2+ characters opens/closes bold first, then any remaining single character opens/closes italic. + Line-leading ``*`` runs followed by whitespace are treated as list + markers and excluded from the emphasis tally -- mirroring upstream + ``remend``'s list-marker awareness (issue #69). + To guarantee idempotency the suffix is separated from any trailing marker run by a zero-width space so it cannot merge with existing characters and create new openers on re-scan. @@ -78,10 +106,13 @@ def _close_emphasis(result: str, stripped: str, ch: str) -> str: i += 2 continue if stripped[i] == ch: + run_start = i run_len = 0 while i < len(stripped) and stripped[i] == ch: run_len += 1 i += 1 + if _is_list_marker(stripped, run_start, run_len, ch): + continue runs.append(run_len) else: i += 1 @@ -231,6 +262,7 @@ def _get_committable_prefix(text: str) -> str: # Walk backward to find consecutive table-like lines at the end held_count = 0 separator_found = False + separator_idx = -1 for i in range(len(lines) - 1, -1, -1): trimmed = lines[i].strip() @@ -241,6 +273,7 @@ def _get_committable_prefix(text: str) -> str: if TABLE_SEPARATOR_RE.match(trimmed): separator_found = True + separator_idx = i break if TABLE_ROW_RE.match(trimmed): @@ -248,10 +281,24 @@ def _get_committable_prefix(text: str) -> str: else: break - if separator_found or held_count == 0: + if separator_found and held_count == 0: + # Separator present but no body row beneath it yet. Append-only + # consumers (Slack `chat.appendStream`, etc.) parse each delta + # independently; emitting a header+separator without rows produces + # broken syntax. Hold the entire pre-separator table block until a + # row arrives (issue #69). + table_start = separator_idx + for i in range(separator_idx - 1, -1, -1): + trimmed = lines[i].strip() + if trimmed == "" or TABLE_ROW_RE.match(trimmed) is None: + break + table_start = i + commit_line_count = table_start + elif separator_found or held_count == 0: return text + else: + commit_line_count = len(lines) - held_count - commit_line_count = len(lines) - held_count committed_lines = lines[:commit_line_count] result = "\n".join(committed_lines) diff --git a/tests/test_streaming_markdown.py b/tests/test_streaming_markdown.py index 571d832..cbcc3d3 100644 --- a/tests/test_streaming_markdown.py +++ b/tests/test_streaming_markdown.py @@ -122,15 +122,25 @@ def test_should_hold_back_trailing_table_header_lines(self): assert "| A | B |" not in result assert "Text" in result - def test_should_confirm_table_when_separator_arrives(self): + def test_should_confirm_table_when_separator_and_body_row_arrive(self): r = StreamingMarkdownRenderer() r.push("Text\n\n| A | B |\n") assert "| A | B |" not in r.render() + # Separator alone with no body row is still held back: append-only + # consumers (e.g. Slack chat.appendStream) parse each delta + # independently and a header+separator emission with zero rows is + # broken syntax. See issue #69. r.push("|---|---|\n") + assert "| A | B |" not in r.render() + assert "|---|---|" not in r.render() + + # First body row releases the entire confirmed table atomically. + r.push("| 1 | 2 |\n") result = r.render() assert "| A | B |" in result assert "|---|---|" in result + assert "| 1 | 2 |" in result def test_should_release_held_lines_when_next_line_is_not_a_table_row(self): r = StreamingMarkdownRenderer() @@ -240,20 +250,34 @@ def test_should_confirm_table_with_alignment_markers_in_separator(self): r.push("| Left | Center | Right |\n") assert "| Left |" not in r.render() + # Separator alone is held; first body row releases the table. r.push("|:---|:---:|---:|\n") + assert "| Left |" not in r.render() + + r.push("| 1 | 2 | 3 |\n") result = r.render() assert "| Left | Center | Right |" in result assert "|:---|:---:|---:|" in result + assert "| 1 | 2 | 3 |" in result def test_should_not_hold_data_rows_after_confirmed_separator(self): r = StreamingMarkdownRenderer() + # Header+separator alone is held until a body row arrives (issue #69). r.push("| A | B |\n|---|---|\n") - assert "|---|---|" in r.render() + assert "|---|---|" not in r.render() + # First body row releases the entire table block atomically. r.push("| 1 | 2 |\n") result = r.render() + assert "| A | B |" in result + assert "|---|---|" in result assert "| 1 | 2 |" in result + # Subsequent rows commit immediately. + r.push("| 3 | 4 |\n") + result = r.render() + assert "| 3 | 4 |" in result + def test_should_handle_multiple_push_calls_before_single_render(self): r = StreamingMarkdownRenderer() r.push("| A ") @@ -273,7 +297,12 @@ def test_should_handle_table_header_split_across_chunks(self): r.push(" | B |\n") assert "| A | B |" not in r.render() + # Separator alone keeps the table held (issue #69). r.push("|---|---|\n") + assert "| A | B |" not in r.render() + + # First body row releases the assembled table. + r.push("| 1 | 2 |\n") assert "| A | B |" in r.render() @@ -450,22 +479,23 @@ def test_getcommittabletext_delta_should_stream_table_in_code_fence(self): delta = committable[len(last_appended) :] assert delta == "" - # Push separator -- table confirmed + # Push separator -- still held; header+separator without a body row + # would be broken markup for append-only consumers (issue #69). r.push("|---|---|\n") committable = r.get_committable_text() delta = committable[len(last_appended) :] - assert "```" in delta - assert "| A | B |" in delta - assert "|---|---|" in delta - last_appended = committable + assert delta == "" - # Push data row + # First data row releases header+separator+row atomically. r.push("| 1 | 2 |\n") committable = r.get_committable_text() delta = committable[len(last_appended) :] + assert "```" in delta + assert "| A | B |" in delta + assert "|---|---|" in delta assert "| 1 | 2 |" in delta # Should NOT have a closing ``` - assert "```" not in delta + assert "```\n```" not in delta last_appended = committable # Blank line ends table -- closes code fence @@ -740,13 +770,17 @@ def test_should_render_realworld_table_with_singledash_separators_progressively( assert "| ID |" not in result assert "Here's a table" in result + # Separator alone is held -- need a body row to confirm (issue #69). r.push("| - | - | - | - | - | - | - | - |\n") result = r.render() - assert "| ID |" in result - assert "| - |" in result + assert "| ID |" not in result + assert "| - |" not in result + # First body row releases header+separator+row atomically. r.push("| 1 | Sarah Johnson | Engineering | 32 | $95,000 | Seattle | 2019-03-15 | Active |\n") result = r.render() + assert "| ID |" in result + assert "| - |" in result assert "Sarah Johnson" in result r.push("| 2 | Michael") @@ -910,3 +944,102 @@ def test_getcommittabletext_is_always_clean_remend_would_not_add_markers(self): f'("{self.COMPLEX_MARKDOWN[: i + 1][-20:]}") has unclosed markers: ' f'"{committable[-40:]}"' ) + + +# ============================================================================ +# Issue #69 regressions: list-marker awareness + table chunk-boundary +# ============================================================================ + + +class TestIssue69Regressions: + """Pin the three production bugs from issue #69 (comment 4514752058). + + The hand-rolled ``_remend`` previously confused line-leading bullet + markers with italic openers, and ``_get_committable_prefix`` emitted + table header+separator without a body row -- both produced visible + corruption in Slack streaming. + """ + + def test_remend_does_not_treat_line_leading_star_as_italic(self): + # Single bullet item -- a literal `* item one\n` is unchanged. + assert _remend("* item one\n") == "* item one\n" + + def test_remend_does_not_close_italic_on_multi_bullet_list(self): + # Three bullets, odd count: previously appended a stray `*`. + text = "* item one\n* item two\n* item three\n" + assert _remend(text) == text + + def test_finish_on_odd_count_bullet_list_does_not_corrupt(self): + r = StreamingMarkdownRenderer(wrap_tables_for_append=False) + r.push("* item one\n* item two\n* item three\n") + assert r.finish() == "* item one\n* item two\n* item three\n" + + def test_remend_still_closes_genuine_italic(self): + # *hello (no following whitespace) is genuine emphasis, not a list. + assert _remend("*hello") == "*hello*" + + def test_remend_still_closes_genuine_bold(self): + assert _remend("**bold") == "**bold**" + + def test_remend_handles_bold_inside_list_item(self): + # Bullet marker is skipped; bold inside the item is balanced. + text = "* **important** item\n" + assert _remend(text) == text + + def test_remend_closes_unclosed_bold_inside_list_item(self): + # Bullet skipped, then an unclosed `**` should still get closed. + assert _remend("* **important") == "* **important**" + + def test_remend_handles_indented_bullet(self): + # Up to leading whitespace before the bullet -- still a list marker. + assert _remend(" * nested item\n") == " * nested item\n" + + def test_table_header_plus_separator_alone_is_held(self): + # Header+separator without a body row would be emitted as broken + # markup to append-only consumers. Hold the whole block. + r = StreamingMarkdownRenderer(wrap_tables_for_append=False) + r.push("| ID | Status |\n|---|---|\n") + assert r.get_committable_text() == "" + + def test_table_chunk_boundary_emits_atomic_header_separator_row(self): + # Reproduces the exact chunk sequence from issue #69 comment. + r = StreamingMarkdownRenderer(wrap_tables_for_append=False) + chunks = [ + "Header:\n\n", + "| ID", + " | Status |\n", + "|---|---|\n", + "| 1 | Open |\n", + "| 2 | Closed |\n", + ] + last = "" + deltas: list[str] = [] + for c in chunks: + r.push(c) + cur = r.get_committable_text() + deltas.append(cur[len(last) :]) + last = cur + + assert deltas[0] == "Header:\n\n" + # Header line, separator line, and split-header chunk all held. + assert deltas[1] == "" + assert deltas[2] == "" + assert deltas[3] == "" + # First body row releases the assembled table atomically. + assert deltas[4] == "| ID | Status |\n|---|---|\n| 1 | Open |\n" + # Subsequent rows commit immediately. + assert deltas[5] == "| 2 | Closed |\n" + + def test_table_with_preceding_text_holds_only_table_block(self): + # Prose above an unconfirmed table commits; only the table holds. + r = StreamingMarkdownRenderer(wrap_tables_for_append=False) + r.push("Intro paragraph.\n\n| H |\n|---|\n") + assert r.get_committable_text() == "Intro paragraph.\n\n" + + def test_table_held_block_flushes_on_finish_even_without_body_row(self): + # finish() is the terminal flush -- even an incomplete table is emitted. + r = StreamingMarkdownRenderer(wrap_tables_for_append=False) + r.push("| H |\n|---|\n") + final = r.finish() + assert "| H |" in final + assert "|---|" in final From 7eda0400d2c2c4c657e15840c76a77fe9f342984 Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 22 May 2026 04:07:47 +0000 Subject: [PATCH 2/3] refactor(_remend): match remend's flanking check for excluded asterisks The previous `_is_list_marker` skipped only line-leading single `*` followed by horizontal whitespace. Tighten to match upstream remend's `shouldSkipAsterisk` (packages/remend/src/emphasis-handlers.ts): exclude any single `*` flanked by whitespace (or text boundary) on both sides, which is what CommonMark says isn't a valid emphasis delimiter anyway. The line-leading bullet case still falls out naturally. Picks up three additional cases the previous narrower check missed: - `text * more` -- whitespace-flanked mid-line - `trailing *\n` -- asterisk at end of line - `partial *` -- asterisk at end of buffer Same `_remend` over-counting failure mode as the original issue #69 bugs, just different surface forms. Renamed the helper to `_is_excluded_asterisk` to reflect the broader scope. Remaining remend divergences (word-internal asterisks, math-block contents, escaped sequences, multi-backtick spans, setext headings, indented code, raw HTML, footnotes) are tracked on issue #69 for the parser-strategy discussion -- out of scope for this PR. --- src/chat_sdk/shared/streaming_markdown.py | 46 +++++++++++------------ tests/test_streaming_markdown.py | 13 +++++++ 2 files changed, 36 insertions(+), 23 deletions(-) diff --git a/src/chat_sdk/shared/streaming_markdown.py b/src/chat_sdk/shared/streaming_markdown.py index 4c315d1..57cf8a5 100644 --- a/src/chat_sdk/shared/streaming_markdown.py +++ b/src/chat_sdk/shared/streaming_markdown.py @@ -54,28 +54,27 @@ def _strip_fenced_code(text: str) -> str: return "\n".join(result_lines) -def _is_list_marker(stripped: str, run_start: int, run_len: int, ch: str) -> bool: - """Whether a run of *ch* at *run_start* is a list marker, not emphasis. - - Only ``*`` is ambiguous -- it's both an inline emphasis marker and a - bullet-list marker. A list marker is a single character preceded only - by horizontal whitespace (or start of text) since the previous newline, - and followed by a horizontal whitespace character. +def _is_excluded_asterisk(stripped: str, run_start: int, run_len: int, ch: str) -> bool: + """Whether a single-``*`` run at *run_start* should be excluded from the + emphasis tally per CommonMark's flanking rules. + + A ``*`` flanked by whitespace (space, tab, newline, or text boundary) + on both sides is not a valid emphasis delimiter, and this same check + covers line-leading bullet list markers (``* item``). Mirrors + ``shouldSkipAsterisk`` in upstream remend's ``emphasis-handlers.ts``. + + Only single-``*`` runs are subject to this check -- ``**`` / ``***`` + runs are evaluated by the bold pairing logic and have their own + word-boundary semantics. ``_``-emphasis isn't ambiguous with list + markers, so it's not affected here either. """ if ch != "*" or run_len != 1: return False - j = run_start - 1 - while j >= 0: - c = stripped[j] - if c == "\n": - break - if c not in (" ", "\t"): - return False - j -= 1 - after = run_start + run_len - if after >= len(stripped): - return False - return stripped[after] in (" ", "\t") + prev_char = stripped[run_start - 1] if run_start > 0 else None + next_char = stripped[run_start + run_len] if run_start + run_len < len(stripped) else None + prev_ws = prev_char is None or prev_char in (" ", "\t", "\n") + next_ws = next_char is None or next_char in (" ", "\t", "\n") + return prev_ws and next_ws def _close_emphasis(result: str, stripped: str, ch: str) -> str: @@ -90,9 +89,10 @@ def _close_emphasis(result: str, stripped: str, ch: str) -> str: level: a run of 2+ characters opens/closes bold first, then any remaining single character opens/closes italic. - Line-leading ``*`` runs followed by whitespace are treated as list - markers and excluded from the emphasis tally -- mirroring upstream - ``remend``'s list-marker awareness (issue #69). + Whitespace-flanked single ``*`` runs are excluded via + :func:`_is_excluded_asterisk` -- this covers line-leading bullets + (``* item``) and any other non-delimiter asterisks per CommonMark's + flanking rules (issue #69). To guarantee idempotency the suffix is separated from any trailing marker run by a zero-width space so it cannot merge with existing @@ -111,7 +111,7 @@ def _close_emphasis(result: str, stripped: str, ch: str) -> str: while i < len(stripped) and stripped[i] == ch: run_len += 1 i += 1 - if _is_list_marker(stripped, run_start, run_len, ch): + if _is_excluded_asterisk(stripped, run_start, run_len, ch): continue runs.append(run_len) else: diff --git a/tests/test_streaming_markdown.py b/tests/test_streaming_markdown.py index cbcc3d3..30afc47 100644 --- a/tests/test_streaming_markdown.py +++ b/tests/test_streaming_markdown.py @@ -994,6 +994,19 @@ def test_remend_handles_indented_bullet(self): # Up to leading whitespace before the bullet -- still a list marker. assert _remend(" * nested item\n") == " * nested item\n" + def test_remend_skips_whitespace_flanked_asterisk_mid_line(self): + # CommonMark: `*` flanked by whitespace on both sides isn't a valid + # delimiter. Previously counted as an italic opener. + assert _remend("use the * operator") == "use the * operator" + + def test_remend_skips_trailing_asterisk_at_end_of_line(self): + # `*\n` -- next char is whitespace, prev is whitespace -- not a delimiter. + assert _remend("trailing star *\nmore text") == "trailing star *\nmore text" + + def test_remend_skips_bare_asterisk_at_end_of_buffer(self): + # `*` at end of stream with whitespace before -- not a delimiter yet. + assert _remend("partial *") == "partial *" + def test_table_header_plus_separator_alone_is_held(self): # Header+separator without a body row would be emitted as broken # markup to append-only consumers. Hold the whole block. From 0bb035324174b215f3a2a9853444ed4dc741fe86 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 23 May 2026 00:42:13 +0000 Subject: [PATCH 3/3] fix(streaming-markdown): preserve monotonicity on back-to-back tables PR #99 review (chatgpt-codex P1): in `_get_committable_prefix`, the "hold pre-separator block" backward walk uses `TABLE_ROW_RE` to extend `table_start`, but separator lines also match that pattern. For a stream like `|A|B|\n|---|---|\n|1|2|\n|C|D|\n|---|---|\n` (two tables with no blank line between them), the walk crosses the first table's separator and collapses `_get_committable_prefix` back to "", violating the monotonic append-only contract of `get_committable_text()`. Adapters that compute deltas from prior committed length then emit incorrect/no deltas. Fix: the backward walk now stops at empty lines (existing), non-row content (existing), AND at prior separators (new). When it hits a prior separator -- meaning the candidate "new header" row above it was already committed as a body row of the prior table -- the function falls back to `return text` instead of holding. That emits one "stray separator" on the rollback delta, which is broken markup but the lesser evil compared to non-monotonic rollback. The fix preserves the well-formed multi-table case where the second table is separated from the first by a blank line; the empty-line break in the walk fires before reaching the prior separator. Tests added: - `test_back_to_back_tables_keep_committable_monotonic` (the exact reviewer repro) - `test_second_table_after_blank_line_still_holds_header` (verifies the fix doesn't regress the blank-line-separated case) The outdated comment from gemini-code-assist on `_is_excluded_asterisk` suggesting we also include `\n` and end-of-string in the list-marker exclusion is already addressed in the second commit on this branch, which broadened the helper to match remend's `shouldSkipAsterisk` exactly. 79 streaming tests pass / 3598 total / 1 pre-existing unrelated failure. --- src/chat_sdk/shared/streaming_markdown.py | 14 ++++++++++ tests/test_streaming_markdown.py | 33 +++++++++++++++++++++++ 2 files changed, 47 insertions(+) diff --git a/src/chat_sdk/shared/streaming_markdown.py b/src/chat_sdk/shared/streaming_markdown.py index 57cf8a5..6a4f880 100644 --- a/src/chat_sdk/shared/streaming_markdown.py +++ b/src/chat_sdk/shared/streaming_markdown.py @@ -287,12 +287,26 @@ def _get_committable_prefix(text: str) -> str: # independently; emitting a header+separator without rows produces # broken syntax. Hold the entire pre-separator table block until a # row arrives (issue #69). + # + # The backward walk must also stop at empty lines (end of the + # current block) and at prior separators -- prior separators mean + # the candidate "new header" rows above them were already + # committed as body rows of a previously confirmed table. + # Rolling them back would violate the monotonic append-only + # contract; in that case fall through to "commit everything" + # instead (PR #99 review finding). table_start = separator_idx + rolled_back = False for i in range(separator_idx - 1, -1, -1): trimmed = lines[i].strip() if trimmed == "" or TABLE_ROW_RE.match(trimmed) is None: break + if TABLE_SEPARATOR_RE.match(trimmed): + rolled_back = True + break table_start = i + if rolled_back: + return text commit_line_count = table_start elif separator_found or held_count == 0: return text diff --git a/tests/test_streaming_markdown.py b/tests/test_streaming_markdown.py index 30afc47..392a55a 100644 --- a/tests/test_streaming_markdown.py +++ b/tests/test_streaming_markdown.py @@ -1056,3 +1056,36 @@ def test_table_held_block_flushes_on_finish_even_without_body_row(self): final = r.finish() assert "| H |" in final assert "|---|" in final + + def test_back_to_back_tables_keep_committable_monotonic(self): + # PR #99 review #2: when a second table's separator arrives in a + # stream that already had one confirmed table (and no blank line + # between them), the backward "hold pre-separator block" walk + # would roll back into the previously-committed first-table + # body. Verify get_committable_text() never shrinks. + r = StreamingMarkdownRenderer(wrap_tables_for_append=False) + chunks = [ + "|A|B|\n|---|---|\n|1|2|\n", # full first table + "|C|D|\n", # second table's header (committed as body of first) + "|---|---|\n", # second table's separator -- must not roll back + ] + last = "" + for chunk in chunks: + r.push(chunk) + cur = r.get_committable_text() + assert cur.startswith(last), ( + f"monotonicity violated: prior committed prefix={last!r} not a prefix of new committed={cur!r}" + ) + last = cur + + def test_second_table_after_blank_line_still_holds_header(self): + # The fix above must NOT regress the well-formed multi-table + # case where tables are separated by a blank line. + r = StreamingMarkdownRenderer(wrap_tables_for_append=False) + r.push("| A | B |\n|---|---|\n| 1 | 2 |\n") + r.push("\n| X | Y |\n") # blank line then new header + r.push("|---|---|\n") # new separator + # `| X | Y |` is still held -- second table has no body row yet. + committable = r.get_committable_text() + assert "| X | Y |" not in committable + assert "| 1 | 2 |" in committable