Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions modules/bundle-mapper/module-package.yaml
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
name: bundle-mapper
version: 0.1.0
version: "0.1.0"
commands: []
pip_dependencies: []
module_dependencies: []
core_compatibility: '>=0.28.0,<1.0.0'
core_compatibility: ">=0.28.0,<1.0.0"
tier: community
schema_extensions:
project_bundle: {}
Expand Down
6 changes: 5 additions & 1 deletion modules/bundle-mapper/src/bundle_mapper/mapper/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,8 @@ def _score_historical_mapping(self, item: BacklogItem) -> tuple[str | None, floa
continue
counts = entry.get("counts", {})
for bid, cnt in counts.items():
if self._available_bundle_ids and bid not in self._available_bundle_ids:
continue
if cnt > best_count:
best_count = cnt
best_bundle = bid
Expand Down Expand Up @@ -182,10 +184,12 @@ def compute_mapping(self, item: BacklogItem) -> BundleMapping:
if content_list:
best_content = content_list[0]
contrib = WEIGHT_CONTENT * best_content[1]
weighted += contrib
if primary_bundle_id is None:
weighted += contrib
primary_bundle_id = best_content[0]
reasons.append(self._explain_score(best_content[0], best_content[1], "content_similarity"))
elif best_content[0] == primary_bundle_id:
weighted += contrib

confidence = min(1.0, weighted)
candidates: list[tuple[str, float]] = []
Expand Down
69 changes: 55 additions & 14 deletions modules/bundle-mapper/src/bundle_mapper/mapper/history.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import re
from pathlib import Path
from typing import Any, Protocol, runtime_checkable
from urllib.parse import quote, unquote

import yaml
from beartype import beartype
Expand Down Expand Up @@ -58,25 +59,58 @@ def matches(self, item: _ItemLike) -> bool:

def item_key(item: _ItemLike) -> str:
    """Build a stable, unambiguous history-lookup key from (area, assignee, tags).

    Every component is URL-encoded (``quote`` with ``safe=""``) so the ``;``
    field delimiter and the ``,`` tag delimiter can never collide with
    user-supplied values.  Tags are stripped, sorted, and comma-joined so the
    key is deterministic regardless of input ordering.

    Note: the diff artifact that left the superseded ``|``-delimited legacy
    implementation (with its early ``return``) ahead of this body has been
    removed; only the final encoding-based implementation remains.
    """
    # Encode each field so delimiter characters inside values cannot break parsing.
    area = quote((item.area or "").strip(), safe="")
    assignee = quote((item.assignees[0] if item.assignees else "").strip(), safe="")
    # Use comma-separated, URL-encoded tag values to avoid delimiter collisions.
    tags = [quote(t.strip(), safe="") for t in sorted(t.strip() for t in item.tags if t)]
    tags_str = ",".join(tags)
    return f"area={area};assignee={assignee};tags={tags_str}"


def item_keys_similar(key_a: str, key_b: str) -> bool:
"""Return True if keys share at least 2 of 3 non-empty components (area, assignee, tags). Empty fields are ignored to avoid matching unrelated items."""

def parts(k: str) -> tuple[str, str, str]:
d: dict[str, str] = {}
for seg in k.split("|"):
def _parse_key(k: str) -> tuple[str, str, str]:
# Preferred modern format: area=...;assignee=...;tags=a,b
if ";" in k:
d: dict[str, str] = {}
for seg in k.split(";"):
if "=" in seg:
name, val = seg.split("=", 1)
d[name.strip()] = val.strip()
area = unquote(d.get("area", ""))
assignee = unquote(d.get("assignee", ""))
tags_raw = d.get("tags", "")
tags = [unquote(t) for t in tags_raw.split(",") if t]
return (area, assignee, ",".join(tags))

# Legacy format: area=...|assignee=...|tags=a|b
d_legacy: dict[str, str] = {}
segments = k.split("|")
idx = 0
while idx < len(segments):
seg = segments[idx]
if "=" in seg:
name, val = seg.split("=", 1)
d[name.strip()] = val.strip()
return (d.get("area", ""), d.get("assignee", ""), d.get("tags", ""))

a1, a2, a3 = parts(key_a)
b1, b2, b3 = parts(key_b)
name = name.strip()
val = val.strip()
if name == "tags":
tag_parts = [val] if val else []
j = idx + 1
while j < len(segments) and "=" not in segments[j]:
if segments[j]:
tag_parts.append(segments[j].strip())
j += 1
d_legacy["tags"] = ",".join(tag_parts)
idx = j
continue
d_legacy[name] = val
idx += 1

return (d_legacy.get("area", ""), d_legacy.get("assignee", ""), d_legacy.get("tags", ""))

a1, a2, a3 = _parse_key(key_a)
b1, b2, b3 = _parse_key(key_b)
matches = 0
if a1 and b1 and a1 == b1:
matches += 1
Expand Down Expand Up @@ -129,10 +163,17 @@ def load_bundle_mapping_config(config_path: Path | None = None) -> dict[str, Any
with open(config_path, encoding="utf-8") as f:
data = yaml.safe_load(f) or {}
bm = (data.get("backlog") or {}).get("bundle_mapping") or {}

def _safe_float(value: Any, default: float) -> float:
try:
return float(value)
except (TypeError, ValueError):
return default

return {
"rules": bm.get("rules", []),
"history": bm.get("history", {}),
"explicit_label_prefix": bm.get("explicit_label_prefix", DEFAULT_LABEL_PREFIX),
"auto_assign_threshold": float(bm.get("auto_assign_threshold", DEFAULT_AUTO_ASSIGN_THRESHOLD)),
"confirm_threshold": float(bm.get("confirm_threshold", DEFAULT_CONFIRM_THRESHOLD)),
"auto_assign_threshold": _safe_float(bm.get("auto_assign_threshold"), DEFAULT_AUTO_ASSIGN_THRESHOLD),
"confirm_threshold": _safe_float(bm.get("confirm_threshold"), DEFAULT_CONFIRM_THRESHOLD),
}
4 changes: 2 additions & 2 deletions modules/bundle-mapper/src/bundle_mapper/ui/interactive.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,8 @@ def ask_bundle_mapping(
if 1 <= i <= len(available_bundles):
return available_bundles[i - 1]
except ValueError:
console.print("[red]Invalid selection. Skipping bundle selection.[/red]")
return None
pass
return None
if choice.isdigit() and candidates:
i = int(choice)
if 1 <= i <= len(candidates):
Expand Down
50 changes: 50 additions & 0 deletions modules/bundle-mapper/tests/unit/test_bundle_mapper_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

from __future__ import annotations

from pathlib import Path

import yaml
from bundle_mapper.mapper.engine import BundleMapper

from specfact_cli.models.backlog_item import BacklogItem
Expand Down Expand Up @@ -65,3 +68,50 @@ def test_weighted_calculation_explicit_dominates() -> None:
m = mapper.compute_mapping(item)
assert m.primary_bundle_id == "backend"
assert m.confidence >= 0.8


def test_historical_mapping_ignores_stale_bundle_ids(tmp_path: Path) -> None:
    """A historically dominant bundle id that no longer exists must be skipped.

    History gives "removed-bundle" a much higher count, but only
    "backend-services" is still available, so it must win the mapping.
    """
    history_key = "area=backend;assignee=alice;tags=bug,login"
    payload = {
        "backlog": {
            "bundle_mapping": {
                "history": {
                    history_key: {
                        "counts": {
                            "removed-bundle": 50,
                            "backend-services": 2,
                        }
                    }
                }
            }
        }
    }
    cfg_file = tmp_path / "config.yaml"
    cfg_file.write_text(yaml.safe_dump(payload, sort_keys=False), encoding="utf-8")

    mapper = BundleMapper(available_bundle_ids=["backend-services"], config_path=cfg_file)
    backlog_item = _item(assignees=["alice"], area="backend", tags=["bug", "login"])

    assert mapper.compute_mapping(backlog_item).primary_bundle_id == "backend-services"


def test_conflicting_content_signal_does_not_increase_primary_confidence() -> None:
    """Content similarity pointing at a different bundle must not boost the primary.

    The explicit "bundle:alpha" tag selects alpha, while the title "beta"
    matches beta's spec keywords; confidence must stay at the explicit-only 0.8.
    """
    mapper = BundleMapper(
        available_bundle_ids=["alpha", "beta"],
        bundle_spec_keywords={"beta": {"beta"}},
    )
    conflicted = _item(tags=["bundle:alpha"], title="beta")

    result = mapper.compute_mapping(conflicted)

    assert result.primary_bundle_id == "alpha"
    assert result.confidence == 0.8
26 changes: 26 additions & 0 deletions modules/bundle-mapper/tests/unit/test_mapping_history.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,3 +69,29 @@ def test_save_user_confirmed_mapping_increments_history() -> None:
break
else:
pytest.fail("Expected backend-services in history counts")


def test_item_key_similarity_does_not_false_match_tag_lists() -> None:
    """Different areas plus overlapping-but-unequal tag lists must not be 'similar'.

    Only the assignee matches (1 of 3 components), which is below the
    2-of-3 similarity threshold.
    """
    key_api = item_key(_item(assignees=["alice"], area="api", tags=["a", "b"]))
    key_web = item_key(_item(assignees=["alice"], area="web", tags=["a"]))

    assert item_keys_similar(key_api, key_web) is False


def test_load_bundle_mapping_config_malformed_thresholds_use_defaults(tmp_path: Path) -> None:
    """Non-numeric threshold values fall back to defaults instead of raising."""
    yaml_body = "\n".join(
        [
            "backlog:",
            "  bundle_mapping:",
            "    auto_assign_threshold: high",
            "    confirm_threshold: medium",
        ]
    )
    cfg_file = tmp_path / "config.yaml"
    cfg_file.write_text(yaml_body + "\n", encoding="utf-8")

    cfg = load_bundle_mapping_config(config_path=cfg_file)

    # Defaults: DEFAULT_AUTO_ASSIGN_THRESHOLD == 0.8, DEFAULT_CONFIRM_THRESHOLD == 0.5.
    assert cfg["auto_assign_threshold"] == 0.8
    assert cfg["confirm_threshold"] == 0.5
Original file line number Diff line number Diff line change
Expand Up @@ -107,3 +107,10 @@ This change was re-validated after renaming and updating to align with the modul
- All old change ID references updated to new module-scoped naming

**Result**: Pass β€” format compliant, module architecture aligned, no breaking changes introduced.

## Remediation Re-Validation (2026-02-22)

- Scope: review defect remediation for stale historical bundle IDs, history key encoding ambiguity, conflicting content contribution, and malformed threshold parsing.
- Validation command: `openspec validate bundle-mapper-01-mapping-strategy --strict`
- Result: `Change 'bundle-mapper-01-mapping-strategy' is valid`
- Notes: telemetry flush warnings were emitted due to a restricted network (`edge.openspec.dev`), but validation completed successfully with exit code 0.
29 changes: 29 additions & 0 deletions openspec/changes/bundle-mapper-01-mapping-strategy/TDD_EVIDENCE.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
# TDD Evidence: bundle-mapper-01-mapping-strategy

## Review findings intake (2026-02-22)

- Historical scorer may choose stale bundle IDs not present in current `available_bundle_ids`.
- History key format is ambiguous because `|` is used for both field and tag separators.
- Content signal can boost confidence even when it points to a different bundle than the selected primary bundle.
- Threshold parsing crashes on malformed user config values instead of falling back to defaults.

## Pre-implementation (failing run)

- **Command**: `hatch run pytest modules/bundle-mapper/tests/ -v --no-cov`
Expand All @@ -12,3 +19,25 @@
- **Timestamp**: 2026-02-18
- **Result**: 11 passed in 0.71s
- **Tests**: test_bundle_mapping_model (3), test_bundle_mapper_engine (5), test_mapping_history (3)

## Pre-implementation (review-defect regression tests)

- **Command**: `hatch run pytest modules/bundle-mapper/tests/unit/test_bundle_mapper_engine.py modules/bundle-mapper/tests/unit/test_mapping_history.py -q`
- **Timestamp**: 2026-02-22
- **Result**: 4 failed, 9 passed
- **Failure summary**:
- `test_historical_mapping_ignores_stale_bundle_ids`: primary mapping was `None`/invalid due to stale history IDs
- `test_conflicting_content_signal_does_not_increase_primary_confidence`: confidence was `0.85` instead of `0.80`
- `test_item_key_similarity_does_not_false_match_tag_lists`: returned false-positive similarity (`True`)
- `test_load_bundle_mapping_config_malformed_thresholds_use_defaults`: `ValueError` raised for non-numeric thresholds

## Post-implementation (review-defect regression tests)

- **Command**: `hatch run pytest modules/bundle-mapper/tests/unit/test_bundle_mapper_engine.py modules/bundle-mapper/tests/unit/test_mapping_history.py -q`
- **Timestamp**: 2026-02-22
- **Result**: 13 passed in 0.75s
- **Tests**:
- stale historical bundle IDs are ignored during scoring
- unambiguous history key serialization preserves tag semantics
- conflicting content signal does not boost different primary bundle confidence
- malformed thresholds fall back to defaults
Original file line number Diff line number Diff line change
Expand Up @@ -62,11 +62,24 @@ The system SHALL persist mapping rules learned from user confirmations.
- **WHEN** a new item matches historical pattern (same assignee, area, tags)
- **THEN** the system uses historical mapping frequency to boost confidence score

#### Scenario: Historical mapping ignores stale bundle ids

- **GIVEN** history contains bundle ids that are no longer present in available bundles
- **WHEN** historical scoring is computed
- **THEN** stale bundle ids are ignored
- **AND** returned historical bundle ids are always members of current available bundles

#### Scenario: Mapping rules from config

- **WHEN** config file contains mapping rules (e.g., "assignee=alice β†’ backend-services")
- **THEN** the system applies these rules before computing other signals

#### Scenario: History key encoding is unambiguous

- **WHEN** item keys are serialized for history matching
- **THEN** field delimiters and tag-value delimiters do not collide
- **AND** round-trip parsing preserves all tag values without truncation

### Requirement: Interactive Mapping UI

The system SHALL provide an interactive prompt for bundle selection with confidence visualization and candidate options.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,13 @@ The system SHALL score content similarity between item text and existing specs i
- **WHEN** item text has no keywords in common with bundle specs
- **THEN** the system assigns score 0.0 for that bundle

#### Scenario: Conflicting content signal does not increase confidence

- **GIVEN** explicit or historical scoring selected a primary bundle
- **AND** top content similarity points to a different bundle
- **WHEN** final confidence is calculated
- **THEN** the content contribution is not added to the selected primary bundle confidence

#### Scenario: Tokenization for matching

- **WHEN** content similarity is computed
Expand Down Expand Up @@ -90,3 +97,9 @@ The system SHALL use configurable confidence thresholds for routing decisions.

- **WHEN** user configures custom thresholds in `.specfact/config.yaml`
- **THEN** the system uses custom thresholds instead of defaults

#### Scenario: Malformed thresholds fall back to defaults

- **WHEN** config contains non-numeric threshold values
- **THEN** mapper initialization does not fail
- **AND** default threshold values are used
11 changes: 11 additions & 0 deletions openspec/changes/bundle-mapper-01-mapping-strategy/tasks.md
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,17 @@
- [ ] 12.4.4 Run `hatch test --cover -v` one final time
- [ ] 12.4.5 Verify no errors remain (formatting, linting, type-checking, tests)

## 12R. Review Defect Remediation (2026-02-22)

- [x] 12R.1 Add regression tests first (must fail before implementation)
- [x] 12R.1.1 Historical scoring ignores stale bundle IDs not present in available bundles
- [x] 12R.1.2 History key encoding is unambiguous and does not lose tag values
- [x] 12R.1.3 Conflicting content signal does not boost confidence for another primary bundle
- [x] 12R.1.4 Malformed threshold config values fall back to defaults without crashing
- [x] 12R.2 Record failing run in `TDD_EVIDENCE.md` with command, timestamp, and failure summary
- [x] 12R.3 Implement production fixes in mapper/history modules
- [x] 12R.4 Re-run regression tests and record passing run in `TDD_EVIDENCE.md`

## 13. OpenSpec Validation

- [ ] 13.1 Validate change proposal
Expand Down
11 changes: 10 additions & 1 deletion tests/unit/adapters/test_ado.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,11 +245,20 @@ def test_update_work_item_status(
@beartype
@patch("specfact_cli.adapters.ado.requests.patch")
@patch("specfact_cli.adapters.ado.requests.get")
def test_missing_api_token(self, mock_get: MagicMock, mock_patch: MagicMock, bridge_config: BridgeConfig) -> None:
@patch("specfact_cli.adapters.ado.get_token")
def test_missing_api_token(
self,
mock_get_token: MagicMock,
mock_get: MagicMock,
mock_patch: MagicMock,
bridge_config: BridgeConfig,
) -> None:
"""Test error when API token is missing."""
# Clear environment variable BEFORE creating adapter
old_token = os.environ.pop("AZURE_DEVOPS_TOKEN", None)
try:
# Ensure adapter cannot resolve token from persisted auth cache.
mock_get_token.return_value = None
adapter = AdoAdapter(org="test-org", project="test-project", api_token=None)

# Mock process template API call (called by _get_work_item_type)
Expand Down
Loading
Loading