From bcfca24e3b07c8df140dc02e71e1d0368ce8d03d Mon Sep 17 00:00:00 2001 From: Brendan O'Leary Date: Tue, 14 Apr 2026 09:40:32 -0400 Subject: [PATCH] Add meeting tech tasks (action items, decisions, competitors, messaging, features) --- tasks/manifest.yaml | 5 + tasks/task_meeting_tech_action_items.md | 195 ++++++++++++++++ tasks/task_meeting_tech_competitors.md | 216 ++++++++++++++++++ tasks/task_meeting_tech_decisions.md | 196 ++++++++++++++++ tasks/task_meeting_tech_messaging.md | 240 ++++++++++++++++++++ tasks/task_meeting_tech_product_features.md | 228 +++++++++++++++++++ 6 files changed, 1080 insertions(+) create mode 100644 tasks/task_meeting_tech_action_items.md create mode 100644 tasks/task_meeting_tech_competitors.md create mode 100644 tasks/task_meeting_tech_decisions.md create mode 100644 tasks/task_meeting_tech_messaging.md create mode 100644 tasks/task_meeting_tech_product_features.md diff --git a/tasks/manifest.yaml b/tasks/manifest.yaml index 38d5c69..b309d25 100644 --- a/tasks/manifest.yaml +++ b/tasks/manifest.yaml @@ -59,3 +59,8 @@ tasks: - task_gh_issue_triage - task_pdf_to_calendar - task_cve_security_triage + - task_meeting_tech_action_items + - task_meeting_tech_decisions + - task_meeting_tech_competitors + - task_meeting_tech_messaging + - task_meeting_tech_product_features diff --git a/tasks/task_meeting_tech_action_items.md b/tasks/task_meeting_tech_action_items.md new file mode 100644 index 0000000..069a674 --- /dev/null +++ b/tasks/task_meeting_tech_action_items.md @@ -0,0 +1,195 @@ +--- +id: task_meeting_tech_action_items +name: Meeting Action Items Extraction +category: meeting +grading_type: hybrid +timeout_seconds: 180 +grading_weights: + automated: 0.6 + llm_judge: 0.4 +workspace_files: + - source: meetings/2021-06-28-gitlab-product-marketing-meeting.md + dest: meeting_transcript.md +--- + +## Prompt + +I have a file `meeting_transcript.md` containing a transcript of a GitLab Product Marketing weekly meeting from June 28, 2021. 
The meeting covers several topics including corporate events, product announcements for GitLab Commit, competitive analysis, a new infographic design, and a messaging framework. + +Please extract all action items from this meeting and write them to a file called `action_items.md`. For each action item, include: + +- **Owner** (the person responsible, by name) +- **Action** (what they need to do) +- **Deadline** (if mentioned) +- **Context** (which discussion topic it relates to) + +Group the action items by topic area. Also include a summary count of total action items at the top. + +--- + +## Expected Behavior + +The agent should: + +1. Read and parse the meeting transcript +2. Identify all explicit and implicit action items from the discussion +3. Associate each action item with the correct person/owner +4. Group them by topic + +Key action items that should be extracted: + +- **Corporate Events**: PMMs need to get commitment from their campaign managers for specific events (platform→re:Invent, CI/CD→Google Next, GitOps→KubeCon) and comment on the issue +- **Product Announcements**: Cormac needs to add a top five for Plan stage; team members need to review and pick top 5 overall features; this is due Tuesday +- **Competitive Analysis**: Samia needs to compare using only tier one competitors relevant to her stages; team needs to add a GitLab line item to the competitive sheet +- **Messaging Framework**: William to finalize messaging with "more speed less risk" as the chosen tagline; due end of day +- **General**: Team should watch Talladega Nights (humorous homework assignment) + +--- + +## Grading Criteria + +- [ ] File `action_items.md` is created +- [ ] At least 5 distinct action items identified +- [ ] Action items are associated with specific owners (e.g., Cormac, Samia, William, Cindy) +- [ ] Corporate events action items captured (get campaign manager commitments) +- [ ] Product announcements action items captured (top 5 features, due Tuesday) +- [ ] 
def grade(transcript: list, workspace_path: str) -> dict:
    """
    Grade the meeting action items extraction task.

    Args:
        transcript: Parsed JSONL transcript as list of dicts
        workspace_path: Path to the task's isolated workspace directory

    Returns:
        Dict mapping criterion names to scores (0.0 to 1.0)
    """
    from pathlib import Path
    import re

    scores = {}
    workspace = Path(workspace_path)

    # Prefer the requested filename, but accept common alternatives.
    report_path = workspace / "action_items.md"
    if not report_path.exists():
        for alt in ["actions.md", "action-items.md", "meeting_actions.md"]:
            alt_path = workspace / alt
            if alt_path.exists():
                report_path = alt_path
                break

    if not report_path.exists():
        # No report produced: every criterion scores zero.
        return {
            "report_created": 0.0,
            "min_action_items": 0.0,
            "owners_identified": 0.0,
            "events_actions": 0.0,
            "announcements_actions": 0.0,
            "competitive_actions": 0.0,
            "messaging_actions": 0.0,
            "grouped_by_topic": 0.0,
            "deadlines_noted": 0.0,
        }

    scores["report_created"] = 1.0
    # Read as UTF-8 explicitly (with replacement for stray bytes) so grading
    # does not depend on the platform's default locale encoding.
    content = report_path.read_text(encoding="utf-8", errors="replace")
    content_lower = content.lower()

    # Check minimum number of action items (look for bullet points or numbered items)
    action_markers = re.findall(r'(?:^|\n)\s*(?:[-*•]|\d+[.)]) .+', content)
    scores["min_action_items"] = 1.0 if len(action_markers) >= 5 else (0.5 if len(action_markers) >= 3 else 0.0)

    # Check that owners are identified
    owners = ["cormac", "samia", "william", "cindy", "brian", "tai"]
    owner_count = sum(1 for o in owners if o in content_lower)
    scores["owners_identified"] = 1.0 if owner_count >= 3 else (0.5 if owner_count >= 2 else 0.0)

    # Corporate events actions.
    # NOTE: "re:?invent" also matches the "re:Invent" spelling used in the
    # grading criteria above; the previous bare "reinvent" form missed it.
    events_patterns = [
        r'(?:campaign\s*manager|event|re:?invent|google\s*next|kubecon)',
        r'(?:commit|sign\s*up|sponsor)',
    ]
    events_hits = sum(1 for p in events_patterns if re.search(p, content_lower))
    scores["events_actions"] = 1.0 if events_hits >= 2 else (0.5 if events_hits >= 1 else 0.0)

    # Product announcements actions
    announce_patterns = [
        r'(?:top\s*(?:five|5)|product\s*announce|feature)',
        r'(?:plan|cormac)',
    ]
    announce_hits = sum(1 for p in announce_patterns if re.search(p, content_lower))
    scores["announcements_actions"] = 1.0 if announce_hits >= 2 else (0.5 if announce_hits >= 1 else 0.0)

    # Competitive analysis actions
    competitive_patterns = [
        r'(?:tier\s*(?:one|1)|competitor)',
        r'(?:gitlab\s*(?:line|row|column)|add\s*gitlab)',
    ]
    competitive_hits = sum(1 for p in competitive_patterns if re.search(p, content_lower))
    scores["competitive_actions"] = 1.0 if competitive_hits >= 2 else (0.5 if competitive_hits >= 1 else 0.0)

    # Messaging framework actions
    messaging_patterns = [
        r'(?:messag|tagline|framework)',
        r'(?:more\s*speed\s*less\s*risk|finalize)',
    ]
    messaging_hits = sum(1 for p in messaging_patterns if re.search(p, content_lower))
    scores["messaging_actions"] = 1.0 if messaging_hits >= 2 else (0.5 if messaging_hits >= 1 else 0.0)

    # Check grouping by topic (look for headers or clear sections)
    headers = re.findall(r'(?:^|\n)#+\s+.+|(?:^|\n)\*\*.+\*\*', content)
    scores["grouped_by_topic"] = 1.0 if len(headers) >= 3 else (0.5 if len(headers) >= 2 else 0.0)

    # Check deadlines
    deadline_patterns = [r'tuesday', r'end\s*of\s*(?:the\s*)?day', r'due', r'deadline']
    deadline_hits = sum(1 for p in deadline_patterns if re.search(p, content_lower))
    scores["deadlines_noted"] = 1.0 if deadline_hits >= 2 else (0.5 if deadline_hits >= 1 else 0.0)

    return scores
discussion topics (events, announcements, competitive, messaging). Captures both explicit assignments and implicit commitments. +**Score 0.75**: Captures most action items but misses one or two minor ones. +**Score 0.5**: Captures action items from some topics but misses entire topic areas. +**Score 0.25**: Only captures a few obvious action items. +**Score 0.0**: No meaningful action items extracted. + +### Criterion 2: Owner Attribution Accuracy (Weight: 30%) + +**Score 1.0**: Correctly identifies the responsible person for each action item. Names are accurately attributed from the conversation context. +**Score 0.75**: Most owners are correctly identified with one or two minor misattributions. +**Score 0.5**: Some owners identified but several are wrong or missing. +**Score 0.25**: Owners are mostly missing or incorrectly attributed. +**Score 0.0**: No owner attribution attempted. + +### Criterion 3: Organization and Structure (Weight: 20%) + +**Score 1.0**: Action items are clearly grouped by topic, formatted consistently, and include a summary count. Easy to scan and use as a follow-up checklist. +**Score 0.75**: Well-organized with minor formatting inconsistencies. +**Score 0.5**: Some organization but items are mixed across topics or hard to follow. +**Score 0.25**: Minimal organization, items listed without grouping. +**Score 0.0**: No structure or organization. + +### Criterion 4: Context and Deadline Accuracy (Weight: 15%) + +**Score 1.0**: Deadlines are noted where mentioned (Tuesday for announcements, end of day for messaging). Context for each action item is accurate and helpful. +**Score 0.75**: Most deadlines and context captured with minor omissions. +**Score 0.5**: Some context provided but deadlines are missing or inaccurate. +**Score 0.25**: Minimal context, no deadlines. +**Score 0.0**: No context or deadline information. 
diff --git a/tasks/task_meeting_tech_competitors.md b/tasks/task_meeting_tech_competitors.md new file mode 100644 index 0000000..6a6bee4 --- /dev/null +++ b/tasks/task_meeting_tech_competitors.md @@ -0,0 +1,216 @@ +--- +id: task_meeting_tech_competitors +name: Meeting Competitor Analysis Extraction +category: meeting +grading_type: hybrid +timeout_seconds: 180 +grading_weights: + automated: 0.6 + llm_judge: 0.4 +workspace_files: + - source: meetings/2021-06-28-gitlab-product-marketing-meeting.md + dest: meeting_transcript.md +--- + +## Prompt + +I have a file `meeting_transcript.md` containing a transcript of a GitLab Product Marketing weekly meeting from June 28, 2021. Part of the meeting involves detailed discussion about competitive positioning, a comparison spreadsheet, and how to present GitLab against competitors. + +Please analyze the transcript and create a file called `competitor_analysis.md` that covers: + +1. **Competitors mentioned**: List every competitor or competing product mentioned, with their tier classification if discussed +2. **Competitive positioning approach**: How does the team plan to position GitLab against these competitors? +3. **Stage-specific competitor mapping**: Which competitors apply to which product stages (as discussed)? +4. **Methodology decisions**: What decisions were made about how to conduct the competitive analysis (e.g., which tiers to focus on, how to handle stages where a competitor doesn't apply)? +5. **Key competitive insights**: Any strategic insights about specific competitors mentioned in the discussion + +--- + +## Expected Behavior + +The agent should: + +1. Read the meeting transcript carefully +2. 
Extract all competitor names and classify them + +Key competitors and details: + +- **Tier 1 competitors** (the focus of current analysis): Azure DevOps (ADO), Atlassian, GitHub, Jenkins, JFrog, CloudBees +- **Tier classification**: The team previously tiered competitors into tier 1, 2, and 3; currently focusing only on tier 1 +- **Stage relevance**: Not all tier 1 competitors apply to all stages (e.g., CloudBees may not be relevant for Monitor; GitHub may not be relevant for Monitor) +- **Platform positioning**: Some competitors (Azure DevOps, GitHub, Atlassian, JFrog) position themselves as platforms; Jenkins/CloudBees do not +- **Methodology**: The competitive spreadsheet was copy-pasted with the same 6 competitors on every stage tab, which was identified as wrong — only relevant competitors should appear per stage +- **GitLab line item**: The team decided to add a GitLab row to the comparison so the sheet doesn't only show what GitLab is better at +- **Feature selection lens**: Features should be chosen from a market lens (what buyers shop for), not a GitLab-only lens; should include some competitor-only features for honesty +- **Infographic approach**: New design uses green-only colors (no red) to appear as a helpful industry comparison rather than a competitive attack piece + +--- + +## Grading Criteria + +- [ ] File `competitor_analysis.md` is created +- [ ] All tier 1 competitors listed (ADO/Azure DevOps, Atlassian, GitHub, Jenkins, JFrog, CloudBees) +- [ ] Tier classification system mentioned (tier 1, 2, 3) +- [ ] Stage-specific relevance discussed (not all competitors apply to all stages) +- [ ] Platform vs non-platform distinction captured (GitHub, ADO, Atlassian, JFrog as platforms; Jenkins/CloudBees not) +- [ ] Decision to add GitLab line item captured +- [ ] Market lens approach for feature selection noted +- [ ] Infographic design philosophy captured (green-only, comparison not attack) +- [ ] Methodology decision about focusing on tier 1 first 
def grade(transcript: list, workspace_path: str) -> dict:
    """
    Grade the meeting competitor analysis extraction task.

    Args:
        transcript: Parsed JSONL transcript as list of dicts
        workspace_path: Path to the task's isolated workspace directory

    Returns:
        Dict mapping criterion names to scores (0.0 to 1.0)
    """
    from pathlib import Path
    import re

    criteria = (
        "report_created",
        "tier1_competitors",
        "tier_system",
        "stage_relevance",
        "platform_distinction",
        "gitlab_line_item",
        "market_lens",
        "infographic_philosophy",
        "tier1_focus",
    )

    workspace = Path(workspace_path)
    # Preferred filename first, then the accepted fallbacks, in order.
    candidates = [
        "competitor_analysis.md",
        "competitors.md",
        "competitive_analysis.md",
        "competitor_report.md",
    ]
    report_path = next(
        (workspace / name for name in candidates if (workspace / name).exists()),
        None,
    )
    if report_path is None:
        # No report produced: every criterion scores zero.
        return {name: 0.0 for name in criteria}

    text = report_path.read_text().lower()

    def seen(pattern):
        # True when the pre-lowered report matches the given regex.
        return re.search(pattern, text) is not None

    scores = {"report_created": 1.0}

    # Tier 1 competitors listed (one probe per competitor).
    competitor_probes = (
        r'(?:azure\s*devops|ado)',
        r'atlassian',
        r'github',
        r'jenkins',
        r'jfrog|j\s*frog',
        r'cloudbees|cloud\s*bees',
    )
    named = sum(1 for probe in competitor_probes if seen(probe))
    if named >= 5:
        scores["tier1_competitors"] = 1.0
    elif named >= 4:
        scores["tier1_competitors"] = 0.75
    elif named >= 3:
        scores["tier1_competitors"] = 0.5
    else:
        scores["tier1_competitors"] = 0.0

    # Tier classification system mentioned anywhere.
    if seen(r'tier\s*(?:one|1|two|2|three|3)') or seen(r'tier[- ]?\d'):
        scores["tier_system"] = 1.0
    else:
        scores["tier_system"] = 0.0

    # Stage-specific relevance (either ordering of the two clauses).
    stage_probes = (
        r'(?:not\s*(?:all|every)|relevant|applicable|apply).*(?:stage|monitor|configure)',
        r'(?:stage|monitor|configure).*(?:not\s*(?:all|every)|relevant|applicable)',
    )
    scores["stage_relevance"] = 1.0 if any(seen(p) for p in stage_probes) else 0.0

    # Platform vs non-platform distinction: full credit needs the word
    # "platform" plus an explicit Jenkins/CloudBees-are-not-platforms claim;
    # "platform" alone earns half credit.
    mentions_platform = seen(r'platform')
    non_platform_probes = (
        r'(?:jenkins|cloudbees).*(?:not|don.t|doesn.t).*platform',
        r'(?:not|don.t|doesn.t).*platform.*(?:jenkins|cloudbees)',
    )
    if mentions_platform and any(seen(p) for p in non_platform_probes):
        scores["platform_distinction"] = 1.0
    elif mentions_platform:
        scores["platform_distinction"] = 0.5
    else:
        scores["platform_distinction"] = 0.0

    # Decision to add a GitLab line item (any ordering of add/gitlab/row).
    gitlab_probes = (
        r'(?:add|include).*gitlab.*(?:line|row|column|entry)',
        r'gitlab.*(?:line|row|column|entry).*(?:add|include)',
        r'(?:add|include).*(?:line|row|column|entry).*gitlab',
    )
    scores["gitlab_line_item"] = 1.0 if any(seen(p) for p in gitlab_probes) else 0.0

    # Market-lens feature selection: partial credit for a single signal.
    market_probes = (
        r'market\s*lens',
        r'(?:buyer|customer|shopp)',
        r'(?:honest|trustworthy|accurate\s*assessment)',
    )
    market_hits = sum(1 for p in market_probes if seen(p))
    scores["market_lens"] = 1.0 if market_hits >= 2 else (0.5 if market_hits >= 1 else 0.0)

    # Infographic philosophy (green-only, comparison-not-attack framing).
    infographic_probes = (
        r'(?:green|color).*(?:comparison|helpful|industry)',
        r'(?:no\s*red|without\s*red|avoid\s*red)',
        r'(?:comparison|helpful).*(?:not\s*(?:competitive|attack))',
    )
    infographic_hits = sum(1 for p in infographic_probes if seen(p))
    scores["infographic_philosophy"] = 1.0 if infographic_hits >= 2 else (0.5 if infographic_hits >= 1 else 0.0)

    # Decision to focus on tier 1 competitors first.
    focus_probes = (
        r'(?:focus|only|just).*tier\s*(?:one|1)',
        r'tier\s*(?:one|1).*(?:focus|first|now|current)',
    )
    scores["tier1_focus"] = 1.0 if any(seen(p) for p in focus_probes) else 0.0

    return scores
Presents this as actionable guidance. +**Score 0.75**: Most methodology decisions captured. +**Score 0.5**: Some methodology mentioned but incomplete. +**Score 0.25**: Vague methodology description. +**Score 0.0**: No methodology documented. diff --git a/tasks/task_meeting_tech_decisions.md b/tasks/task_meeting_tech_decisions.md new file mode 100644 index 0000000..e1e73ac --- /dev/null +++ b/tasks/task_meeting_tech_decisions.md @@ -0,0 +1,196 @@ +--- +id: task_meeting_tech_decisions +name: Meeting Decisions Extraction +category: meeting +grading_type: hybrid +timeout_seconds: 180 +grading_weights: + automated: 0.6 + llm_judge: 0.4 +workspace_files: + - source: meetings/2021-06-28-gitlab-product-marketing-meeting.md + dest: meeting_transcript.md +--- + +## Prompt + +I have a file `meeting_transcript.md` containing a transcript of a GitLab Product Marketing weekly meeting from June 28, 2021. The meeting covers corporate events sponsorship, product announcements for GitLab Commit, competitive analysis methodology, infographic design feedback, and a messaging framework exercise. + +Please identify all decisions that were made (or consensus reached) during this meeting and write them to a file called `decisions.md`. For each decision, include: + +- **Decision** (what was decided) +- **Context** (brief background on why this was discussed) +- **Participants involved** (who weighed in) +- **Status** (final, tentative, or needs follow-up) + +Also include a summary at the top with the total number of decisions and which ones may need further confirmation. + +--- + +## Expected Behavior + +The agent should: + +1. Read and parse the meeting transcript +2. Distinguish between decisions (conclusions reached) and open discussions +3. Capture the context behind each decision + +Key decisions that should be identified: + +1. **Event assignments**: Platform team → AWS re:Invent, CI/CD → Google Next, GitOps → KubeCon +2. 
def grade(transcript: list, workspace_path: str) -> dict:
    """
    Grade the meeting decisions extraction task.

    Args:
        transcript: Parsed JSONL transcript as list of dicts
        workspace_path: Path to the task's isolated workspace directory

    Returns:
        Dict mapping criterion names to scores (0.0 to 1.0)
    """
    from pathlib import Path
    import re

    criteria = (
        "report_created",
        "min_decisions",
        "event_assignments",
        "announcement_approach",
        "competitive_methodology",
        "messaging_tagline",
        "infographic_colors",
        "context_provided",
        "status_indicated",
    )

    workspace = Path(workspace_path)
    # Preferred filename first, then the accepted fallbacks, in order.
    candidates = [
        "decisions.md",
        "meeting_decisions.md",
        "decision_log.md",
        "decisions_summary.md",
    ]
    report_path = next(
        (workspace / name for name in candidates if (workspace / name).exists()),
        None,
    )
    if report_path is None:
        # No report produced: every criterion scores zero.
        return {name: 0.0 for name in criteria}

    content = report_path.read_text()
    lowered = content.lower()

    def hit_count(patterns):
        # How many of the probe regexes appear in the lowered report.
        return sum(1 for p in patterns if re.search(p, lowered))

    def scaled(hits):
        # Shared 2-hit / 1-hit / 0-hit scoring ladder.
        return 1.0 if hits >= 2 else (0.5 if hits >= 1 else 0.0)

    scores = {"report_created": 1.0}

    # Minimum number of decisions: substantial bullets/numbered items, or
    # enough section headings to imply one decision per section.
    bullets = re.findall(r'(?:^|\n)\s*(?:[-*•]|\d+[.)]) .{10,}', content)
    headings = re.findall(r'(?:^|\n)#+\s+.+', content)
    if len(bullets) >= 5 or len(headings) >= 5:
        scores["min_decisions"] = 1.0
    elif len(bullets) >= 3 or len(headings) >= 3:
        scores["min_decisions"] = 0.5
    else:
        scores["min_decisions"] = 0.0

    # Event assignments (both spellings of re:Invent accepted).
    event_terms = ("reinvent", "re:invent", "google next", "kubecon")
    scores["event_assignments"] = scaled(sum(1 for term in event_terms if term in lowered))

    # Announcement approach (bundling/grouping MVCs into themes).
    scores["announcement_approach"] = scaled(hit_count((
        r'(?:bundle|group|bucket|aggregat)',
        r'(?:vulnerability\s*management|mvc|14\.0)',
    )))

    # Competitive methodology decisions.
    scores["competitive_methodology"] = scaled(hit_count((
        r'(?:tier\s*(?:one|1))',
        r'(?:gitlab\s*(?:line|row|column)|add\s*gitlab|gitlab\s*comparison)',
        r'(?:relevant|applicable).*(?:stage|competitor)',
    )))

    # Messaging tagline decision ("more speed less risk").
    if re.search(r'more\s*speed\s*less\s*risk', lowered):
        scores["messaging_tagline"] = 1.0
    else:
        scores["messaging_tagline"] = 0.0

    # Infographic color decision (green only, no red).
    color_probes = (
        r'(?:green|color).*(?:no\s*red|without\s*red|comparison)',
        r'(?:no\s*red|avoid\s*red|stick\s*with\s*green)',
    )
    scores["infographic_colors"] = 1.0 if any(re.search(p, lowered) for p in color_probes) else 0.0

    # Context provided (explanatory language around decisions).
    scores["context_provided"] = scaled(hit_count((
        r'(?:because|reason|background|context|rationale|why|since|given\s*that)',
        r'(?:discuss|debate|consider|weigh)',
    )))

    # Status indicated (final vs tentative vs needs follow-up).
    scores["status_indicated"] = scaled(hit_count((
        r'(?:final|tentative|follow.?up|confirmed|pending|needs?\s*(?:confirmation|follow))',
        r'(?:status|resolved|agreed|consensus)',
    )))

    return scores
+**Score 0.5**: Captures some decisions but misses several or conflates discussions with decisions. +**Score 0.25**: Only identifies one or two obvious decisions. +**Score 0.0**: No meaningful decisions identified. + +### Criterion 2: Context Quality (Weight: 25%) + +**Score 1.0**: Each decision includes accurate context explaining what prompted the discussion, what alternatives were considered, and why the chosen option was selected. +**Score 0.75**: Most decisions have good context with minor gaps. +**Score 0.5**: Some context provided but missing key reasoning or alternatives considered. +**Score 0.25**: Minimal context, decisions listed without explanation. +**Score 0.0**: No context provided. + +### Criterion 3: Participant Attribution (Weight: 20%) + +**Score 1.0**: Correctly identifies who advocated for each position and who was involved in reaching consensus. Names are accurately drawn from the transcript. +**Score 0.75**: Most participants correctly identified with minor errors. +**Score 0.5**: Some participants identified but several are missing or misattributed. +**Score 0.25**: Minimal participant identification. +**Score 0.0**: No participant attribution. + +### Criterion 4: Structure and Usefulness (Weight: 20%) + +**Score 1.0**: Decisions are clearly formatted with consistent structure, include a useful summary, and indicate which decisions are final vs. needing follow-up. Could be used directly as meeting minutes. +**Score 0.75**: Well-structured with minor formatting issues. +**Score 0.5**: Readable but inconsistently structured or missing summary. +**Score 0.25**: Poorly organized, hard to use as reference. +**Score 0.0**: No usable structure. 
diff --git a/tasks/task_meeting_tech_messaging.md b/tasks/task_meeting_tech_messaging.md new file mode 100644 index 0000000..693b9ad --- /dev/null +++ b/tasks/task_meeting_tech_messaging.md @@ -0,0 +1,240 @@ +--- +id: task_meeting_tech_messaging +name: Meeting Messaging Framework Extraction +category: meeting +grading_type: hybrid +timeout_seconds: 180 +grading_weights: + automated: 0.6 + llm_judge: 0.4 +workspace_files: + - source: meetings/2021-06-28-gitlab-product-marketing-meeting.md + dest: meeting_transcript.md +--- + +## Prompt + +I have a file `meeting_transcript.md` containing a transcript of a GitLab Product Marketing weekly meeting from June 28, 2021. Toward the end of the meeting, the team does a collaborative messaging exercise to develop taglines and messaging pillars for GitLab. + +Please extract and organize all messaging framework options discussed in the meeting and write them to a file called `messaging_framework.md`. Your output should include: + +1. **All candidate taglines/phrases** that were proposed, grouped by which messaging pillar they relate to +2. **The evaluation criteria** the team used (e.g., parity of tone between phrases, catchiness, noun phrases vs verb phrases) +3. **The pros and cons discussed** for each option +4. **The final selections** (what the team decided to go with) +5. **Rejected alternatives** and why they were rejected + +--- + +## Expected Behavior + +The agent should: + +1. Parse the transcript to find the messaging framework discussion +2. 
Identify the three messaging pillars and all candidate phrases + +Key messaging content: + +**Pillar 1 — Transparency/Single Platform:** +- "From roadmap to company vision, we are transparent" (original) +- "A single source of truth, countless possibilities" (proposed, well-liked) +- "All-in-one for everyone" (proposed, William liked it but acknowledged not everyone's cup of tea) + +**Pillar 2 — End-to-end control:** +- "Automate nearly anything, collaborate on everything" (original) +- "End to end control over your software factory" (proposed, described as less catchy but descriptive) + +**Pillar 3 — Speed/Security:** +- "Scale up, speed up, test up" (original — William hated it, especially "test up") +- "Move fast with confidence" (proposed — liked the sentiment but poor parity with other noun phrases) +- "More speed less risk" (proposed — team consensus favorite) +- "Increase speed and stay on track" (proposed — not liked) +- "Velocity with confidence" (mentioned as a concept) +- "No trade-offs" (briefly entertained, rejected because engineer mindset says there's always trade-offs) +- "More speed less risk with confidence" (suggested combination) + +**Evaluation criteria discussed:** +- Parity of tone (noun phrases should match noun phrases) +- Catchiness/pithiness +- Technical accuracy (no absolute claims like "no trade-offs" or "100% secure") +- Ability to use in parallel construction ("With GitLab you get X") + +**Final selections:** +- "A single source of truth, countless possibilities" +- "End to end control over your software factory" +- "More speed less risk" + +**Security sub-tagline:** +- "Secure the factory and its deliverables" (credited to Cindy's blog post) + +--- + +## Grading Criteria + +- [ ] File `messaging_framework.md` is created +- [ ] All three messaging pillars identified +- [ ] Multiple candidate phrases listed per pillar +- [ ] "More speed less risk" identified as the chosen tagline for speed/security +- [ ] "A single source of truth, 
countless possibilities" identified as chosen for transparency pillar +- [ ] Rejected alternatives listed with reasons (e.g., "test up" rejected, "no trade-offs" rejected) +- [ ] Evaluation criteria captured (parity/tone, catchiness, accuracy) +- [ ] "Secure the factory and its deliverables" captured as security sub-tagline +- [ ] Final selections clearly distinguished from alternatives + +--- + +## Automated Checks + +```python +def grade(transcript: list, workspace_path: str) -> dict: + """ + Grade the meeting messaging framework extraction task. + + Args: + transcript: Parsed JSONL transcript as list of dicts + workspace_path: Path to the task's isolated workspace directory + + Returns: + Dict mapping criterion names to scores (0.0 to 1.0) + """ + from pathlib import Path + import re + + scores = {} + workspace = Path(workspace_path) + + report_path = workspace / "messaging_framework.md" + if not report_path.exists(): + for alt in ["messaging.md", "framework.md", "taglines.md", "messaging_options.md"]: + alt_path = workspace / alt + if alt_path.exists(): + report_path = alt_path + break + + if not report_path.exists(): + return { + "report_created": 0.0, + "three_pillars": 0.0, + "multiple_candidates": 0.0, + "chosen_speed_tagline": 0.0, + "chosen_transparency_tagline": 0.0, + "rejected_alternatives": 0.0, + "evaluation_criteria": 0.0, + "security_subtagline": 0.0, + "finals_distinguished": 0.0, + } + + scores["report_created"] = 1.0 + content = report_path.read_text() + content_lower = content.lower() + + # Three pillars identified + pillar_terms = [ + [r'(?:transparen|single\s*(?:source|platform)|all.in.one)'], + [r'(?:end.to.end|control|automat)'], + [r'(?:speed|security|risk|velocity|confidence)'], + ] + pillar_count = sum(1 for terms in pillar_terms if any(re.search(p, content_lower) for p in terms)) + scores["three_pillars"] = 1.0 if pillar_count >= 3 else (0.5 if pillar_count >= 2 else 0.0) + + # Multiple candidate phrases + candidate_phrases = [ + 
r'single\s*source\s*of\s*truth', +        r'all.in.one\s*for\s*everyone', +        r'end\s*to\s*end\s*control', +        r'more\s*speed\s*less\s*risk', +        r'move\s*fast\s*with\s*confidence', +        r'scale\s*up.*speed\s*up.*test\s*up', +        r'no\s*trade.?offs?', +        r'automate\s*nearly\s*anything', +        r'countless\s*possibilities', +        r'velocity\s*with\s*confidence', +    ] +    phrase_count = sum(1 for p in candidate_phrases if re.search(p, content_lower)) +    scores["multiple_candidates"] = 1.0 if phrase_count >= 6 else (0.75 if phrase_count >= 4 else (0.5 if phrase_count >= 3 else 0.0)) + +    # "More speed less risk" as chosen +    chosen_patterns = [ +        r'more\s*speed\s*less\s*risk.*(?:chosen|selected|final|decided|winner|preferred|went\s*with)', +        r'(?:chosen|selected|final|decided|winner|preferred|went\s*with).*more\s*speed\s*less\s*risk', +        r'more\s*speed\s*less\s*risk', +    ] +    scores["chosen_speed_tagline"] = 1.0 if any(re.search(p, content_lower) for p in chosen_patterns[:2]) else (0.5 if re.search(chosen_patterns[2], content_lower) else 0.0) + +    # "A single source of truth, countless possibilities" as chosen +    transparency_patterns = [ +        r'single\s*source\s*of\s*truth.*countless\s*possibilities', +    ] +    scores["chosen_transparency_tagline"] = 1.0 if any(re.search(p, content_lower) for p in transparency_patterns) else 0.0 + +    # Rejected alternatives with reasons +    rejected_patterns = [ +        r'(?:test\s*up|scale\s*up.*test\s*up).*(?:reject|hate|goofy|bad|dislike|didn.t\s*like)', +        r'(?:reject|hate|goofy|bad|dislike|didn.t\s*like).*(?:test\s*up|scale\s*up.*test\s*up)', +        r'no\s*trade.?offs?.*(?:reject|never|always|engineer)', +        r'(?:reject|never|always|engineer).*no\s*trade.?offs?', +    ] +    rejected_hits = sum(1 for p in rejected_patterns if re.search(p, content_lower)) +    scores["rejected_alternatives"] = 1.0 if rejected_hits >= 2 else (0.5 if rejected_hits >= 1 else 0.0) + +    # Evaluation criteria +    criteria_patterns = [ +        r'(?:parity|tone|parallel)', +        r'(?:catch|pith|punch)', + 
r'(?:noun\s*phrase|verb\s*phrase|construction)', + ] + criteria_hits = sum(1 for p in criteria_patterns if re.search(p, content_lower)) + scores["evaluation_criteria"] = 1.0 if criteria_hits >= 2 else (0.5 if criteria_hits >= 1 else 0.0) + + # Security sub-tagline + security_patterns = [ + r'secure\s*the\s*factory.*deliverables', + ] + scores["security_subtagline"] = 1.0 if any(re.search(p, content_lower) for p in security_patterns) else 0.0 + + # Finals clearly distinguished + final_patterns = [ + r'(?:final|selected|chosen|decided|winner)', + r'(?:reject|alternative|considered|discard)', + ] + final_hits = sum(1 for p in final_patterns if re.search(p, content_lower)) + scores["finals_distinguished"] = 1.0 if final_hits >= 2 else (0.5 if final_hits >= 1 else 0.0) + + return scores +``` + +--- + +## LLM Judge Rubric + +### Criterion 1: Completeness of Options Captured (Weight: 30%) + +**Score 1.0**: Lists all proposed taglines/phrases across all three pillars, including originals and alternatives. At least 8-10 distinct phrases captured with correct pillar grouping. +**Score 0.75**: Most phrases captured with correct grouping, missing one or two. +**Score 0.5**: Several phrases captured but missing entire alternatives or a pillar. +**Score 0.25**: Only captures the final selections without alternatives. +**Score 0.0**: No meaningful options captured. + +### Criterion 2: Evaluation Criteria and Reasoning (Weight: 25%) + +**Score 1.0**: Accurately captures the evaluation framework: parity of tone, catchiness, technical accuracy, parallel construction ability. Includes specific reasoning for rejections (e.g., "test up" sounds goofy, "no trade-offs" contradicts engineering mindset, "move fast with confidence" lacks noun-phrase parity). +**Score 0.75**: Most evaluation criteria captured with good reasoning. +**Score 0.5**: Some criteria mentioned but reasoning is incomplete. +**Score 0.25**: Minimal reasoning provided. 
+**Score 0.0**: No evaluation criteria or reasoning. + +### Criterion 3: Final vs. Rejected Distinction (Weight: 25%) + +**Score 1.0**: Clearly separates final selections from rejected alternatives. Easy to identify what was chosen and what was discarded. +**Score 0.75**: Good distinction with minor ambiguity. +**Score 0.5**: Some distinction but reader has to infer which are final. +**Score 0.25**: All options mixed together without clear resolution. +**Score 0.0**: No distinction made. + +### Criterion 4: Nuance and Attribution (Weight: 20%) + +**Score 1.0**: Captures who proposed what, includes the Cindy blog post credit for "secure the factory and its deliverables," notes the Ash Withers inspiration, and reflects the collaborative dynamic of the exercise. +**Score 0.75**: Good attribution with minor gaps. +**Score 0.5**: Some attribution but misses key contributions. +**Score 0.25**: No attribution of ideas to individuals. +**Score 0.0**: No nuance or attribution. diff --git a/tasks/task_meeting_tech_product_features.md b/tasks/task_meeting_tech_product_features.md new file mode 100644 index 0000000..675b5cc --- /dev/null +++ b/tasks/task_meeting_tech_product_features.md @@ -0,0 +1,228 @@ +--- +id: task_meeting_tech_product_features +name: Meeting Product Feature Prioritization +category: meeting +grading_type: hybrid +timeout_seconds: 180 +grading_weights: + automated: 0.6 + llm_judge: 0.4 +workspace_files: + - source: meetings/2021-06-28-gitlab-product-marketing-meeting.md + dest: meeting_transcript.md +--- + +## Prompt + +I have a file `meeting_transcript.md` containing a transcript of a GitLab Product Marketing weekly meeting from June 28, 2021. A significant portion of the meeting discusses which product features and improvements to highlight at their upcoming GitLab Commit conference. + +Please analyze the transcript and create a file called `feature_priorities.md` containing a prioritized list of features discussed. For each feature, include: + +1. 
**Feature name** +2. **Stage/area** (e.g., Create, Secure, Monitor, Plan, CI/CD, GitOps, etc.) +3. **Excitement level** (1-3 as discussed in the meeting, where 3 is most exciting) +4. **Description** (what the feature does or why it matters) +5. **Notes** (any discussion points, such as whether it was community-contributed, already had press coverage, etc.) + +Also include: +- The team's **top 5 overall picks** for the Commit keynote +- The **methodology** they agreed on for evaluating features (bundling MVCs, excitement levels, stack ranking) +- Any features that were **explicitly deprioritized** and why + +--- + +## Expected Behavior + +The agent should: + +1. Parse the meeting transcript to find all feature discussions +2. Extract and organize features with their metadata + +Key features discussed: + +**Top 5 / High Priority:** +1. **UX improvements** (cross-stage) — Excitement: 3. Brian noted this maps beyond just Create stage. Common "big launch" category per industry practice. +2. **Vulnerability management** (Secure) — Excitement: high. Cindy proposed bundling small MVCs over the year into one narrative. Already part of 14.0 announcements. +3. **GitOps capabilities** (Configure/GitOps) — Kubernetes agent + HashiCorp/Terraform integrations bundled together. +4. **Pipeline editor** (CI/CD) — Mentioned as a definite option for CI/CD stage. +5. **Something from Plan** — Cormac assigned to add; epic boards mentioned (long-requested feature), milestone burnup charts mentioned. 
+ +**Other notable features:** +- **Fuzzing acquisitions** (Secure) — Deprioritized because already had press coverage twice (acquisition announcement + integration follow-up); "worn out" +- **Semgrep scanner replacement** (Secure) — Replaced an existing scanner; could be a line item +- **DAST Browser scanner** (Secure) — Proprietary DAST for single-page applications, in beta; called "Berserker" +- **VS Code integrations** (Create) — Two integrations that were community contributions; the unofficial extension becoming official +- **Terraform module** (Configure) — Community module now officially supported +- **Value Stream Analytics** (Plan) — Potential story but customizable version was from 12.9 (too old) +- **Incident management** (Monitor) — Bundle of improvements, though core was from earlier + +**Methodology:** +- Bundle small MVCs into larger themes rather than listing individual features +- Use excitement levels 1-3 as stack rank (one each, not raw scores) +- Look back over past year for features that started as beta and are now GA-ready +- Reuse GitLab 14.0 announcements plus additions +- Target: top 5 overall for PR team to use as keynote fodder + +--- + +## Grading Criteria + +- [ ] File `feature_priorities.md` is created +- [ ] At least 8 distinct features or feature bundles listed +- [ ] UX improvements identified as a top pick +- [ ] Vulnerability management identified as a top pick +- [ ] GitOps/Kubernetes agent identified as a top pick +- [ ] Pipeline editor mentioned for CI/CD +- [ ] Fuzzing deprioritized due to prior press coverage +- [ ] Community contributions noted (VS Code, Terraform module) +- [ ] Bundling methodology captured (group MVCs into themes) +- [ ] Top 5 overall picks section included + +--- + +## Automated Checks + +```python +def grade(transcript: list, workspace_path: str) -> dict: + """ + Grade the meeting product feature prioritization task. 
+ + Args: + transcript: Parsed JSONL transcript as list of dicts + workspace_path: Path to the task's isolated workspace directory + + Returns: + Dict mapping criterion names to scores (0.0 to 1.0) + """ + from pathlib import Path + import re + + scores = {} + workspace = Path(workspace_path) + + report_path = workspace / "feature_priorities.md" + if not report_path.exists(): + for alt in ["features.md", "product_features.md", "feature_list.md", "priorities.md"]: + alt_path = workspace / alt + if alt_path.exists(): + report_path = alt_path + break + + if not report_path.exists(): + return { + "report_created": 0.0, + "min_features": 0.0, + "ux_top_pick": 0.0, + "vuln_mgmt_top_pick": 0.0, + "gitops_top_pick": 0.0, + "pipeline_editor": 0.0, + "fuzzing_deprioritized": 0.0, + "community_contributions": 0.0, + "bundling_methodology": 0.0, + "top_five_section": 0.0, + } + + scores["report_created"] = 1.0 + content = report_path.read_text() + content_lower = content.lower() + + # Minimum features listed + feature_markers = re.findall(r'(?:^|\n)\s*(?:[-*•]|\d+[.)]) .{10,}', content) + headers = re.findall(r'(?:^|\n)#+\s+.+', content) + total_items = len(feature_markers) + len(headers) + scores["min_features"] = 1.0 if total_items >= 8 else (0.5 if total_items >= 5 else 0.0) + + # UX as top pick + ux_patterns = [ + r'(?:ux|user\s*experience).*(?:top|high|exciting|priorit|important)', + r'(?:top|high|exciting|priorit|important).*(?:ux|user\s*experience)', + ] + scores["ux_top_pick"] = 1.0 if any(re.search(p, content_lower) for p in ux_patterns) else (0.5 if re.search(r'(?:ux|user\s*experience)', content_lower) else 0.0) + + # Vulnerability management as top pick + vuln_patterns = [ + r'vulnerability\s*management', + ] + scores["vuln_mgmt_top_pick"] = 1.0 if any(re.search(p, content_lower) for p in vuln_patterns) else 0.0 + + # GitOps/K8s agent as top pick + gitops_patterns = [ + r'(?:kubernetes|k8s)\s*agent', + r'gitops', + r'(?:hashicorp|terraform)\s*integrat', + ] + 
gitops_hits = sum(1 for p in gitops_patterns if re.search(p, content_lower)) + scores["gitops_top_pick"] = 1.0 if gitops_hits >= 2 else (0.5 if gitops_hits >= 1 else 0.0) + + # Pipeline editor + scores["pipeline_editor"] = 1.0 if re.search(r'pipeline\s*editor', content_lower) else 0.0 + + # Fuzzing deprioritized + fuzzing_patterns = [ + r'fuzz.*(?:already|prior|previous|worn|press|cover|depriorit)', + r'(?:already|prior|previous|worn|press|cover|depriorit).*fuzz', + ] + scores["fuzzing_deprioritized"] = 1.0 if any(re.search(p, content_lower) for p in fuzzing_patterns) else (0.5 if re.search(r'fuzz', content_lower) else 0.0) + + # Community contributions + community_patterns = [ + r'communit.*contribut', + r'(?:vs\s*code|vscode).*communit', + r'communit.*(?:vs\s*code|vscode|terraform)', + ] + community_hits = sum(1 for p in community_patterns if re.search(p, content_lower)) + scores["community_contributions"] = 1.0 if community_hits >= 1 else 0.0 + + # Bundling methodology + bundle_patterns = [ + r'(?:bundle|group|bucket|aggregat|roll\s*up)', + r'(?:mvc|small\s*feature|iteration)', + r'(?:theme|narrative|story)', + ] + bundle_hits = sum(1 for p in bundle_patterns if re.search(p, content_lower)) + scores["bundling_methodology"] = 1.0 if bundle_hits >= 2 else (0.5 if bundle_hits >= 1 else 0.0) + + # Top 5 section + top5_patterns = [ + r'top\s*(?:five|5)', + ] + scores["top_five_section"] = 1.0 if any(re.search(p, content_lower) for p in top5_patterns) else 0.0 + + return scores +``` + +--- + +## LLM Judge Rubric + +### Criterion 1: Feature Extraction Completeness (Weight: 30%) + +**Score 1.0**: Lists all discussed features including UX improvements, vulnerability management, GitOps/K8s agent, pipeline editor, fuzzing, Semgrep, DAST Browser (Berserker), VS Code integrations, Terraform module, value stream analytics, incident management, and epic boards. Each has accurate stage assignment. 
+**Score 0.75**: Most features captured with correct stage assignments, missing one or two. +**Score 0.5**: Main features captured but several lesser ones missing. +**Score 0.25**: Only the most obvious features captured. +**Score 0.0**: No meaningful features extracted. + +### Criterion 2: Prioritization Accuracy (Weight: 30%) + +**Score 1.0**: Correctly identifies the emerging top 5 (UX, vulnerability management, GitOps bundle, CI/CD, Plan TBD). Accurately reflects excitement levels and the team's reasoning for rankings. Notes which features were deprioritized (fuzzing due to prior press) and which need follow-up. +**Score 0.75**: Top picks mostly correct with minor ordering issues. +**Score 0.5**: Some prioritization present but misses key rankings or reasoning. +**Score 0.25**: Features listed without meaningful prioritization. +**Score 0.0**: No prioritization attempted. + +### Criterion 3: Methodology Documentation (Weight: 20%) + +**Score 1.0**: Clearly explains the agreed methodology: bundle MVCs into themes, use 1-3 excitement as stack rank, look back over past year for beta→GA features, reuse 14.0 content, target top 5 for PR. Notes the shift from "3 per stage" to "top 5 overall." +**Score 0.75**: Most methodology elements captured. +**Score 0.5**: Some methodology mentioned but incomplete. +**Score 0.25**: Vague methodology reference. +**Score 0.0**: No methodology documented. + +### Criterion 4: Context and Detail Quality (Weight: 20%) + +**Score 1.0**: Includes relevant context for each feature: community contribution status, prior press coverage, maturity level, customer demand signals (upvotes, MAU discussion). Notes the connection to GitLab Commit keynote. +**Score 0.75**: Good context with minor gaps. +**Score 0.5**: Some context but many features lack detail. +**Score 0.25**: Minimal context provided. +**Score 0.0**: No context or detail.