github · duksh · May 11, 2026 · May 11, 2026
@@ -7,6 +7,13 @@ on:
   push:
     branches: ["main"]
   pull_request:
+    paths:
+      - "src/**"
+      - "tests/**"
+      - "templates/**"
+      - "workflows/**"
+      - "extensions/**"
+      - "pyproject.toml"
 
 jobs:
   ruff:
@@ -29,6 +36,7 @@ jobs:
   pytest:
     runs-on: ${{ matrix.os }}
     strategy:
+      fail-fast: true
       matrix:
         os: [ubuntu-latest, windows-latest]
         python-version: ["3.11", "3.12", "3.13"]
@@ -38,6 +46,9 @@ jobs:
 
       - name: Install uv
         uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0
+        with:
+          enable-cache: true
+          cache-dependency-glob: "**/pyproject.toml"
 
       - name: Set up Python ${{ matrix.python-version }}
         uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6

@@ -17,6 +17,21 @@
         "workflow",
         "core"
       ]
+    },
+    "cost-tracker": {
+      "name": "LLM Cost Tracker",
+      "id": "cost-tracker",
+      "version": "1.0.0",
+      "description": "Records actual LLM spend back to the spec's Cost Allocation section and surfaces per-feature budget status",
+      "author": "spec-kit-core",
+      "repository": "https://github.com/github/spec-kit",
+      "bundled": true,
+      "tags": [
+        "finops",
+        "cost",
+        "llm",
+        "budget"
+      ]
     }
   }
 }
@@ -0,0 +1,60 @@
+---
+description: "Update the Actual LLM Spend field in the current spec with the reported spend amount"
+---
+
+# Record LLM Spend
+
+Update the **Actual LLM Spend (USD)** field in the active spec's `## Cost Allocation`
+section to reflect the spend incurred during the most recent command.
+
+## Outline
+
+1. **Locate the active spec file**
+   - Find the spec document for the current feature in `.specify/specs/`.
+   - If no spec file is found, emit a warning and exit — do not create one.
+
+2. **Determine the spend amount**
+   - If invoked as an `after_implement` hook, check whether the integration
+     reported token usage for the completed step.
+   - If token counts are available, convert to USD using the `token_pricing` values
+     from the config (default: haiku at $0.00025/1K input + $0.00125/1K output).
+   - If token counts are not available, prompt:
+     ```
+     Enter the LLM spend for this step in USD (e.g. 0.04), or press Enter to skip:
+     ```
+     If the user presses Enter or provides a non-numeric value, skip and exit.
+
+3. **Read the current value**
+   - Parse the `## Cost Allocation` table in the spec.
+   - Read the current **Actual LLM Spend (USD)** cell value.
+   - If the cell is absent or contains a placeholder, treat current value as 0.
+
+4. **Add and write back**
+   - New total = current value + spend amount from step 2.
+   - Overwrite the **Actual LLM Spend (USD)** cell with `$<new_total>` (2 decimal places).
+   - Preserve all other table rows exactly.
+
+5. **Threshold check**
+   - Read **Approved LLM Budget (USD)** from the same table.
+   - Compute `pct = new_total / approved * 100`.
+   - If `pct >= 100`: emit
+     ```
+     ⛔ Budget exceeded: $<new_total> spent of $<approved> approved (<pct>%).
+     Consider pausing and reviewing with your team before continuing.
+     ```
+   - Otherwise, if `pct >= warn_at_pct` (default 80, from config): emit
+     ```
+     ⚠  Budget warning: $<new_total> spent of $<approved> approved (<pct>%).
+     ```
+   - Otherwise: emit
+     ```
+     ✓ Spend recorded: $<new_total> of $<approved> approved (<pct>% used).
+     ```
+
+## Graceful Degradation
+
+- If the spec has no `## Cost Allocation` section: skip with a one-line warning.
+- If the approved budget field is absent or zero: skip the threshold check; still
+  write the spend value.
+- If the spec file is read-only or the write fails: emit an error message and exit
+  without modifying the file.
@@ -0,0 +1,69 @@
+---
+description: "Show a budget summary table across all specs in the project"
+---
+
+# Cost Report
+
+Display a budget summary table covering every spec in the current project that
+has a `## Cost Allocation` section.
+
+## Outline
+
+1. **Discover specs**
+   - Enumerate all `*.md` files under `.specify/specs/` (non-recursive).
+   - For each file, attempt to parse the `## Cost Allocation` table.
+   - Skip files where the section is absent or cannot be parsed.
+
+2. **Extract fields per spec**
+   For each spec with a Cost Allocation section, read:
+   - **Feature** — the spec filename (without extension) or the first H1 heading
+   - **Team** — from the Team row
+   - **Cost Center** — from the Cost Center row
+   - **Priority** — from the Feature Priority row
+   - **Approved (USD)** — from the Approved LLM Budget row (parse `$X.XX` → float)
+   - **Actual (USD)** — from the Actual LLM Spend row (parse `$X.XX` → float; 0 if placeholder)
+   - **% Used** — compute `actual / approved * 100` (`N/A` if approved is absent or 0)
+
+3. **Render the summary table**
+
+   ```
+   ┌─────────────────────────────────────────────────────────────────────────┐
+   │  LLM Cost Report                                   2024-01-15 14:30 UTC │
+   ├──────────────────────┬───────────┬──────────┬────────┬───────┬──────────┤
+   │ Feature              │ Priority  │ Approved │ Actual │ % Used│ Status   │
+   ├──────────────────────┼───────────┼──────────┼────────┼───────┼──────────┤
+   │ add-login            │ P1        │  $10.00  │  $7.80 │  78%  │ ✓ ok     │
+   │ dark-mode            │ P2        │   $5.00  │  $4.10 │  82%  │ ⚠ warn   │
+   │ data-export          │ P3        │   $3.00  │  $3.50 │ 117%  │ ⛔ over  │
+   ├──────────────────────┼───────────┼──────────┼────────┼───────┼──────────┤
+   │ TOTAL                │           │  $18.00  │ $15.40 │  86%  │ ⚠ warn   │
+   └──────────────────────┴───────────┴──────────┴────────┴───────┴──────────┘
+   ```
+
+   Status legend:
+   - `✓ ok` — below `warn_at_pct` (default 80%) of approved budget
+   - `⚠ warn` — at or above `warn_at_pct` but below 100% of approved budget
+   - `⛔ over` — 100% or more of approved budget
+
+4. **Exit code**
+   - Exit 0 if no feature has status `⛔ over` (all are below 100% of approved budget).
+   - Exit 1 if any feature is at or above 100% of its approved budget (for CI use).
+
+## Options
+
+This command accepts no arguments. Configuration comes from
+`.specify/extensions/cost-tracker/cost-tracker-config.yml`:
+
+```yaml
+warn_at_pct: 80   # Percentage at which ⚠ warning status is shown
+```
+
+## Graceful Degradation
+
+- If `.specify/specs/` does not exist or contains no parseable specs, print:
+  ```
+  No specs with Cost Allocation data found in .specify/specs/.
+  Run /speckit.specify to create a spec, then add a ## Cost Allocation section.
+  ```
+- Specs where **Approved LLM Budget** is absent or zero are listed with
+  `N/A` in the Approved and % Used columns and excluded from the total.
@@ -0,0 +1,15 @@
+# Cost Tracker Extension Configuration
+# Copy to .specify/extensions/cost-tracker/cost-tracker-config.yml and customize.
+
+# Percentage of approved budget at which a warning is emitted (default: 80)
+warn_at_pct: 80
+
+# Currency code used in output (display only)
+currency: "USD"
+
+# Token pricing used when converting token counts to USD.
+# Defaults match Anthropic haiku pricing at time of writing.
+# Override if you use a different model or pricing has changed.
+token_pricing:
+  input_per_1k: 0.00025   # USD per 1,000 input tokens
+  output_per_1k: 0.00125  # USD per 1,000 output tokens
@@ -0,0 +1,53 @@
+schema_version: "1.0"
+
+extension:
+  id: "cost-tracker"
+  name: "LLM Cost Tracker"
+  version: "1.0.0"
+  description: "Records actual LLM spend back to the spec's Cost Allocation section and surfaces per-feature budget status"
+  author: "spec-kit-core"
+  repository: "https://github.com/github/spec-kit"
+  license: "MIT"
+  homepage: "https://github.com/github/spec-kit"
+
+requires:
+  speckit_version: ">=0.7.2"
+
+provides:
+  commands:
+    - name: "speckit.cost-tracker.record"
+      file: "commands/speckit.cost-tracker.record.md"
+      description: "Update the Actual LLM Spend field in the current spec with the reported spend amount"
+
+    - name: "speckit.cost-tracker.report"
+      file: "commands/speckit.cost-tracker.report.md"
+      description: "Show a budget summary table across all specs in the project"
+
+  config:
+    - name: "cost-tracker-config.yml"
+      template: "config-template.yml"
+      description: "Cost tracker configuration"
+      required: false
+
+hooks:
+  after_implement:
+    command: "speckit.cost-tracker.record"
+    optional: true
+    prompt: "Record LLM spend for this implementation step?"
+    description: "Prompts for actual spend and writes it to the spec's Cost Allocation section"
+
+tags:
+  - "finops"
+  - "cost"
+  - "llm"
+  - "budget"
+
+cost_profile:
+  llm_calls_per_invocation: "none"
+  estimated_tokens_per_call: 0
+  supports_caching: false
+  recommended_model_tier: "haiku"
+
+defaults:
+  currency: "USD"
+  warn_at_pct: 80
@@ -86,6 +86,14 @@ tags:
   - "template"
   # ADD MORE: "category", "tool-name", etc.
 
+# CUSTOMIZE: Declare the cost characteristics of this extension (optional)
+# llm_calls_per_invocation: none=0 calls, low=1-3, medium=4-10, high=more than 10
+cost_profile:
+  llm_calls_per_invocation: "none"   # none | low | medium | high
+  estimated_tokens_per_call: 0       # average tokens per LLM call
+  supports_caching: false            # true if prompt caching reduces cost
+  recommended_model_tier: "haiku"    # haiku | sonnet | opus
+
 # CUSTOMIZE: Default configuration values (optional)
 # These are merged with user config
 defaults:

@@ -43,3 +43,27 @@ specify preset remove lean
 ## License
 
 MIT
+
+## Cost Profile
+
+Lean uses approximately 67% fewer tokens per spec/plan/tasks cycle than the
+standard templates. The reduced prompt size also makes haiku the appropriate
+model tier for all lean commands.
+
+| Command | Lean | Standard | Reduction |
+|---------|------|----------|-----------|
+| `speckit.specify` | ~800 tokens | ~2,400 tokens | ~67% |
+| `speckit.plan` | ~1,200 tokens | ~3,600 tokens | ~67% |
+| `speckit.tasks` | ~600 tokens | ~1,800 tokens | ~67% |
+| `speckit.implement` | ~500 tokens | ~500 tokens | ~0% |
+| `speckit.constitution` | ~400 tokens | ~1,200 tokens | ~67% |
+
+*Token estimates are approximate and vary with input size.*
+
+### When to Use Lean by Priority
+
+| Priority | Recommendation |
+|----------|---------------|
+| **P1** | Use standard. P1 features justify full artifacts and review gates. |
+| **P2** | Lean is suitable. Reduced boilerplate still produces reviewable artifacts. |
+| **P3** | Lean is preferred. Minimal investment for exploratory or low-risk changes. |
@@ -4091,6 +4091,17 @@ def extension_list(
             console.print(f"     [dim]{ext['id']}[/dim]")
             console.print(f"     {ext['description']}")
             console.print(f"     Commands: {ext['command_count']} | Hooks: {ext['hook_count']} | Priority: {ext['priority']} | Status: {'Enabled' if ext['enabled'] else 'Disabled'}")
+            cp = ext.get("cost_profile", {})
+            if cp:
+                tier = cp.get("recommended_model_tier", "haiku")
+                calls = cp.get("llm_calls_per_invocation", "none")
+                tokens = cp.get("estimated_tokens_per_call", 0)
+                caching = cp.get("supports_caching", False)
+                caching_str = "[green]yes[/green]" if caching else "no"
+                console.print(
+                    f"     [dim]Cost: tier=[cyan]{tier}[/cyan]  calls={calls}  "
+                    f"tokens/call~{tokens}  caching={caching_str}[/dim]"
+                )
             console.print()
 
     if available or all_extensions:

@@ -357,6 +357,11 @@ def hooks(self) -> Dict[str, Any]:
         """Get hook definitions."""
         return self.data.get("hooks", {})
 
+    @property
+    def cost_profile(self) -> Dict[str, Any]:
+        """Get cost profile metadata (optional — returns empty dict if absent)."""
+        return self.data.get("cost_profile", {})
+
     def get_hash(self) -> str:
         """Calculate SHA256 hash of manifest file."""
         with open(self.path, 'rb') as f:
@@ -1519,7 +1524,8 @@ def list_installed(self) -> List[Dict[str, Any]]:
                     "priority": normalize_priority(metadata.get("priority")),
                     "installed_at": metadata.get("installed_at"),
                     "command_count": len(manifest.commands),
-                    "hook_count": len(manifest.hooks)
+                    "hook_count": len(manifest.hooks),
+                    "cost_profile": manifest.cost_profile,
                 })
             except ValidationError:
                 # Corrupted extension

@@ -42,6 +42,7 @@ def get_step_type(type_key: str) -> StepBase | None:
 
 def _register_builtin_steps() -> None:
     """Register all built-in step types."""
+    from .steps.budget_gate import BudgetGateStep
     from .steps.command import CommandStep
     from .steps.do_while import DoWhileStep
     from .steps.fan_in import FanInStep
@@ -53,6 +54,7 @@ def _register_builtin_steps() -> None:
     from .steps.switch import SwitchStep
     from .steps.while_loop import WhileStep
 
+    _register_step(BudgetGateStep())
     _register_step(CommandStep())
     _register_step(DoWhileStep())
     _register_step(FanInStep())