diff --git a/.github/workflows/rpgkit-lint.yml b/.github/workflows/rpgkit-lint.yml new file mode 100644 index 0000000..7a0d279 --- /dev/null +++ b/.github/workflows/rpgkit-lint.yml @@ -0,0 +1,33 @@ +name: RPG-Kit Lint + +permissions: + contents: read + +on: + push: + branches: [main] + paths: + - "RPG-Kit/**" + - ".github/workflows/rpgkit-*.yml" + - ".github/workflows/scripts/rpgkit/**" + - ".markdownlint-cli2.jsonc" + pull_request: + paths: + - "RPG-Kit/**" + - ".github/workflows/rpgkit-*.yml" + - ".github/workflows/scripts/rpgkit/**" + - ".markdownlint-cli2.jsonc" + workflow_dispatch: + +jobs: + markdownlint: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Run markdownlint-cli2 + uses: DavidAnson/markdownlint-cli2-action@v19 + with: + globs: | + RPG-Kit/**/*.md diff --git a/.github/workflows/rpgkit-pre-release.yml b/.github/workflows/rpgkit-pre-release.yml new file mode 100644 index 0000000..6846e28 --- /dev/null +++ b/.github/workflows/rpgkit-pre-release.yml @@ -0,0 +1,54 @@ +name: RPG-Kit Pre-Release + +on: + push: + branches: + - dev + - "dev/**" + paths: + - "RPG-Kit/**" + - ".github/workflows/rpgkit-pre-release.yml" + - ".github/workflows/scripts/rpgkit/**" + workflow_dispatch: + +jobs: + pre-release: + runs-on: ubuntu-latest + permissions: + contents: write + pull-requests: write + env: + PROJECT_DIR: RPG-Kit + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 0 + token: ${{ secrets.GITHUB_TOKEN }} + + - name: Prepare scripts + run: chmod +x .github/workflows/scripts/rpgkit/*.sh + + - name: Get RPG-Kit pre-release version + id: get_tag + run: .github/workflows/scripts/rpgkit/get-next-pre-version.sh "${{ github.run_number }}" + + - name: Check if release already exists + id: check_release + run: .github/workflows/scripts/rpgkit/check-release-exists.sh "${{ steps.get_tag.outputs.tag_name }}" + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + - name: Create release package 
variants + if: steps.check_release.outputs.exists == 'false' + run: .github/workflows/scripts/rpgkit/create-release-packages.sh "${{ steps.get_tag.outputs.new_version }}" + + - name: Generate release notes + if: steps.check_release.outputs.exists == 'false' + run: .github/workflows/scripts/rpgkit/generate-release-notes.sh "${{ steps.get_tag.outputs.new_version }}" "${{ steps.get_tag.outputs.latest_tag }}" pre + + - name: Create GitHub pre-release + if: steps.check_release.outputs.exists == 'false' + run: .github/workflows/scripts/rpgkit/create-github-release.sh "${{ steps.get_tag.outputs.new_version }}" "${{ steps.get_tag.outputs.tag_name }}" pre + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/rpgkit-release.yml b/.github/workflows/rpgkit-release.yml new file mode 100644 index 0000000..82e5d76 --- /dev/null +++ b/.github/workflows/rpgkit-release.yml @@ -0,0 +1,56 @@ +name: RPG-Kit Release + +on: + push: + branches: [main] + paths: + - "RPG-Kit/**" + - ".github/workflows/rpgkit-release.yml" + - ".github/workflows/scripts/rpgkit/**" + workflow_dispatch: + +jobs: + release: + runs-on: ubuntu-latest + permissions: + contents: write + pull-requests: write + env: + PROJECT_DIR: RPG-Kit + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 0 + token: ${{ secrets.GITHUB_TOKEN }} + + - name: Prepare scripts + run: chmod +x .github/workflows/scripts/rpgkit/*.sh + + - name: Get next RPG-Kit version + id: get_tag + run: .github/workflows/scripts/rpgkit/get-next-version.sh + + - name: Check if release already exists + id: check_release + run: .github/workflows/scripts/rpgkit/check-release-exists.sh "${{ steps.get_tag.outputs.tag_name }}" + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + - name: Update version in pyproject.toml + if: steps.check_release.outputs.exists == 'false' + run: .github/workflows/scripts/rpgkit/update-version.sh "${{ steps.get_tag.outputs.new_version }}" + + - name: Create release 
package variants + if: steps.check_release.outputs.exists == 'false' + run: .github/workflows/scripts/rpgkit/create-release-packages.sh "${{ steps.get_tag.outputs.new_version }}" + + - name: Generate release notes + if: steps.check_release.outputs.exists == 'false' + run: .github/workflows/scripts/rpgkit/generate-release-notes.sh "${{ steps.get_tag.outputs.new_version }}" "${{ steps.get_tag.outputs.latest_tag }}" stable + + - name: Create GitHub release + if: steps.check_release.outputs.exists == 'false' + run: .github/workflows/scripts/rpgkit/create-github-release.sh "${{ steps.get_tag.outputs.new_version }}" "${{ steps.get_tag.outputs.tag_name }}" stable + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/scripts/rpgkit/check-release-exists.sh b/.github/workflows/scripts/rpgkit/check-release-exists.sh new file mode 100755 index 0000000..28fadc9 --- /dev/null +++ b/.github/workflows/scripts/rpgkit/check-release-exists.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash +set -euo pipefail + +if [[ $# -ne 1 ]]; then + echo "Usage: $0 " >&2 + exit 1 +fi + +TAG_NAME="$1" + +write_output() { + [[ -n "${GITHUB_OUTPUT:-}" ]] && echo "$1" >> "$GITHUB_OUTPUT" +} + +if gh release view "$TAG_NAME" >/dev/null 2>&1; then + write_output "exists=true" + echo "Release $TAG_NAME already exists, skipping..." +else + write_output "exists=false" + echo "Release $TAG_NAME does not exist, proceeding..." 
+fi diff --git a/.github/workflows/scripts/rpgkit/create-github-release.sh b/.github/workflows/scripts/rpgkit/create-github-release.sh new file mode 100755 index 0000000..7fc3cd5 --- /dev/null +++ b/.github/workflows/scripts/rpgkit/create-github-release.sh @@ -0,0 +1,40 @@ +#!/usr/bin/env bash +set -euo pipefail + +if [[ $# -lt 2 || $# -gt 3 ]]; then + echo "Usage: $0 [stable|pre]" >&2 + exit 1 +fi + +VERSION="$1" +TAG_NAME="$2" +RELEASE_KIND="${3:-stable}" +VERSION_NO_V="${VERSION#v}" +REPO_ROOT="${GITHUB_WORKSPACE:-$(git rev-parse --show-toplevel)}" +PROJECT_DIR="${PROJECT_DIR:-RPG-Kit}" +PROJECT_ROOT="$REPO_ROOT/$PROJECT_DIR" +GENRELEASES_DIR="$PROJECT_ROOT/.genreleases" +NOTES_FILE="${NOTES_FILE:-$REPO_ROOT/release_notes.md}" + +mapfile -t ASSETS < <(find "$GENRELEASES_DIR" -maxdepth 1 -type f -name "rpgkit-template-*-${VERSION}.zip" | sort) +if [[ ${#ASSETS[@]} -eq 0 ]]; then + echo "No release assets found in $GENRELEASES_DIR for $VERSION" >&2 + exit 1 +fi + +PRERELEASE_ARG=() +if [[ "$RELEASE_KIND" == "pre" ]]; then + PRERELEASE_ARG=(--prerelease) +fi + +TARGET_ARG=() +if [[ -n "${GITHUB_SHA:-}" ]]; then + TARGET_ARG=(--target "$GITHUB_SHA") +fi + +gh release create "$TAG_NAME" \ + "${ASSETS[@]}" \ + --title "RPG-Kit Templates - $VERSION_NO_V" \ + --notes-file "$NOTES_FILE" \ + "${PRERELEASE_ARG[@]}" \ + "${TARGET_ARG[@]}" diff --git a/.github/workflows/scripts/rpgkit/create-release-packages.ps1 b/.github/workflows/scripts/rpgkit/create-release-packages.ps1 new file mode 100755 index 0000000..48f8918 --- /dev/null +++ b/.github/workflows/scripts/rpgkit/create-release-packages.ps1 @@ -0,0 +1,411 @@ +#!/usr/bin/env pwsh +#requires -Version 7.0 + +<# +.SYNOPSIS + Build Spec Kit template release archives for each supported AI assistant and script type. + +.DESCRIPTION + create-release-packages.ps1 (workflow-local) + Build Spec Kit template release archives for each supported AI assistant and script type. 
+ +.PARAMETER Version + Version string with leading 'v' (e.g., v0.2.0) + +.PARAMETER Agents + Comma or space separated subset of agents to build (default: all) + Valid agents: copilot, claude, gemini, cursor-agent, qwen, opencode, auggie, codex, codebuddy, qoder, amp + +.PARAMETER Scripts + Comma or space separated subset of script types to build (default: both) + Valid scripts: sh, ps + +.EXAMPLE + .\create-release-packages.ps1 -Version v0.2.0 + +.EXAMPLE + .\create-release-packages.ps1 -Version v0.2.0 -Agents claude,copilot -Scripts sh + +.EXAMPLE + .\create-release-packages.ps1 -Version v0.2.0 -Agents claude -Scripts ps +#> + +param( + [Parameter(Mandatory=$true, Position=0)] + [string]$Version, + + [Parameter(Mandatory=$false)] + [string]$Agents = "", + + [Parameter(Mandatory=$false)] + [string]$Scripts = "" +) + +$ErrorActionPreference = "Stop" + +if ($Version -notmatch '^v\d+\.\d+\.\d+(-.+)?$') { + Write-Error "Version must look like v0.0.0 or v0.0.0-dev.1" + exit 1 +} + +$RepoRoot = if ($env:GITHUB_WORKSPACE) { $env:GITHUB_WORKSPACE } else { (git rev-parse --show-toplevel).Trim() } +$ProjectDir = if ($env:PROJECT_DIR) { $env:PROJECT_DIR } else { "RPG-Kit" } +$ProjectRoot = Join-Path $RepoRoot $ProjectDir +if (-not (Test-Path $ProjectRoot)) { + Write-Error "RPG-Kit project directory not found: $ProjectRoot" + exit 1 +} +Set-Location $ProjectRoot + +Write-Host "Building release packages for $Version from $ProjectRoot" + +$GenReleasesDir = Join-Path $ProjectRoot ".genreleases" +if (Test-Path $GenReleasesDir) { + Remove-Item -Path $GenReleasesDir -Recurse -Force -ErrorAction SilentlyContinue +} +New-Item -ItemType Directory -Path $GenReleasesDir -Force | Out-Null + +function Rewrite-Paths { + param([string]$Content) + + $Content = $Content -replace '(/?)\bmemory/', '.rpgkit/memory/' + $Content = $Content -replace '(/?)\bscripts/', '.rpgkit/scripts/' + $Content = $Content -replace '(/?)\btemplates/', '.rpgkit/templates/' + $Content = $Content -replace 
'(/?)\butils/', '.rpgkit/utils/' + return $Content +} + +function Generate-Commands { + param( + [string]$Extension, + [string]$OutputDir + ) + + New-Item -ItemType Directory -Path $OutputDir -Force | Out-Null + + $templates = Get-ChildItem -Path "templates/commands/*.md" -File -ErrorAction SilentlyContinue + + foreach ($template in $templates) { + $name = [System.IO.Path]::GetFileNameWithoutExtension($template.Name) + + # Read file content and normalize line endings + $body = (Get-Content -Path $template.FullName -Raw) -replace "`r`n", "`n" + + # Extract description from YAML frontmatter (for toml format) + $description = "" + if ($body -match '(?m)^description:\s*(.+)$') { + $description = $matches[1] + } + + # Rewrite paths for .rpgkit structure + $body = Rewrite-Paths -Content $body + + # Generate output file based on extension + $outputFile = Join-Path $OutputDir "rpgkit.$name.$Extension" + + switch ($Extension) { + 'toml' { + $body = $body -replace '\\', '\\' + $output = "description = `"$description`"`n`nprompt = `"`"`"`n$body`n`"`"`"" + Set-Content -Path $outputFile -Value $output -NoNewline + } + 'md' { + Set-Content -Path $outputFile -Value $body -NoNewline + } + 'agent.md' { + Set-Content -Path $outputFile -Value $body -NoNewline + } + } + } +} + +function Generate-CopilotPrompts { + param( + [string]$AgentsDir, + [string]$PromptsDir + ) + + New-Item -ItemType Directory -Path $PromptsDir -Force | Out-Null + + $agentFiles = Get-ChildItem -Path "$AgentsDir/rpgkit.*.agent.md" -File -ErrorAction SilentlyContinue + + foreach ($agentFile in $agentFiles) { + $basename = $agentFile.Name -replace '\.agent\.md$', '' + $promptFile = Join-Path $PromptsDir "$basename.prompt.md" + + $content = @" +--- +agent: $basename +--- +"@ + Set-Content -Path $promptFile -Value $content + } +} + +function Build-Variant { + param( + [string]$Agent, + [string]$Script + ) + + $baseDir = Join-Path $GenReleasesDir "sdd-${Agent}-package-${Script}" + Write-Host "Building $Agent 
($Script) package..." + New-Item -ItemType Directory -Path $baseDir -Force | Out-Null + + # Copy base structure but filter scripts by variant + $specDir = Join-Path $baseDir ".rpgkit" + New-Item -ItemType Directory -Path $specDir -Force | Out-Null + + # Copy memory directory + if (Test-Path "memory") { + Copy-Item -Path "memory" -Destination $specDir -Recurse -Force + Write-Host "Copied memory -> .rpgkit" + } + + # Only copy the relevant script variant directory + if (Test-Path "scripts") { + $scriptsDestDir = Join-Path $specDir "scripts" + New-Item -ItemType Directory -Path $scriptsDestDir -Force | Out-Null + + switch ($Script) { + 'sh' { + if (Test-Path "scripts/bash") { + Copy-Item -Path "scripts/bash" -Destination $scriptsDestDir -Recurse -Force + Write-Host "Copied scripts/bash -> .rpgkit/scripts" + } + } + 'ps' { + if (Test-Path "scripts/powershell") { + Copy-Item -Path "scripts/powershell" -Destination $scriptsDestDir -Recurse -Force + Write-Host "Copied scripts/powershell -> .rpgkit/scripts" + } + } + } + + # Copy any script files that aren't in variant-specific directories + Get-ChildItem -Path "scripts" -File -ErrorAction SilentlyContinue | ForEach-Object { + Copy-Item -Path $_.FullName -Destination $scriptsDestDir -Force + } + + # Copy all subdirectories under scripts + Get-ChildItem -Path "scripts" -Directory -ErrorAction SilentlyContinue | ForEach-Object { + Copy-Item -Path $_.FullName -Destination $scriptsDestDir -Recurse -Force + } + } + + # Copy templates (excluding commands directory and vscode-settings.json) + if (Test-Path "templates") { + $templatesDestDir = Join-Path $specDir "templates" + New-Item -ItemType Directory -Path $templatesDestDir -Force | Out-Null + + Get-ChildItem -Path "templates" -Recurse -File | Where-Object { + $_.FullName -notmatch 'templates[/\\]commands[/\\]' -and $_.Name -ne 'vscode-settings.json' + } | ForEach-Object { + $relativePath = $_.FullName.Substring((Resolve-Path "templates").Path.Length + 1) + $destFile = 
Join-Path $templatesDestDir $relativePath + $destFileDir = Split-Path $destFile -Parent + New-Item -ItemType Directory -Path $destFileDir -Force | Out-Null + Copy-Item -Path $_.FullName -Destination $destFile -Force + } + Write-Host "Copied templates -> .rpgkit/templates" + } + + # Copy utils directory + if (Test-Path "utils") { + Copy-Item -Path "utils" -Destination $specDir -Recurse -Force + Write-Host "Copied utils -> .rpgkit/utils" + } + + # Replace placeholder in copied scripts with the actual CLI command name + if (Test-Path (Join-Path $specDir "scripts")) { + $agentName = "" + switch ($Agent) { + 'copilot' { $agentName = "copilot" } + 'claude' { $agentName = "claude" } + 'gemini' { $agentName = "gemini -p" } + 'qwen' { $agentName = "qwen -p" } + 'cursor-agent' { $agentName = "agent -p" } + 'auggie' { $agentName = "augment -p" } + 'codex' { $agentName = "codex exec" } + 'codebuddy' { $agentName = "codebuddy -p" } + 'qoder' { $agentName = "qodercli -p" } + 'opencode' { $agentName = "opencode run" } + 'amp' { $agentName = "amp --execute" } + default { $agentName = "" } + } + + # Only perform replacement if agentName is set + if (-not [string]::IsNullOrEmpty($agentName)) { + $scriptsPath = Join-Path $specDir "scripts" + Get-ChildItem -Path $scriptsPath -File -Recurse -ErrorAction SilentlyContinue | ForEach-Object { + $content = Get-Content -Path $_.FullName -Raw -ErrorAction SilentlyContinue + if ($null -ne $content) { + $newContent = $content -replace '', $agentName + if ($content -ne $newContent) { + Set-Content -Path $_.FullName -Value $newContent -NoNewline + } + } + } + Write-Host "Replaced with '$agentName' in scripts" + } else { + Write-Host "Skipped replacement (no CLI command for $Agent)" + } + } + + # Generate agent-specific command files + switch ($Agent) { + 'claude' { + $cmdDir = Join-Path $baseDir ".claude/commands" + Generate-Commands -Extension 'md' -OutputDir $cmdDir + $settingsContent = @' +{ + "permissions": { + "allow": [ + "Write", + "Edit", 
+ "Read", + "Glob", + "Grep", + "Bash", + "WebFetch" + ], + "deny": [ + "WebSearch" + ] + } +} +'@ + Set-Content -Path (Join-Path $baseDir ".claude/settings.json") -Value $settingsContent -NoNewline + } + 'gemini' { + $cmdDir = Join-Path $baseDir ".gemini/commands" + Generate-Commands -Extension 'toml' -OutputDir $cmdDir + if (Test-Path "agent_templates/gemini/GEMINI.md") { + Copy-Item -Path "agent_templates/gemini/GEMINI.md" -Destination (Join-Path $baseDir "GEMINI.md") + } + } + 'copilot' { + $agentsDir = Join-Path $baseDir ".github/agents" + Generate-Commands -Extension 'agent.md' -OutputDir $agentsDir + + # Generate companion prompt files + $promptsDir = Join-Path $baseDir ".github/prompts" + Generate-CopilotPrompts -AgentsDir $agentsDir -PromptsDir $promptsDir + + # Create VS Code workspace settings + $vscodeDir = Join-Path $baseDir ".vscode" + New-Item -ItemType Directory -Path $vscodeDir -Force | Out-Null + if (Test-Path "templates/vscode-settings.json") { + Copy-Item -Path "templates/vscode-settings.json" -Destination (Join-Path $vscodeDir "settings.json") + } + } + 'cursor-agent' { + $cmdDir = Join-Path $baseDir ".cursor/commands" + Generate-Commands -Extension 'md' -OutputDir $cmdDir + } + 'qwen' { + $cmdDir = Join-Path $baseDir ".qwen/commands" + Generate-Commands -Extension 'toml' -OutputDir $cmdDir + if (Test-Path "agent_templates/qwen/QWEN.md") { + Copy-Item -Path "agent_templates/qwen/QWEN.md" -Destination (Join-Path $baseDir "QWEN.md") + } + } + 'auggie' { + $cmdDir = Join-Path $baseDir ".augment/commands" + Generate-Commands -Extension 'md' -OutputDir $cmdDir + } + 'codex' { + $cmdDir = Join-Path $baseDir ".codex/prompts" + Generate-Commands -Extension 'md' -OutputDir $cmdDir + } + 'codebuddy' { + $cmdDir = Join-Path $baseDir ".codebuddy/commands" + Generate-Commands -Extension 'md' -OutputDir $cmdDir + } + 'qoder' { + $cmdDir = Join-Path $baseDir ".qoder/commands" + Generate-Commands -Extension 'md' -OutputDir $cmdDir + } + 'opencode' { + $cmdDir 
= Join-Path $baseDir ".opencode/command"
+            Generate-Commands -Extension 'md' -OutputDir $cmdDir
+        }
+        'amp' {
+            $cmdDir = Join-Path $baseDir ".agents/commands"
+            Generate-Commands -Extension 'md' -OutputDir $cmdDir
+        }
+    }
+
+    # Create zip archive
+    $zipFile = Join-Path $GenReleasesDir "rpgkit-template-${Agent}-${Script}-${Version}.zip"
+    Compress-Archive -Path "$baseDir/*" -DestinationPath $zipFile -Force
+    Write-Host "Created $zipFile"
+}
+
+# Define all agents and scripts
+$AllAgents = @('copilot', 'claude', 'gemini', 'cursor-agent', 'qwen', 'opencode', 'auggie', 'codex', 'codebuddy', 'qoder', 'amp')
+$AllScripts = @('sh', 'ps')
+
+function Normalize-List {
+    param([string]$Value)
+
+    if ([string]::IsNullOrEmpty($Value)) {
+        return @()
+    }
+
+    # Split by comma or space and remove duplicates while preserving order
+    $items = $Value -split '[,\s]+' | Where-Object { $_ } | Select-Object -Unique
+    return $items
+}
+
+function Validate-Subset {
+    param(
+        [string]$Type,
+        [string[]]$Allowed,
+        [string[]]$Items
+    )
+
+    $ok = $true
+    foreach ($item in $Items) {
+        if ($item -notin $Allowed) {
+            Write-Error "Unknown $Type '$item' (allowed: $($Allowed -join ', '))"
+            $ok = $false
+        }
+    }
+    return $ok
+}
+
+# Determine agent list
+if (-not [string]::IsNullOrEmpty($Agents)) {
+    $AgentList = Normalize-List -Value $Agents
+    if (-not (Validate-Subset -Type 'agent' -Allowed $AllAgents -Items $AgentList)) {
+        exit 1
+    }
+} else {
+    $AgentList = $AllAgents
+}
+
+# Determine script list
+if (-not [string]::IsNullOrEmpty($Scripts)) {
+    $ScriptList = Normalize-List -Value $Scripts
+    if (-not (Validate-Subset -Type 'script' -Allowed $AllScripts -Items $ScriptList)) {
+        exit 1
+    }
+} else {
+    $ScriptList = $AllScripts
+}
+
+Write-Host "Agents: $($AgentList -join ', ')"
+Write-Host "Scripts: $($ScriptList -join ', ')"
+
+# Build all variants
+foreach ($agent in $AgentList) {
+    foreach ($script in $ScriptList) {
+        Build-Variant -Agent $agent -Script $script
+    }
+}
+
+Write-Host
"`nArchives in ${GenReleasesDir}:"
+Get-ChildItem -Path $GenReleasesDir -Filter "rpgkit-template-*-${Version}.zip" | ForEach-Object {
+    Write-Host "  $($_.Name)"
+}
\ No newline at end of file
diff --git a/.github/workflows/scripts/rpgkit/create-release-packages.sh b/.github/workflows/scripts/rpgkit/create-release-packages.sh
new file mode 100755
index 0000000..9891028
--- /dev/null
+++ b/.github/workflows/scripts/rpgkit/create-release-packages.sh
@@ -0,0 +1,297 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# create-release-packages.sh (workflow-local)
+# Build RPG-Kit template release archives for each supported AI assistant and script type.
+# Usage: .github/workflows/scripts/rpgkit/create-release-packages.sh <version>
+# Version argument should include leading 'v'.
+# Optionally set AGENTS and/or SCRIPTS env vars to limit what gets built.
+#   AGENTS  : space or comma separated subset of: copilot claude gemini cursor-agent qwen opencode auggie codex codebuddy qoder amp (default: all)
+#   SCRIPTS : space or comma separated subset of: sh ps (default: both)
+# Examples:
+#   AGENTS=claude SCRIPTS=sh $0 v0.2.0
+#   AGENTS="copilot,gemini" $0 v0.2.0
+#   SCRIPTS=ps $0 v0.2.0
+
+if [[ $# -ne 1 ]]; then
+  echo "Usage: $0 <version>" >&2
+  exit 1
+fi
+NEW_VERSION="$1"
+if [[ ! $NEW_VERSION =~ ^v[0-9]+\.[0-9]+\.[0-9]+(-.+)?$ ]]; then
+  echo "Version must look like v0.0.0 or v0.0.0-dev.1" >&2
+  exit 1
+fi
+
+REPO_ROOT="${GITHUB_WORKSPACE:-$(git rev-parse --show-toplevel)}"
+PROJECT_DIR="${PROJECT_DIR:-RPG-Kit}"
+PROJECT_ROOT="$REPO_ROOT/$PROJECT_DIR"
+GENRELEASES_DIR="$PROJECT_ROOT/.genreleases"
+
+if [[ !
-d "$PROJECT_ROOT" ]]; then + echo "RPG-Kit project directory not found: $PROJECT_ROOT" >&2 + exit 1 +fi + +cd "$PROJECT_ROOT" +echo "Building release packages for $NEW_VERSION from $PROJECT_ROOT" + +mkdir -p "$GENRELEASES_DIR" +rm -rf "$GENRELEASES_DIR"/* || true + +generate_commands() { + local ext=$1 output_dir=$2 + mkdir -p "$output_dir" + for template in templates/commands/*.md; do + [[ -f "$template" ]] || continue + local name description body + name=$(basename "$template" .md) + + # Normalize line endings + body=$(tr -d '\r' < "$template") + + # Extract description from YAML frontmatter (for toml format) + description=$(awk '/^description:/ {sub(/^description:[[:space:]]*/, ""); print; exit}' <<< "$body") + + case $ext in + toml) + body=$(sed 's/\\/\\\\/g' <<< "$body") + { echo "description = \"$description\""; echo; echo "prompt = \"\"\""; echo "$body"; echo "\"\"\""; } > "$output_dir/rpgkit.$name.$ext" ;; + md) + echo "$body" > "$output_dir/rpgkit.$name.$ext" ;; + agent.md) + echo "$body" > "$output_dir/rpgkit.$name.$ext" ;; + esac + done +} + +generate_copilot_prompts() { + local agents_dir=$1 prompts_dir=$2 + mkdir -p "$prompts_dir" + + # Generate a .prompt.md file for each .agent.md file + for agent_file in "$agents_dir"/rpgkit.*.agent.md; do + [[ -f "$agent_file" ]] || continue + + local basename=$(basename "$agent_file" .agent.md) + local prompt_file="$prompts_dir/${basename}.prompt.md" + + # Create prompt file with agent frontmatter + cat > "$prompt_file" < .rpgkit"; } + + # Only copy the relevant script variant directory + if [[ -d scripts ]]; then + mkdir -p "$SPEC_DIR/scripts" + case $script in + sh) + [[ -d scripts/bash ]] && { cp -r scripts/bash "$SPEC_DIR/scripts/"; echo "Copied scripts/bash -> .rpgkit/scripts"; } + # Copy any script files that aren't in variant-specific directories + find scripts -maxdepth 1 -type f -exec cp {} "$SPEC_DIR/scripts/" \; 2>/dev/null || true + ;; + ps) + [[ -d scripts/powershell ]] && { cp -r scripts/powershell 
"$SPEC_DIR/scripts/"; echo "Copied scripts/powershell -> .rpgkit/scripts"; } + # Copy any script files that aren't in variant-specific directories + find scripts -maxdepth 1 -type f -exec cp {} "$SPEC_DIR/scripts/" \; 2>/dev/null || true + ;; + esac + # Copy all subdirectories under scripts + find scripts -mindepth 1 -maxdepth 1 -type d -exec cp -r {} "$SPEC_DIR/scripts/" \; 2>/dev/null || true + fi + + # Replace placeholder in copied scripts with the actual CLI command name + if [[ -d "$SPEC_DIR/scripts" ]]; then + local agent_name="" + case $agent in + copilot) + agent_name="copilot" + ;; + claude) + agent_name="claude" ;; + gemini) + agent_name="gemini -p" ;; + qwen) + agent_name="qwen -p" ;; + cursor-agent) + agent_name="agent -p" ;; + auggie) + agent_name="augment -p" ;; + codex) + agent_name="codex exec" ;; + codebuddy) + agent_name="codebuddy -p" ;; + qoder) + agent_name="qodercli -p" ;; + opencode) + agent_name="opencode run" ;; + amp) + agent_name="amp --execute" ;; + *) + agent_name="" ;; + esac + + # Only perform replacement if agent_name is set + if [[ -n "$agent_name" ]]; then + find "$SPEC_DIR/scripts" -type f -exec sed -i "s||${agent_name}|g" {} + 2>/dev/null || true + echo "Replaced with '$agent_name' in scripts" + else + echo "Skipped replacement (no CLI command for $agent)" + fi + fi + + [[ -d templates ]] && { mkdir -p "$SPEC_DIR/templates"; find templates -type f -not -path "templates/commands/*" -not -name "vscode-settings.json" -exec cp --parents {} "$SPEC_DIR"/ \; ; echo "Copied templates -> .rpgkit/templates"; } + + [[ -d utils ]] && { cp -r utils "$SPEC_DIR/"; echo "Copied utils -> .rpgkit/utils"; } + + case $agent in + claude) + mkdir -p "$base_dir/.claude/commands" + generate_commands md "$base_dir/.claude/commands" + cat > "$base_dir/.claude/settings.json" <<'SETTINGS' +{ + "permissions": { + "allow": [ + "Write", + "Edit", + "Read", + "Glob", + "Grep", + "Bash", + "WebFetch", + "mcp__rpg-tools" + ], + "deny": [ + "WebSearch" + ] + } +} 
+SETTINGS + ;; + gemini) + mkdir -p "$base_dir/.gemini/commands" + generate_commands toml "$base_dir/.gemini/commands" + [[ -f agent_templates/gemini/GEMINI.md ]] && cp agent_templates/gemini/GEMINI.md "$base_dir/GEMINI.md" ;; + copilot) + mkdir -p "$base_dir/.github/agents" + generate_commands agent.md "$base_dir/.github/agents" + # Generate companion prompt files + generate_copilot_prompts "$base_dir/.github/agents" "$base_dir/.github/prompts" + # Create VS Code workspace settings + mkdir -p "$base_dir/.vscode" + [[ -f templates/vscode-settings.json ]] && cp templates/vscode-settings.json "$base_dir/.vscode/settings.json" + ;; + cursor-agent) + mkdir -p "$base_dir/.cursor/commands" + generate_commands md "$base_dir/.cursor/commands" ;; + qwen) + mkdir -p "$base_dir/.qwen/commands" + generate_commands toml "$base_dir/.qwen/commands" + [[ -f agent_templates/qwen/QWEN.md ]] && cp agent_templates/qwen/QWEN.md "$base_dir/QWEN.md" ;; + auggie) + mkdir -p "$base_dir/.augment/commands" + generate_commands md "$base_dir/.augment/commands" ;; + codex) + mkdir -p "$base_dir/.codex/prompts" + generate_commands md "$base_dir/.codex/prompts" ;; + codebuddy) + mkdir -p "$base_dir/.codebuddy/commands" + generate_commands md "$base_dir/.codebuddy/commands" ;; + qoder) + mkdir -p "$base_dir/.qoder/commands" + generate_commands md "$base_dir/.qoder/commands" ;; + opencode) + mkdir -p "$base_dir/.opencode/command" + generate_commands md "$base_dir/.opencode/command" ;; + amp) + mkdir -p "$base_dir/.agents/commands" + generate_commands md "$base_dir/.agents/commands" ;; + esac + create_archive "$base_dir" "$GENRELEASES_DIR/rpgkit-template-${agent}-${script}-${NEW_VERSION}.zip" + echo "Created $GENRELEASES_DIR/rpgkit-template-${agent}-${script}-${NEW_VERSION}.zip" +} + +# Determine agent list +ALL_AGENTS=(copilot claude gemini cursor-agent qwen opencode auggie codex codebuddy qoder amp) +ALL_SCRIPTS=(sh ps) + +norm_list() { + # convert comma+space separated -> line separated unique 
while preserving order of first occurrence + tr ',\n' ' ' | awk '{for(i=1;i<=NF;i++){if(!seen[$i]++){printf((out?"\n":"") $i);out=1}}}END{printf("\n")}' +} + +validate_subset() { + local type=$1; shift; local -n allowed=$1; shift; local items=("$@") + local invalid=0 + for it in "${items[@]}"; do + local found=0 + for a in "${allowed[@]}"; do [[ $it == "$a" ]] && { found=1; break; }; done + if [[ $found -eq 0 ]]; then + echo "Error: unknown $type '$it' (allowed: ${allowed[*]})" >&2 + invalid=1 + fi + done + return $invalid +} + +if [[ -n ${AGENTS:-} ]]; then + mapfile -t AGENT_LIST < <(printf '%s' "$AGENTS" | norm_list) + validate_subset agent ALL_AGENTS "${AGENT_LIST[@]}" || exit 1 +else + AGENT_LIST=("${ALL_AGENTS[@]}") +fi + +if [[ -n ${SCRIPTS:-} ]]; then + mapfile -t SCRIPT_LIST < <(printf '%s' "$SCRIPTS" | norm_list) + validate_subset script ALL_SCRIPTS "${SCRIPT_LIST[@]}" || exit 1 +else + SCRIPT_LIST=("${ALL_SCRIPTS[@]}") +fi + +echo "Agents: ${AGENT_LIST[*]}" +echo "Scripts: ${SCRIPT_LIST[*]}" + +for agent in "${AGENT_LIST[@]}"; do + for script in "${SCRIPT_LIST[@]}"; do + build_variant "$agent" "$script" + done +done + +echo "Archives in $GENRELEASES_DIR:" +ls -1 "$GENRELEASES_DIR"/rpgkit-template-*-"${NEW_VERSION}".zip + diff --git a/.github/workflows/scripts/rpgkit/generate-release-notes.sh b/.github/workflows/scripts/rpgkit/generate-release-notes.sh new file mode 100755 index 0000000..0ee3fac --- /dev/null +++ b/.github/workflows/scripts/rpgkit/generate-release-notes.sh @@ -0,0 +1,45 @@ +#!/usr/bin/env bash +set -euo pipefail + +if [[ $# -lt 2 || $# -gt 3 ]]; then + echo "Usage: $0 [stable|pre]" >&2 + exit 1 +fi + +NEW_VERSION="$1" +LAST_TAG="$2" +RELEASE_KIND="${3:-stable}" +REPO_ROOT="${GITHUB_WORKSPACE:-$(git rev-parse --show-toplevel)}" +PROJECT_DIR="${PROJECT_DIR:-RPG-Kit}" +NOTES_FILE="${NOTES_FILE:-$REPO_ROOT/release_notes.md}" + +if git rev-parse -q --verify "refs/tags/$LAST_TAG" >/dev/null; then + COMMITS=$(git log --oneline --pretty=format:"- 
%s" "$LAST_TAG"..HEAD -- "$PROJECT_DIR" || true) +else + COMMITS=$(git log --oneline --pretty=format:"- %s" HEAD -- "$PROJECT_DIR" | head -n 10 || true) +fi + +COMMITS="${COMMITS:-No RPG-Kit changes found.}" + +if [[ "$RELEASE_KIND" == "pre" ]]; then + BRANCH="${GITHUB_REF_NAME:-unknown}" + cat > "$NOTES_FILE" << EOF +> **This is a development pre-release from the \`$BRANCH\` branch.** +> It is intended for testing purposes only. For stable releases, use \`rpgkit init\` without \`--pre\`. + +## Changelog (since ${LAST_TAG}) + +$COMMITS +EOF +else + cat > "$NOTES_FILE" << EOF +This is the latest RPG-Kit template release. We recommend using the RPG-Kit CLI to scaffold projects, but the template archives can also be downloaded and managed manually. + +## Changelog (since ${LAST_TAG}) + +$COMMITS +EOF +fi + +echo "Generated release notes at $NOTES_FILE:" +cat "$NOTES_FILE" diff --git a/.github/workflows/scripts/rpgkit/get-next-pre-version.sh b/.github/workflows/scripts/rpgkit/get-next-pre-version.sh new file mode 100755 index 0000000..1dbc296 --- /dev/null +++ b/.github/workflows/scripts/rpgkit/get-next-pre-version.sh @@ -0,0 +1,36 @@ +#!/usr/bin/env bash +set -euo pipefail + +if [[ $# -ne 1 ]]; then + echo "Usage: $0 " >&2 + exit 1 +fi + +RUN_NUMBER="$1" +TAG_PREFIX="${TAG_PREFIX:-rpgkit-v}" +INITIAL_VERSION="${INITIAL_VERSION:-0.1.0}" + +write_output() { + [[ -n "${GITHUB_OUTPUT:-}" ]] && echo "$1" >> "$GITHUB_OUTPUT" +} + +LATEST_TAG=$(git tag -l "${TAG_PREFIX}[0-9]*.[0-9]*.[0-9]*" --sort=-v:refname \ + | grep -E "^${TAG_PREFIX}[0-9]+\.[0-9]+\.[0-9]+$" \ + | head -n1 || true) + +if [[ -z "$LATEST_TAG" ]]; then + LATEST_TAG="${TAG_PREFIX}0.0.0" + BASE_VERSION="$INITIAL_VERSION" +else + BASE_VERSION="${LATEST_TAG#${TAG_PREFIX}}" +fi +write_output "latest_tag=$LATEST_TAG" + +NEW_VERSION="v${BASE_VERSION}-dev.${RUN_NUMBER}" +TAG_NAME="${TAG_PREFIX}${NEW_VERSION#v}" + +write_output "new_version=$NEW_VERSION" +write_output "tag_name=$TAG_NAME" +echo "Latest stable RPG-Kit 
tag: $LATEST_TAG" +echo "Pre-release version will be: $NEW_VERSION" +echo "Pre-release tag will be: $TAG_NAME" diff --git a/.github/workflows/scripts/rpgkit/get-next-version.sh b/.github/workflows/scripts/rpgkit/get-next-version.sh new file mode 100755 index 0000000..d6687eb --- /dev/null +++ b/.github/workflows/scripts/rpgkit/get-next-version.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env bash +set -euo pipefail + +TAG_PREFIX="${TAG_PREFIX:-rpgkit-v}" + +write_output() { + [[ -n "${GITHUB_OUTPUT:-}" ]] && echo "$1" >> "$GITHUB_OUTPUT" +} + +INITIAL_VERSION="${INITIAL_VERSION:-0.1.0}" + +LATEST_TAG=$(git tag -l "${TAG_PREFIX}[0-9]*.[0-9]*.[0-9]*" --sort=-v:refname \ + | grep -E "^${TAG_PREFIX}[0-9]+\.[0-9]+\.[0-9]+$" \ + | head -n1 || true) + +if [[ -z "$LATEST_TAG" ]]; then + LATEST_TAG="${TAG_PREFIX}0.0.0" + NEW_VERSION="v$INITIAL_VERSION" +else + VERSION="${LATEST_TAG#${TAG_PREFIX}}" + IFS='.' read -ra VERSION_PARTS <<< "$VERSION" + MAJOR=${VERSION_PARTS[0]:-0} + MINOR=${VERSION_PARTS[1]:-0} + PATCH=${VERSION_PARTS[2]:-0} + + PATCH=$((PATCH + 1)) + NEW_VERSION="v$MAJOR.$MINOR.$PATCH" +fi + +write_output "latest_tag=$LATEST_TAG" +TAG_NAME="${TAG_PREFIX}${NEW_VERSION#v}" + +write_output "new_version=$NEW_VERSION" +write_output "tag_name=$TAG_NAME" +echo "Latest RPG-Kit tag: $LATEST_TAG" +echo "New version will be: $NEW_VERSION" +echo "Release tag will be: $TAG_NAME" diff --git a/.github/workflows/scripts/rpgkit/update-version.sh b/.github/workflows/scripts/rpgkit/update-version.sh new file mode 100755 index 0000000..128bae1 --- /dev/null +++ b/.github/workflows/scripts/rpgkit/update-version.sh @@ -0,0 +1,35 @@ +#!/usr/bin/env bash +set -euo pipefail + +if [[ $# -ne 1 ]]; then + echo "Usage: $0 " >&2 + exit 1 +fi + +VERSION="$1" +PYTHON_VERSION="${VERSION#v}" +REPO_ROOT="${GITHUB_WORKSPACE:-$(git rev-parse --show-toplevel)}" +PROJECT_DIR="${PROJECT_DIR:-RPG-Kit}" +PROJECT_ROOT="$REPO_ROOT/$PROJECT_DIR" +PYPROJECT="$PROJECT_ROOT/pyproject.toml" + +if [[ ! 
-f "$PYPROJECT" ]]; then + echo "Warning: $PYPROJECT not found, skipping version update" + exit 0 +fi + +python - "$PYPROJECT" "$PYTHON_VERSION" <<'PY' +from pathlib import Path +import re +import sys + +path = Path(sys.argv[1]) +version = sys.argv[2] +text = path.read_text() +updated, count = re.subn(r'^version = ".*"$', f'version = "{version}"', text, count=1, flags=re.MULTILINE) +if count != 1: + raise SystemExit(f"Could not update version in {path}") +path.write_text(updated) +PY + +echo "Updated $PYPROJECT version to $PYTHON_VERSION (for release artifacts only)" diff --git a/.gitignore b/.gitignore index ce89292..92c43d8 100644 --- a/.gitignore +++ b/.gitignore @@ -416,3 +416,7 @@ FodyWeavers.xsd *.msix *.msm *.msp + +# RPG-Kit release artifacts +RPG-Kit/.genreleases/ +release_notes.md diff --git a/.markdownlint-cli2.jsonc b/.markdownlint-cli2.jsonc new file mode 100644 index 0000000..ab2b9e9 --- /dev/null +++ b/.markdownlint-cli2.jsonc @@ -0,0 +1,29 @@ +{ + // https://github.com/DavidAnson/markdownlint/blob/main/doc/Rules.md + "config": { + "default": true, + "MD003": { + "style": "atx" + }, + "MD007": { + "indent": 2 + }, + "MD013": false, + "MD024": { + "siblings_only": true + }, + "MD033": false, + "MD041": false, + "MD049": { + "style": "asterisk" + }, + "MD050": { + "style": "asterisk" + } + }, + "ignores": [ + "RPG-Kit/.genreleases/", + "RPG-Kit/.pytest_cache/", + "RPG-Kit/**/__pycache__/" + ] +} diff --git a/README.md b/README.md index f38e58c..3047d72 100644 --- a/README.md +++ b/README.md @@ -1,490 +1,203 @@ # RPG-ZeroRepo -[![arXiv:2509.16198](https://img.shields.io/badge/TechReport-arXiv%3A2509.16198-b31a1b)](https://arxiv.org/abs/2509.16198) -[![Python 3.11+](https://img.shields.io/badge/python-3.11+-blue.svg)](https://www.python.org/downloads/) [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) +**RPG-ZeroRepo turns Repository Planning Graphs into a control layer for long-horizon AI coding 
agents.** -**RPG (Repository Planning Graph)** is a hierarchical graph representation that captures both the *functional* view (features, requirements) and the *structural* view (files, classes, functions, dependencies) of a software repository. **ZeroRepo** is an LLM-driven framework built on RPG that **generates a complete software repository from a natural-language description** -- given a short project overview (e.g. "a multiplayer snake game"), it constructs an RPG through feature planning and architecture design, then iteratively writes and tests every source file, all automatically. +[![Paper 1: arXiv:2509.16198](https://img.shields.io/badge/Paper%201-arXiv%3A2509.16198-b31a1b)](https://arxiv.org/abs/2509.16198) +[![Paper 2: arXiv:2602.02084](https://img.shields.io/badge/Paper%202-arXiv%3A2602.02084-b31a1b)](https://arxiv.org/abs/2602.02084) +[![ICLR 2026](https://img.shields.io/badge/ICLR-2026-blue.svg)](https://arxiv.org/abs/2509.16198) +[![ICML 2026](https://img.shields.io/badge/ICML-2026-blue.svg)](https://arxiv.org/abs/2602.02084) +[![Python 3.11+](https://img.shields.io/badge/python-3.11+-blue.svg)](https://www.python.org/downloads/) +[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) -The pipeline has three sequential phases: +๐Ÿ”ฅ **New: [RPG-Kit](RPG-Kit/) is now open source for Claude Code and GitHub Copilot.** -![pipeline of zerorepo](docs/pipeline.png) +Coding agents often lose repository-level context across long tasks: requirements drift, architecture decisions disappear, and edits miss hidden dependencies. -## News and Updates ---- -- [Coming Soon] An RPG-based plugin for Claude Code is under active development and will be open-sourced once ready. -- [2026-03-02] ๐Ÿ”ฅ We have officially open-sourced the **EpiCoder Feature Tree** (features and frequencies) at [Hugging Face](https://huggingface.co/datasets/microsoft/EpiCoder-meta-features). 
This provides the structured knowledge base needed to enable full repository planning capabilities in **ZeroRepo**. -- [2026-02-27] ๐Ÿ”ฅ We have released code of the [RPG-Encoder](zerorepo/rpg_encoder/) for repository understanding and reconstruction and [RepoCraft](repocraft/) for end-to-end repository generation evaluation. -- [2026-02-12] ๐Ÿ”ฅ We have open-sourced the ZeroRepo codebase. RPG-Encoder is currently going through the open-source release process and will be released once the process is complete. -- [2026.02.02] ๐Ÿ”ฅ Our paper "[Closing the Loop: Universal Repository Representation with RPG-Encoder](https://arxiv.org/abs/2602.02084)" has been released on arXiv. -- [2026.01.26] ๐Ÿ”ฅ [RPG-ZeroRepo](https://arxiv.org/abs/2509.16198) has been accepted as a poster at ICLR 2026. -- [2025.09.19] ๐Ÿ”ฅ Our paper "[RPG: A Repository Planning Graph for Unified and Scalable Codebase Generation](https://arxiv.org/abs/2509.16198)" has been released on arXiv. - - -## Table of Contents - -- [Quick Start](#quick-start) -- [Phase 1 -- Property Level (Feature Planning)](#phase-1----property-level-feature-planning) -- [Phase 2 -- Implementation Level (Architecture Design)](#phase-2----implementation-level-architecture-design) -- [Phase 3 -- Code Generation (Iterative Implementation)](#phase-3----code-generation-iterative-implementation) -- [Checkpoint & Resume](#checkpoint--resume) -- [Intermediate Files Reference](#intermediate-files-reference) -- [Configuration](#configuration) -- [RPG-Encoder](#rpg-encoder) -- RPG extraction, incremental maintenance, and agentic navigation ([detailed docs](zerorepo/rpg_encoder/README.md)) -- [RepoCraft Benchmark](#repocraft-benchmark) -- Benchmark construction and evaluation for repo-level code generation ([detailed docs](repocraft/README.md)) - ---- - -## Quick Start - -### 1. Prepare project directory - -Create a project directory **outside of any existing git repository**. 
ZeroRepo uses git internally to track each implementation step in `workspace/` -- placing it inside an existing repo will cause conflicts. - -``` -my_project/ # Must NOT be inside a git repo -โ”œโ”€โ”€ checkpoints/ # Pipeline state & design artifacts (auto-populated) -โ”‚ โ””โ”€โ”€ repo_data.json # You create this (see below) -โ””โ”€โ”€ workspace/ # Generated repository (auto-initialized as a git repo) -``` +RPG-Kit gives agents a **persistent RPG workspace** so they can plan, generate, understand, and update repositories through a shared graph instead of transient chat history and file search. -```bash -mkdir -p my_project/checkpoints my_project/workspace -``` +The repository also includes the research code: **[ZeroRepo](#zerorepo-requirements--rpg--repository)** implements the forward pipeline (`requirements โ†’ RPG โ†’ repository`), and **[RPG-Encoder](#rpg-encoder-repository--rpg)** implements the reverse pipeline (`repository โ†’ RPG`). -Then write `checkpoints/repo_data.json` describing the project you want to generate: - -```json -{ - "repository_name": "SnakeGame", - "repository_purpose": "A multiplayer snake game with AI opponents, pathfinding, replay recording, and persistent high scores." -} -``` - -### 2. Configure - -Edit `configs/zerorepo_config.yaml` -- set your LLM provider, model, and authentication. Edit `configs/trae_config.yaml` for the code-gen agent's LLM. - -### 3. Run - -```bash -# Run the full pipeline (design + implementation) -python main.py \ - --config configs/zerorepo_config.yaml \ - --checkpoint ../my_project/checkpoints \ - --repo ../my_project/workspace \ - --phase all \ - --resume -``` +--- -Or use the provided script: +## News -```bash -bash scripts/run_main.sh -``` +- [2026-05-15] ๐Ÿ”ฅ **RPG-Kit** is now open source for Claude Code and GitHub Copilot. It uses Repository Planning Graphs as a control layer for long-horizon coding agents, including planning, multi-file generation, repository understanding, and graph-aware updates. 
+- [2026-05-01] ๐ŸŽ‰ **RPG-Encoder** ([*Closing the Loop: Universal Repository Representation with RPG-Encoder*](https://arxiv.org/abs/2602.02084)) has been accepted to **ICML 2026**. +- [2026-03-02] ๐Ÿ”ฅ We have open-sourced the **EpiCoder Feature Tree** at [Hugging Face](https://huggingface.co/datasets/microsoft/EpiCoder-meta-features), providing structured knowledge for repository planning in **ZeroRepo**. +- [2026-02-27] ๐Ÿ”ฅ We released the code for [RPG-Encoder](zerorepo/rpg_encoder/) and [RepoCraft](repocraft/). +- [2026-01-26] ๐Ÿ”ฅ [RPG-ZeroRepo](https://arxiv.org/abs/2509.16198) was accepted as a poster at ICLR 2026. -The pipeline is **fully resumable** -- if interrupted, rerun with `--resume` to continue from the last completed stage. +--- -### 4. Output +## Documentation -``` -my_project/ - checkpoints/ # Intermediate files (design artifacts, state) - workspace/ # The generated repository (git-initialized) - src/ - tests/ - README.md - ... -``` +- [RPG-Kit Guide](RPG-Kit/README.md) โ€” setup, slash commands, MCP tools +- [RPG-Kit Commands Reference](RPG-Kit/docs/commands.md) +- [RPG-Kit CLI Reference](RPG-Kit/docs/cli-reference.md) +- [RPG-Kit Configuration](RPG-Kit/docs/configuration.md) +- [ZeroRepo Pipeline Details](docs/zerorepo-pipeline.md) โ€” Phase 1/2/3, checkpoint files, configuration +- [RPG-Encoder Module](zerorepo/rpg_encoder/README.md) +- [RepoCraft Benchmark](repocraft/README.md) --- -## Phase 1 -- Property Level (Feature Planning) - -**Module:** `zerorepo/rpg_gen/prop_level/` -**Entry:** `PropBuilder.build_feature_tree()` +## RPG-Kit -This phase transforms a high-level project description into a structured feature hierarchy and component decomposition. +RPG-Kit turns Repository Planning Graphs into a control layer for long-horizon AI coding agents. -### Step 1: Feature Selection +> Good planning for coding agents should be grounded, executable, verifiable, and reusable. RPG-Kit makes the plan a graph, not a transient chat artifact. 
-**Agent:** `FeatureSelectAgent` +RPG-Kit gives agents such as Claude Code and GitHub Copilot a persistent RPG workspace for planning, generation, repository understanding, and graph-aware editing. -Takes the `repository_purpose` text and generates a comprehensive **Feature Tree** -- a hierarchical taxonomy of everything the project should do. -``` -Input: "A multiplayer snake game with AI opponents..." - -Output: Feature_tree - โ”œโ”€โ”€ functionality - โ”‚ โ”œโ”€โ”€ game mechanics: [movement, collision detection, food spawning, scoring] - โ”‚ โ”œโ”€โ”€ ai opponent: [strategy, training] - โ”‚ โ””โ”€โ”€ level generation: [static layout, procedural maze, dynamic obstacles] - โ”œโ”€โ”€ algorithm - โ”‚ โ”œโ”€โ”€ pathfinding: [astar, bfs flood fill, monte carlo rollouts] - โ”‚ โ””โ”€โ”€ optimization: [caching, incremental updates, memory management] - โ”œโ”€โ”€ data structures - โ”‚ โ”œโ”€โ”€ grid management: [occupancy map, compact bitset, quadtree index] - โ”‚ โ””โ”€โ”€ snake body: [ring buffer segments, delta encoding, serialization] - โ””โ”€โ”€ ... -``` +### Why RPG-Kit? -**Saved to:** `checkpoints/feature_selection.json` +Coding agents are strong at local edits, but repository-level work requires durable context: requirements, architecture, implementation progress, and dependencies must stay aligned across many steps. -### Step 2: Feature Refactoring +| Without RPG-Kit | With RPG-Kit | +|---|---| +| The agent relies on chat history and file search. | The agent works against a structured RPG workspace. | +| Requirements and design decisions drift over long tasks. | Requirements, features, architecture, and files stay connected in the graph. | +| Multi-file generation can become inconsistent. | Generation follows an explicit planning graph. | +| Updates are often local edits without impact analysis. | Edits are planned through affected RPG nodes and dependencies. | +| The repository map is rebuilt mentally every time. 
| The RPG is searchable, explorable, and reusable across tasks. | -**Agent:** `FeatureRefactorAgent` +### What can I do with RPG-Kit? -Groups the flat feature tree into **Components** -- logical modules that will become top-level source directories. +| Task | Start from | RPG-Kit workflow | Benefit | +|---|---|---|---| +| **Build a new repository** | A natural-language requirement | Create an RPG plan, refine it into architecture/tasks, then generate code. | A persistent plan for long-horizon multi-file generation. | +| **Understand an existing repository** | An existing codebase | Encode the repo into an RPG workspace, then search, explore, and explain through MCP tools (`search_rpg`, `explore_rpg`, `get_node_detail`). | A structured repository map beyond chat history and file search. | +| **Update an existing repository** | A codebase + change request | Use the RPG to locate affected nodes, plan the edit, and update code and graph together (`/rpgkit.rpg_edit "..."`). | Graph-aware edits that account for cross-file dependencies. | -``` -Input: Feature_tree (above) - -Output: Components - โ”œโ”€โ”€ gameplay.core โ†’ game mechanics, rules, levels - โ”œโ”€โ”€ simulation.engine โ†’ deterministic tick, physics, timing - โ”œโ”€โ”€ data.model โ†’ grid structures, snake body, caching - โ”œโ”€โ”€ ai.pathfinding โ†’ pathfinding, opponent AI, training - โ”œโ”€โ”€ io.input_persistence โ†’ input handling, file I/O, replay - โ””โ”€โ”€ network.multiplayer โ†’ transport, netcode, matchmaking -``` +### Quick Start -**Saved to:** `checkpoints/feature_refactoring.json` - -### Output - -The combined result is written to `checkpoints/repo_data.json`: - -```json -{ - "repository_name": "SnakeGame", - "repository_purpose": "...", - "Feature_tree": { ... }, - "Component": [ - { - "name": "gameplay.core", - "refactored_subtree": { - "gameplay": { - "rules": ["movement", "collision detection", "scoring"], - "levels": ["static layout", "procedural generation"] - } - } - }, - ... 
- ] -} +```bash +uv tool install rpgkit-cli \ + --from "git+https://github.com/microsoft/RPG-ZeroRepo.git#subdirectory=RPG-Kit" +rpgkit check ``` ---- +**On an existing repository:** -## Phase 2 -- Implementation Level (Architecture Design) - -**Module:** `zerorepo/rpg_gen/impl_level/` -**Entry:** `ImplBuilder.run()` - -This phase turns the feature plan into concrete file layouts, interface designs, and implementation tasks. - -### Step 1: Create Initial RPG - -Builds the initial **Repository Planning Graph (RPG)** from Component data. Each component becomes a directory node, features become leaf nodes. - -**Saved to:** `checkpoints/global_repo_rpg.json` - -### Step 2: File Design (Skeleton) - -**Agent:** `FileDesigner` (via `RawSkeletonAgent` + `GroupSkeletonAgent`) - -Two sub-phases: -1. **Raw Skeleton** -- LLM generates a high-level file/directory layout -2. **Group Skeleton** -- assigns features to concrete file paths, adds `__init__.py` files - -``` -Output: RepoSkeleton (76 files for SnakeGame example) - SnakeGame/ - โ”œโ”€โ”€ src/ - โ”‚ โ”œโ”€โ”€ ai/ - โ”‚ โ”‚ โ”œโ”€โ”€ analysis.py - โ”‚ โ”‚ โ”œโ”€โ”€ planning/ - โ”‚ โ”‚ โ”‚ โ”œโ”€โ”€ pathfinding.py - โ”‚ โ”‚ โ”‚ โ”œโ”€โ”€ heuristics.py - โ”‚ โ”‚ โ”‚ โ””โ”€โ”€ simulation.py - โ”‚ โ”‚ โ””โ”€โ”€ opponent/strategies.py - โ”‚ โ”œโ”€โ”€ gameplay/ - โ”‚ โ”‚ โ”œโ”€โ”€ rules/{movement,collision,input}.py - โ”‚ โ”‚ โ””โ”€โ”€ levels/{layout,procedural}.py - โ”‚ โ”œโ”€โ”€ data/model/{memory,cache,segments}.py - โ”‚ โ”œโ”€โ”€ io/persistence/serialization.py - โ”‚ โ”œโ”€โ”€ simulation/engine/{deterministic,random}.py - โ”‚ โ””โ”€โ”€ network/transport/{reliability,aggregation}.py - โ”œโ”€โ”€ tests/ (mirrored structure) - โ””โ”€โ”€ pyproject.toml, README.md, ... +```bash +cd your-existing-repo +rpgkit init . 
--encode +# In Claude Code or GitHub Copilot: +# /rpgkit.rpg_edit "Add rate limiting to all API endpoints" ``` -**Saved to:** `checkpoints/skeleton.json`, `checkpoints/skeleton_traj.json` - -### Step 3: Function Design (Interface) - -**Agent:** `FuncDesigner` (via `DataFlowAgent` + `BaseClassAgent` + `InterfaceAgent`) +**Generate a new repository:** -Three sub-phases: -1. **Data Flow Analysis** -- identifies data dependencies between components -2. **Base Class Design** -- designs shared data structures and base classes -3. **Interface Design** -- for each file (in topological order), designs classes/functions with full signatures, docstrings, and type hints - -``` -Output per file: -{ - "src/io/persistence/serialization.py": { - "file_code": "class HighScoreBinarySerializer:\n def serialize(self, entries) -> bytes: ...\n ...", - "units": ["class HighScoreBinarySerializer"], - "units_to_features": { - "class HighScoreBinarySerializer": ["high score binary serialization"] - }, - "units_to_code": { - "class HighScoreBinarySerializer": "class HighScoreBinarySerializer:\n ..." - } - } -} +```bash +rpgkit init my-project +cd my-project +# In Claude Code or GitHub Copilot: +# /rpgkit.feature_spec Build a CLI tool for managing Docker containers +# /rpgkit.feature_build โ†’ /rpgkit.feature_refactor โ†’ ... โ†’ /rpgkit.code_gen ``` -**Saved to:** `checkpoints/graph.json`, `checkpoints/global_repo_rpg.json` (updated) +See [`RPG-Kit/README.md`](RPG-Kit/README.md) for the full setup, slash commands, and MCP tools. +Also available in [็ฎ€ไฝ“ไธญๆ–‡](RPG-Kit/README.zh-CN.md) ยท [ๆ—ฅๆœฌ่ชž](RPG-Kit/README.ja-JP.md) ยท [ํ•œ๊ตญ์–ด](RPG-Kit/README.ko-KR.md) ยท [เคนเคฟเคจเฅเคฆเฅ€](RPG-Kit/README.hi-IN.md). -### Step 4: Task Planning +### Overview -**Agent:** `TaskPlanner` - -Splits the interface designs into **implementation batches** -- one per file, each containing the units (classes/functions) to implement, their skeleton code, and feature descriptions. 
- -``` -Output: tasks.json -{ - "planned_batches_dict": { - "io.input_persistence": { - "src/io/persistence/serialization.py": [ - { - "task_id": "src_io_persistence_serialization.py_...", - "file_path": "src/io/persistence/serialization.py", - "units_key": ["class HighScoreBinarySerializer"], - "unit_to_code": { ... }, - "unit_to_features": { ... }, - "priority": 0, - "subtree": "io.input_persistence", - "task_type": "implementation" - } - ], - "src/io/levels/portable.py": [...], - ... - }, - "gameplay.core": { ... }, - ... - } -} -``` +RPG-Kit gives Claude Code and GitHub Copilot a **persistent RPG workspace** for repository-level tasks. Instead of relying only on chat history, file search, and local context, the agent can carry repository-level planning state across long tasks. -**Saved to:** `checkpoints/tasks.json` +RPG-Kit exposes the RPG workspace through three interfaces: ---- +- **CLI setup** โ€” initialize RPG-Kit in a new or existing repository with `rpgkit init`. +- **Slash commands** โ€” run build, understand, and update workflows inside the coding agent (`/rpgkit.feature_spec`, `/rpgkit.code_gen`, `/rpgkit.encode`, `/rpgkit.rpg_edit`, and more). +- **MCP graph tools** โ€” let the agent search, inspect, and traverse RPG nodes during coding (`search_rpg`, `explore_rpg`, `get_node_detail`, `list_rpg_tree`). -## Phase 3 -- Code Generation (Iterative Implementation) +RPG-Kit can keep the RPG in sync with code changes through a post-commit hook, so edits made by the agent or directly in code can be reflected back into the graph. -**Module:** `zerorepo/code_gen/` -**Entry:** `IterativeCodeGenerator.task_executor()` +**Supported agents:** Claude Code (verified), GitHub Copilot (verified). -This phase executes each task batch inside a **Docker container** running the trae-agent (an LLM-powered coding agent), using a test-driven development (TDD) loop. 
+### RPG-Kit in action -### Per-Batch Iterative Loop +The graph below was produced by running `/rpgkit.encode` on this repository: -For each task batch, the system runs up to `max_iterations` cycles: +![RPG visualization of this repository](docs/rpgkit_visualized_graph.png) -``` -Iteration N: - โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” - โ”‚ 1. Generate Tests โ”‚ trae-agent writes test_xxx.patch - โ”‚ (TDD first) โ”‚ โ†’ git commit "test: add tests for ..." - โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค - โ”‚ 2. Generate Code โ”‚ trae-agent writes code_xxx.patch - โ”‚ (implementation) โ”‚ โ†’ git commit "feat: implement ..." - โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค - โ”‚ 3. Run Tests โ”‚ pytest inside container - โ”‚ (validation) โ”‚ - โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค - โ”‚ 4. Analyze Result โ”‚ - โ”‚ โ”œโ”€ PASS โ†’ done โ”‚ - โ”‚ โ”œโ”€ TEST_ERROR โ†’ regenerate tests (go to 1) - โ”‚ โ”œโ”€ CODE_ERROR โ†’ regenerate code (go to 2) - โ”‚ โ””โ”€ ENV_ERROR โ†’ fix environment - โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ -``` +This illustrates how RPG-Kit turns an existing repository into an RPG that agents can search, explore, and use for graph-aware edits. -### Workflow Types +See [`RPG-Kit/`](RPG-Kit/) for the full guide. 
-| Type | Description | When Used | -|------|-------------|-----------| -| `TEST_DEVELOPMENT` | Write tests from scratch | First iteration | -| `TEST_FIX` | Fix failing tests | Test code has issues | -| `CODE_INCREMENTAL` | Implement code from scratch | First iteration | -| `CODE_BUG_FIX` | Fix code based on test failures | Code has bugs | -| `ENV_SETUP` | Install packages / fix imports | Environment issues | +--- -### Results per Task +## Standalone research code -Each trae-agent invocation produces: +ZeroRepo and RPG-Encoder are the standalone research pipelines for constructing RPGs: ``` -results/ -โ”œโ”€โ”€ test_gen_1770643795/ -โ”‚ โ”œโ”€โ”€ task.txt # Task description sent to agent -โ”‚ โ”œโ”€โ”€ test_gen_1770643795.json # Full trajectory (LLM interactions) -โ”‚ โ””โ”€โ”€ test_gen_1770643795.patch # Unified diff patch -โ”œโ”€โ”€ code_gen_1770644020/ -โ”‚ โ”œโ”€โ”€ task.txt -โ”‚ โ”œโ”€โ”€ code_gen_1770644020.json -โ”‚ โ””โ”€โ”€ code_gen_1770644020.patch -โ””โ”€โ”€ ... +requirements โ†’ RPG โ†’ repository # ZeroRepo +repository โ†’ RPG # RPG-Encoder ``` -### Commit Messages +RPG-Kit (above) is the agent-facing layer that uses RPGs from either direction. The components below run **without an agent CLI** โ€” useful for paper reproduction and benchmarking. -After each successful patch, the system uses a separate LLM call to generate conventional commit messages: +| Component | Use it for | +|---|---| +| **[ZeroRepo](#zerorepo-requirements--rpg--repository)** | Reproduce the RPG paper's forward pipeline. | +| **[RPG-Encoder](#rpg-encoder-repository--rpg)** | Reproduce the RPG-Encoder paper's reverse pipeline. | +| **[RepoCraft](#repocraft-benchmark)** | Evaluate repository-level code generation. 
| -``` -test: add unit tests for HighScoreBinarySerializer -feat: implement HighScoreBinarySerializer binary encoding/decoding -fix: correct deserialization of optional player_name field -``` +### ZeroRepo: requirements โ†’ RPG โ†’ repository ---- +> *RPG: A Repository Planning Graph for Unified and Scalable Codebase Generation* โ€” [arXiv:2509.16198](https://arxiv.org/abs/2509.16198), ICLR 2026 -## Checkpoint & Resume - -Every major stage writes its output to `checkpoints/` and updates `task_manager_state.json`. The pipeline can be resumed from any point. - -### Stage Completion Tracking - -```json -// task_manager_state.json -{ - "feature_selection": true, // Phase 1, Step 1 - "feature_refactoring": true, // Phase 1, Step 2 - "build_skeleton": true, // Phase 2, Step 2 - "build_function": true, // Phase 2, Step 3 - "plan_tasks": true, // Phase 2, Step 4 - "code_generation": false, // Phase 3 (in progress) - "completed_tasks": ["batch_id_1", "batch_id_2"], - "failed_tasks": [], - "current_task": { ... }, - "last_updated": "2026-02-09T13:56:20" -} -``` +ZeroRepo is the forward generation framework. It turns a natural-language project requirement into an RPG, refines the graph into architecture and implementation tasks, and generates a complete repository in dependency-aware order. 
-When `--resume` is passed: -- Completed stages are skipped entirely -- Code generation resumes from the next uncompleted batch -- Failed batches can be retried +Pipeline: ---- - -## Intermediate Files Reference - -| File | Phase | Description | -|------|-------|-------------| -| `repo_data.json` | Input / Phase 1 | Repository name + purpose + Feature_tree + Components | -| `feature_selection.json` | Phase 1 | Raw feature tree from LLM | -| `feature_refactoring.json` | Phase 1 | Component decomposition results | -| `global_repo_rpg.json` | Phase 2 | Repository Planning Graph (full node/edge graph) | -| `skeleton.json` | Phase 2 | File/directory layout with code skeletons | -| `skeleton_traj.json` | Phase 2 | LLM trajectories from skeleton design | -| `graph.json` | Phase 2 | Complete interface design (data flow + base classes + per-file interfaces) | -| `tasks.json` | Phase 2 | Implementation task batches grouped by subtree and file | -| `cur_repo_rpg.json` | Phase 3 | Current RPG state (updated as code is generated) | -| `batch_trajectory.json` | Phase 3 | Execution records per batch (trajectories, commits, failures) | -| `execution_history.json` | Phase 3 | Flat list of all workflow executions | -| `iteration_state.json` | Phase 3 | Current iteration state for active batch | -| `task_manager_state.json` | All | Stage completion flags + task progress | +1. **Feature planning** โ€” decompose user requirements into a structured feature tree and component decomposition. +2. **Architecture design** โ€” map features to modules, files, classes, interfaces, and data flows. Build the full RPG. +3. **Graph-guided code generation** โ€” generate interdependent files in dependency order, using the RPG as the persistent execution state. 
---- +![ZeroRepo three-phase pipeline](docs/pipeline.png) -## Configuration - -### `configs/zerorepo_config.yaml` - -Controls the overall pipeline: LLM settings for design phases, prop-level parameters, impl-level config paths, Docker settings, and trae-agent options. - -```yaml -llm: - model: "gpt-5-mini-20250807" - provider: "openai" - api_key: "..." - -prop_level: - feature_selection: - mode: "simple" # "simple" or "feature" (multi-agent with vector DB) - feature_refactoring: - refactor_max_iterations: 40 - -impl_level: - file_design_cfg_path: "configs/file_design_config.yaml" - func_design_cfg_path: "configs/func_design_config.yaml" - -code_generation: - docker: - image: "python-azure-pytest:3.12" - container_name: "zerorepo_2" - workspace: "/tare_workspace" - trae_agent: - trae_config: "./configs/trae_config.yaml" - max_iterations: 5 - max_retries_per_workflow: 3 +```bash +python main.py \ + --config configs/zerorepo_config.yaml \ + --checkpoint ../my_project/checkpoints \ + --repo ../my_project/workspace \ + --phase all \ + --resume ``` -### `configs/trae_config.yaml` - -Controls the coding agent (trae-agent) that runs inside Docker: +See [`docs/zerorepo-pipeline.md`](docs/zerorepo-pipeline.md) for phase details, checkpoint files, intermediate file reference, and configuration. -```yaml -model_providers: - openai: - provider: openai - api_key: "..." +### RPG-Encoder: repository โ†’ RPG -models: - trae_agent_model: - model_provider: openai - model: gpt-5-mini-20250807 - max_completion_tokens: 16134 - temperature: 0.0 +> *Closing the Loop: Universal Repository Representation with RPG-Encoder* โ€” [arXiv:2602.02084](https://arxiv.org/abs/2602.02084), ICML 2026 -agents: - trae_agent: - model: trae_agent_model - max_steps: 300 - tools: [bash, str_replace_based_edit_tool, sequentialthinking, task_done] -``` +RPG-Encoder closes the loop by mapping existing codebases back into Repository Planning Graphs. 
The resulting RPG captures both semantic intent and structural dependencies. ---- +This enables agents to: -## RPG-Encoder +- understand what each part of the repository is for; +- navigate from features to files/functions and back; +- update RPGs incrementally after code changes; +- use the graph as context for maintenance and editing tasks. -**Module:** `zerorepo/rpg_encoder/` -**Paper:** *"Closing the Loop: Universal Repository Representation with RPG-Encoder"* ([arXiv:2602.02084](https://arxiv.org/abs/2602.02084)) - -RPG-Encoder generalizes the Repository Planning Graph (RPG) from a static generative blueprint into a **unified, high-fidelity representation** for existing repositories. It closes the reasoning loop between comprehension and generation through three mechanisms: +#### Three mechanisms | Mechanism | Module | Description | -|-----------|--------|-------------| +|---|---|---| | **Encoding** | `rpg_parsing/` | Extracts RPG from raw codebases via semantic lifting, structure reorganization, and artifact grounding | -| **Evolution** | `rpg_parsing/rpg_evolution.py` | Incrementally maintains RPG via commit-level diff parsing, reducing overhead by 95.7% | +| **Evolution** | `rpg_parsing/rpg_evolution.py` | Incrementally maintains RPGs via commit-level diff parsing, avoiding full re-encoding after every change | | **Operation** | `rpg_agent/` | Provides a unified agentic interface (SearchNode, FetchNode, ExploreRPG) for structure-aware navigation | -### Quick Start +#### Quick start (standalone) ```bash -# Parse a repository into RPG python parse_rpg.py parse \ --repo-dir /path/to/repo \ --repo-name myrepo \ --save-dir ./output -# Incrementally update after code changes +# Incrementally update after code changes: python parse_rpg.py update \ --repo-dir /path/to/updated/repo \ --last-repo-dir /path/to/old/repo \ @@ -494,52 +207,30 @@ python parse_rpg.py update \ See [`zerorepo/rpg_encoder/README.md`](zerorepo/rpg_encoder/README.md) for detailed documentation. 
---- +### RepoCraft Benchmark -## RepoCraft Benchmark +RepoCraft is the benchmark and evaluation suite for repository-level code generation. It evaluates whether a model can plan and generate repository-scale software artifacts rather than isolated functions. -**Module:** `repocraft/` -**Paper:** *"RPG: A Repository Planning Graph for Unified and Scalable Codebase Generation"* ([arXiv:2509.16198](https://arxiv.org/abs/2509.16198)) - -RepoCraft is a benchmark for evaluating **repository-level code generation**, consisting of **1,052 tasks** across 6 real-world Python projects (scikit-learn, pandas, sympy, statsmodels, requests, django). It assesses whether AI agents can generate repositories that are functionally complete, algorithmically correct, and at real-world scale. +It consists of **1,052 tasks** across 6 real-world Python projects (scikit-learn, pandas, sympy, statsmodels, requests, django). | Metric | Description | -|--------|-------------| +|---|---| | **Coverage** | Proportion of reference feature categories covered | | **Accuracy** | Pass Rate (unit tests) and Voting Rate (semantic checks) | | **Code Statistics** | File count, Lines of Code (LOC), Token count | -### Quick Start - -```bash -# Build benchmark (parse โ†’ refactor โ†’ sample โ†’ generate queries) -python -m repocraft.benchmark pipeline \ - --repo_dir /path/to/scikit-learn \ - --output_dir ./all_results \ - --repo_name sklearn - -# Evaluate a generated repository -python -m repocraft.run \ - --tasks_file ./all_results/task_results/sklearn.json \ - --method_path /path/to/generated/MLKit-Py \ - --cache_dir ./eval_cache - -# Analyze results -python -m repocraft.evaluation --base-dir ./eval_cache --show-failed -``` - See [`repocraft/README.md`](repocraft/README.md) for the full pipeline documentation. 
--- -## Acknowledgements -We thank the following projects for inspiration and valuable prior work that helped shape this project: -- Trae Agent: https://github.com/bytedance/trae-agent +## Papers +- **RPG / ZeroRepo:** Luo et al., *RPG: A Repository Planning Graph for Unified and Scalable Codebase Generation*, [arXiv:2509.16198](https://arxiv.org/abs/2509.16198), ICLR 2026. +- **RPG-Encoder:** Luo et al., *Closing the Loop: Universal Repository Representation with RPG-Encoder*, [arXiv:2602.02084](https://arxiv.org/abs/2602.02084), ICML 2026. -## Cite & References -If this codebase is helpful to your research, we would appreciate it if you consider citing our paper. -``` +### Citation + +```bibtex @article{luo2025rpg, title={RPG: A Repository Planning Graph for Unified and Scalable Codebase Generation}, author={Luo, Jane and Zhang, Xin and Liu, Steven and Wu, Jie and Liu, Jianfeng and Huang, Yiming and Huang, Yangyu and Yin, Chengyu and Xin, Ying and Zhan, Yuefeng and others}, @@ -554,3 +245,16 @@ If this codebase is helpful to your research, we would appreciate it if you cons year={2026} } ``` + +--- + +## Acknowledgements + +We thank the following projects for inspiration and valuable prior work that helped shape this project: + +- [Trae Agent](https://github.com/bytedance/trae-agent) +- [GitHub Spec-Kit](https://github.com/github/spec-kit) โ€” foundation for RPG-Kit's CLI and slash command structure + +## License + +MIT License โ€” see [LICENSE](LICENSE) for details. 
diff --git a/RPG-Kit/.gitignore b/RPG-Kit/.gitignore new file mode 100644 index 0000000..75fae69 --- /dev/null +++ b/RPG-Kit/.gitignore @@ -0,0 +1,234 @@ +# --- Python bytecode / cache --- +__pycache__/ +**/__pycache__/ +*.py[cod] +*$py.class + +# --- RPG-Kit generated data & temp --- +.rpgkit/data/ +.rpgkit/tmp/ +.rpgkit/scripts/**/__pycache__/ + +# --- Logs --- +*.log + +# --- Virtual environments --- +venv/ +.venv/ +.venv_dev/ +/env/ +.rpgkit_dev_env/ + +# --- IDE --- +.idea/ +.vscode/ +*.swp +*.swo + +# --- Testing --- +.pytest_cache/ +.coverage +htmlcov/ + +# --- Distribution --- +.genreleases/ +dist/ +build/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py.cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. 
+#Pipfile.lock + +# UV +# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +#uv.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock +#poetry.toml + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python. +# https://pdm-project.org/en/latest/usage/project/#working-with-version-control +#pdm.lock +#pdm.toml +.pdm-python +.pdm-build/ + +# pixi +# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control. +#pixi.lock +# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one +# in the .venv directory. It is recommended not to include this directory in version control. +.pixi + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.envrc +.venv +/env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +# Abstra +# Abstra is an AI-powered process automation framework. +# Ignore directories containing user credentials, local state, and settings. +# Learn more at https://abstra.io/docs +.abstra/ + +# Visual Studio Code +# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore +# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore +# and can be added to the global gitignore or merged into this file. However, if you prefer, +# you could uncomment the following to ignore the entire vscode folder +# .vscode/ + +# Ruff stuff: +.ruff_cache/ + +# PyPI configuration file +.pypirc + +# Cursor +# Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to +# exclude from AI features like autocomplete and code analysis. 
Recommended for sensitive data +# refer to https://docs.cursor.com/context/ignore-files +.cursorignore +.cursorindexingignore + +# Marimo +marimo/_static/ +marimo/_lsp/ +__marimo__/ + +# Planning +workspace*/ +plans/ +# --- RPG-Kit workspace symlinks --- +.claude + +# RPG-Kit ignores (managed by `rpgkit init/update`) +.rpgkit/ +.vscode/mcp.json +.vscode/tasks.json +.mcp.json +.github/agents/ +.github/prompts/ diff --git a/RPG-Kit/README.hi-IN.md b/RPG-Kit/README.hi-IN.md new file mode 100644 index 0000000..565592a --- /dev/null +++ b/RPG-Kit/README.hi-IN.md @@ -0,0 +1,202 @@ +

RPG-Kit

+ +

+ English | + ็ฎ€ไฝ“ไธญๆ–‡ | + ๆ—ฅๆœฌ่ชž | + ํ•œ๊ตญ์–ด | + เคนเคฟเคจเฅเคฆเฅ€ +

+ +## AI coding agents เค•เฅ‹ เคชเฅ‚เคฐเฅ‡ repository เค•เฅ‹ เคธเคฎเคเคจเฅ‡ เคฆเฅ‡เค‚ + +AI coding agents เคถเค•เฅเคคเคฟเคถเคพเคฒเฅ€ เคนเฅ‹เคคเฅ‡ เคนเฅˆเค‚, เคฒเฅ‡เค•เคฟเคจ เคตเฅ‡ เค…เค•เฅเคธเคฐ file-by-file เค•เคพเคฎ เค•เคฐเคคเฅ‡ เคนเฅˆเค‚เฅค เคœเฅˆเคธเฅ‡-เคœเฅˆเคธเฅ‡ project เคฌเคขเคผเคคเคพ เคนเฅˆ, เคตเฅ‡ requirements, architecture, dependencies, เค”เคฐ เคชเคฟเค›เคฒเฅ‡ design decisions เค•เคพ track เค–เฅ‹ เคธเค•เคคเฅ‡ เคนเฅˆเค‚เฅค + +RPG-Kit เค‡เคธ เคธเคฎเคธเฅเคฏเคพ เค•เฅ‹ **Repository Planning Graph (RPG)** maintain เค•เคฐเค•เฅ‡ เคนเคฒ เค•เคฐเคจเฅ‡ เคฎเฅ‡เค‚ เคฎเคฆเคฆ เค•เคฐเคคเคพ เคนเฅˆ: เคเค• structured map เคœเฅ‹ requirements, features, files, components, เค”เคฐ dependencies เค•เฅ‹ เคœเฅ‹เคกเคผเคคเคพ เคนเฅˆเฅค + +เคœเคฌ เค†เคช เคšเคพเคนเคคเฅ‡ เคนเฅˆเค‚ เค•เคฟ AI agents isolated prompts เค•เฅ‡ เคฌเคœเคพเคฏ repository-level context เค•เฅ‡ เคธเคพเคฅ เค•เคพเคฎ เค•เคฐเฅ‡เค‚, เคคเคฌ RPG-Kit เค•เคพ เค‰เคชเคฏเฅ‹เค— เค•เคฐเฅ‡เค‚เฅค + +### RPG-Kit เค•เฅเคฏเฅ‹เค‚? + +| AI coding agents เค•เฅ€ common problem | RPG-Kit เค•เฅˆเคธเฅ‡ เคฎเคฆเคฆ เค•เคฐเคคเคพ เคนเฅˆ | +|---|---| +| Agent เค•เฅเค› prompts เค•เฅ‡ เคฌเคพเคฆ requirements เคญเฅ‚เคฒ เคœเคพเคคเคพ เคนเฅˆ | Requirements RPG เคฎเฅ‡เค‚ encode เค•เฅ€ เคœเคพเคคเฅ€ เคนเฅˆเค‚ | +| Agent related files เค•เฅ‹ เคธเคฎเคเฅ‡ เคฌเคฟเคจเคพ เคเค• file edit เค•เคฐเคคเคพ เคนเฅˆ | Files, components, เค”เคฐ dependencies graph เคฎเฅ‡เค‚ connected เคนเฅ‹เคคเฅ‡ เคนเฅˆเค‚ | +| Generated code original plan เคธเฅ‡ drift เคนเฅ‹ เคœเคพเคคเคพ เคนเฅˆ | Planning artifacts เค”เคฐ code aligned เคฐเค–เฅ‡ เคœเคพเคคเฅ‡ เคนเฅˆเค‚ | +| Existing repositories เค•เฅ‹ agents เค•เฅ‡ เคฒเคฟเค เคธเคฎเคเคจเคพ เค•เค เคฟเคจ เคนเฅ‹เคคเคพ เคนเฅˆ | Codebase เค•เฅ‹ RPG เคฎเฅ‡เค‚ encode เค•เคฟเคฏเคพ เคœเคพ เคธเค•เคคเคพ เคนเฅˆ | +| Targeted edits hidden dependencies เคคเฅ‹เคกเคผ เคธเค•เคคเฅ‡ เคนเฅˆเค‚ | Edits graph-aware context เค•เฅ‡ เคธเคพเคฅ เค•เคฟเค เคœเคพเคคเฅ‡ เคนเฅˆเค‚ | + +### เค…เคชเคจเคพ workflow เคšเฅเคจเฅ‡เค‚ + +| Goal | Workflow | Start here | 
+|---|---|---| +| Requirements เคธเฅ‡ เคจเคฏเคพ project create เค•เคฐเฅ‡เค‚ | Forward workflow | [`Quick Start: เคจเคฏเคพ Repository`](#quick-start-new-repository) | +| Existing codebase เค•เฅ‹ เคธเคฎเคเฅ‡เค‚ เคฏเคพ update เค•เคฐเฅ‡เค‚ | Reverse workflow | [`Quick Start: เคฎเฅŒเคœเฅ‚เคฆเคพ Repository`](#quick-start-existing-repository) | +| Precise repository-aware edit เค•เคฐเฅ‡เค‚ | Surgical edit workflow | [`Quick Start: เคฎเฅŒเคœเฅ‚เคฆเคพ Repository`](#quick-start-existing-repository) | + +เคจเฅ€เคšเฅ‡ เค‡เคธ repository เค•เฅ‡ เคฒเคฟเค generated graph visualization เค•เคพ เคเค• เคนเคฟเคธเฅเคธเคพ เคนเฅˆเฅค `/rpgkit.encode` เคšเคฒเคพเคเค เค”เคฐ full interactive graph explore เค•เคฐเคจเฅ‡ เค•เฅ‡ เคฒเคฟเค `rpg.html` เค–เฅ‹เคฒเฅ‡เค‚เฅค + +![RPG-Kit repository graph visualization](../docs/rpgkit_visualized_graph.png) + +## Installation + +### เคชเฅ‚เคฐเฅเคตเคพเคชเฅ‡เค•เฅเคทเคพเคเค + +- Python 3.12+ +- [uv](https://docs.astral.sh/uv/) +- Git +- installed เค”เคฐ authenticated AI coding agent CLI: [GitHub Copilot](https://docs.github.com/en/copilot) เคฏเคพ [Claude Code](https://docs.anthropic.com/en/docs/claude-code/setup) + +### RPG-Kit install เค•เคฐเฅ‡เค‚ + +```bash +# Persistent installation (Recommended) +uv tool install rpgkit-cli --from "git+https://github.com/microsoft/RPG-ZeroRepo.git#subdirectory=RPG-Kit" +rpgkit check + +# One-time usage +uvx --from "git+https://github.com/microsoft/RPG-ZeroRepo.git#subdirectory=RPG-Kit" rpgkit init +``` + + + +## Quick Start: เคจเคฏเคพ Repository + +เคœเคฌ เค†เคช เคšเคพเคนเคคเฅ‡ เคนเฅˆเค‚ เค•เคฟ RPG-Kit requirements เค•เฅ‹ เคเค• เคจเค codebase เคฎเฅ‡เค‚ เคฌเคฆเคฒเฅ‡, เคคเฅ‹ เค‡เคธ path เค•เคพ เค‰เคชเคฏเฅ‹เค— เค•เคฐเฅ‡เค‚เฅค + +> [!WARNING] +> เคœเคฟเคจ projects เคฎเฅ‡เค‚ generated code เค•เฅ€ เคฎเคพเคคเฅเคฐเคพ เคฌเคกเคผเฅ€ เคนเฅ‹, เค‰เคจเคฎเฅ‡เค‚ `/rpgkit.design_interfaces` เค”เคฐ `/rpgkit.code_gen` เค•เคพ runtime เคฒเค‚เคฌเคพ เคนเฅ‹ เคธเค•เคคเคพ เคนเฅˆเฅค เคเค• typical example: feature count 100 เคนเฅ‹เคจเฅ‡ 
เคชเคฐ runtime เคฒเค—เคญเค— 30 minutes เคนเฅ‹เคคเคพ เคนเฅˆเฅค + +1. เคจเคฏเคพ project initialize เค•เคฐเฅ‡เค‚: + + ```bash + rpgkit init my-project + cd my-project + ``` + + เคธเคพเคฎเคพเคจเฅเคฏ variants: + + ```bash + rpgkit init my-project --ai claude --script sh + rpgkit init my-project --ai copilot + rpgkit init my-project --github-token $GITHUB_TOKEN + ``` + +2. **[Optional]** เค…เคชเคจเฅ€ requirement documents เค•เฅ‹ `my-project/docs/` เคฎเฅ‡เค‚ เคฐเค–เฅ‡เค‚เฅค + +3. project directory เคฎเฅ‡เค‚ เค…เคชเคจเคพ AI coding agent launch เค•เคฐเฅ‡เค‚เฅค + +4. forward pipeline run เค•เคฐเฅ‡เค‚: + + ```text + /rpgkit.feature_spec + /rpgkit.feature_build + /rpgkit.feature_refactor + [Optional] /rpgkit.feature_edit + /rpgkit.build_skeleton + /rpgkit.build_data_flow + /rpgkit.design_base_classes + /rpgkit.design_interfaces + /rpgkit.plan_tasks + /rpgkit.code_gen + [Optional] /rpgkit.rpg_edit + ``` + +RPG-Kit เค•เฅเคฐเคฎเคฟเค• เคฐเฅ‚เคช เคธเฅ‡ `.rpgkit/data/rpg.json` เคฌเคจเคพเคคเคพ เคนเฅˆ เค”เคฐ เค‡เคธเค•เคพ เค‰เคชเคฏเฅ‹เค— requirements, planning artifacts, generated code, เค”เคฐ dependency information เค•เฅ‹ aligned เคฐเค–เคจเฅ‡ เค•เฅ‡ เคฒเคฟเค เค•เคฐเคคเคพ เคนเฅˆเฅค + + + +## Quick Start: เคฎเฅŒเคœเฅ‚เคฆเคพ Repository + +เคœเคฌ เค†เคชเค•เฅ‡ เคชเคพเคธ เคชเคนเคฒเฅ‡ เคธเฅ‡ repository เคนเฅ‹ เค”เคฐ เค†เคช เคšเคพเคนเคคเฅ‡ เคนเฅ‹เค‚ เค•เคฟ AI agent เค‰เคธเฅ‡ RPG context เค•เฅ‡ เคธเคพเคฅ เคธเคฎเคเฅ‡ เคฏเคพ edit เค•เคฐเฅ‡, เคคเฅ‹ เค‡เคธ path เค•เคพ เค‰เคชเคฏเฅ‹เค— เค•เคฐเฅ‡เค‚เฅค + +> [!WARNING] +> เคฌเคกเคผเฅ‡ projects เค•เฅ‡ เคฒเคฟเค, `rpgkit init . --encode` เค”เคฐ `/rpgkit.encode` เค•เคพ runtime เคฒเค‚เคฌเคพ เคนเฅ‹ เคธเค•เคคเคพ เคนเฅˆเฅค เคเค• typical example: source code files 200 เคนเฅ‹เคจเฅ‡ เคชเคฐ runtime เคฒเค—เคญเค— 100 minutes เคนเฅ‹เคคเคพ เคนเฅˆเฅค + +1. repository root เคฎเฅ‡เค‚ RPG-Kit initialize เค•เคฐเฅ‡เค‚ เค”เคฐ initial graph build เค•เคฐเฅ‡เค‚: + + ```bash + mkdir my-project + cp -r existing-repo/ my-project/ + cd my-project + rpgkit init . --encode + ``` + + เค…เค—เคฐ เค†เคช non-empty directory เค•เฅ‡ confirmation prompt เค•เฅ‹ skip เค•เคฐเคจเคพ เคšเคพเคนเคคเฅ‡ เคนเฅˆเค‚: + + ```bash + rpgkit init . --force --encode + ``` + +2. repository เคฎเฅ‡เค‚ เค…เคชเคจเคพ AI coding agent launch เค•เคฐเฅ‡เค‚เฅค + +3. generated RPG เค•เฅ‹ MCP tools เค”เคฐ slash commands เค•เฅ‡ เคฎเคพเคงเฅเคฏเคฎ เคธเฅ‡ เค‰เคชเคฏเฅ‹เค— เค•เคฐเฅ‡เค‚: + + ```text + /rpgkit.encode # เคœเคฐเฅ‚เคฐเคค เคชเคกเคผเคจเฅ‡ เคชเคฐ full RPG rebuild เค•เคฐเฅ‡เค‚ + /rpgkit.update_rpg # manual incremental update fallback + /rpgkit.rpg_edit # graph-aware code edit + ``` + +4. commits เค•เฅ‡ เคฌเคพเคฆ, RPG-Kit hooks `.rpgkit/data/rpg.json`, `.rpgkit/data/dep_graph.json`, เค”เคฐ `.rpgkit/data/rpg.html` เค•เฅ‹ code changes เค•เฅ‡ เคธเคพเคฅ aligned เคฐเค–เคคเฅ‡ เคนเฅˆเค‚เฅค เค…เค—เคฐ hook fail เคฏเคพ skip เคนเฅ‹ เคœเคพเค, เคคเฅ‹ `/rpgkit.update_rpg` run เค•เคฐเฅ‡เค‚เฅค
+ +## Supported Platforms + +| เคชเฅเคฒเฅ‡เคŸเคซเคผเฅ‰เคฐเฅเคฎ | Claude Code | GitHub Copilot | Codex | +| ----------------------- | ----------- | -------------- | ----- | +| CLI เค‰เคชเคฏเฅ‹เค— | โœ… | โœ…(MCP เคจเคนเฅ€เค‚) | โŒ› | +| VS Code extension เค‰เคชเคฏเฅ‹เค— | โœ… | โœ… | โŒ› | + +| Script | Linux | Windows | Mac | +| ------ | ----- | ------- | --- | +| sh | โœ… | โŒ› | โŒ› | +| ps | N/A | โŒ› | โŒ› | + +## Documentation + +- [Slash command reference](docs/commands.md) โ€” เคนเคฐ `/rpgkit.*` command, inputs, outputs, เค”เคฐ examplesเฅค +- [CLI reference](docs/cli-reference.md) โ€” `rpgkit init`, `rpgkit update`, `rpgkit check`, `rpgkit version`, เค”เคฐ เคธเคญเฅ€ optionsเฅค +- [Configuration](docs/configuration.md) โ€” AI assistant setup, MCP registration, hooks, auto-approval, เค”เคฐ troubleshootingเฅค +- [Project structure](docs/project-structure.md) โ€” RPG-Kit เคฆเฅเคตเคพเคฐเคพ เคฌเคจเคพเค เค—เค files เค”เคฐ directoriesเฅค + +## เค†เค—เคพเคฎเฅ€ เคซเฅ€เคšเคฐเฅเคธ + +- **เคธเคฐเคฒ decoder commands:** เคฎเฅŒเคœเฅ‚เคฆเคพ decoder flow เค•เฅ‹ เค•เคฎ commands เคฎเฅ‡เค‚ merge เค•เคฐเคจเคพ, เคœเคฟเคธเคฎเฅ‡เค‚ end-to-end repository generation เค•เฅ‡ เคฒเคฟเค `/rpgkit.generate_repo`, เค”เคฐ feature generation เคคเคฅเคพ RPG planning เค•เฅ‡ เคฒเคฟเค `/rpgkit.generate_feature` plus `/rpgkit.plan` เคถเคพเคฎเคฟเคฒ เคนเฅˆเค‚เฅค +- **Multi-language support:** Go, C++, Rust, JavaScript/TypeScript, เค”เคฐ เค…เคจเฅเคฏ เค•เฅ‡ เคฒเคฟเค support เคœเฅ‹เคกเคผเคจเคพเฅค +- **เค…เคงเคฟเค• platform integrations:** เค…เคฒเค—-เค…เคฒเค— systems เคชเคฐ เค…เคฒเค—-เค…เคฒเค— AI coding agents เค•เฅ‡ เคฒเคฟเค CLI เค”เคฐ VS Code extension workflows เคฎเฅ‡เค‚ RPG-Kit support เค•เคฐเคจเคพเฅค + +## Troubleshooting + +**AI assistant CLI เคจเคนเฅ€เค‚ เคฎเคฟเคฒเคพ:** `rpgkit check` run เค•เคฐเฅ‡เค‚, selected assistant CLI install เค”เคฐ authenticate เค•เคฐเฅ‡เค‚, เคซเคฟเคฐ `rpgkit init` เคฏเคพ `rpgkit update` เคฆเฅ‹เคฌเคพเคฐเคพ run เค•เคฐเฅ‡เค‚เฅค + +**MCP tools `rpg_unavailable` 
report เค•เคฐเคคเฅ‡ เคนเฅˆเค‚:** `.rpgkit/data/rpg.json` create เค•เคฐเคจเฅ‡ เค•เฅ‡ เคฒเคฟเค `/rpgkit.encode` run เค•เคฐเฅ‡เค‚เฅค + +**Incremental update failed:** `.rpgkit/logs/update_rpg.log` inspect เค•เคฐเฅ‡เค‚, เคซเคฟเคฐ `/rpgkit.update_rpg` run เค•เคฐเฅ‡เค‚เฅค + +**Rate limits เคฏเคพ private repo access เค•เฅ‡ เค•เคพเคฐเคฃ template download fail เคนเฅ‹เคคเคพ เคนเฅˆ:** `--github-token $GITHUB_TOKEN` pass เค•เคฐเฅ‡เค‚ เคฏเคพ `GH_TOKEN` / `GITHUB_TOKEN` set เค•เคฐเฅ‡เค‚เฅค + +## License + +MIT License - เคตเคฟเคตเคฐเคฃ เค•เฅ‡ เคฒเคฟเค [LICENSE](LICENSE) เคฆเฅ‡เค–เฅ‡เค‚เฅค + +## Acknowledgements + +[GitHub Spec-Kit](https://github.com/github/spec-kit) เคชเคฐ เค†เคงเคพเคฐเคฟเคคเฅค diff --git a/RPG-Kit/README.ja-JP.md b/RPG-Kit/README.ja-JP.md new file mode 100644 index 0000000..6fa1cdb --- /dev/null +++ b/RPG-Kit/README.ja-JP.md @@ -0,0 +1,202 @@ +

RPG-Kit

+ +

+ English | + ็ฎ€ไฝ“ไธญๆ–‡ | + ๆ—ฅๆœฌ่ชž | + ํ•œ๊ตญ์–ด | + เคนเคฟเคจเฅเคฆเฅ€ +

+ +## AI ใ‚ณใƒผใƒ‡ใ‚ฃใƒณใ‚ฐใ‚จใƒผใ‚ธใ‚งใƒณใƒˆใซใƒชใƒใ‚ธใƒˆใƒชๅ…จไฝ“ใ‚’็†่งฃใ•ใ›ใ‚‹ + +AI ใ‚ณใƒผใƒ‡ใ‚ฃใƒณใ‚ฐใ‚จใƒผใ‚ธใ‚งใƒณใƒˆใฏๅผทๅŠ›ใงใ™ใŒใ€ๅคšใใฎๅ ดๅˆใ€ใƒ•ใ‚กใ‚คใƒซๅ˜ไฝใงไฝœๆฅญใ—ใพใ™ใ€‚ใƒ—ใƒญใ‚ธใ‚งใ‚ฏใƒˆใŒๆˆ้•ทใ™ใ‚‹ใจใ€่ฆไปถใ€ใ‚ขใƒผใ‚ญใƒ†ใ‚ฏใƒใƒฃใ€ไพๅญ˜้–ขไฟ‚ใ€้ŽๅŽปใฎ่จญ่จˆๅˆคๆ–ญใ‚’่ฆ‹ๅคฑใ†ใ“ใจใŒใ‚ใ‚Šใพใ™ใ€‚ + +RPG-Kit ใฏ **Repository Planning Graph (RPG)** ใ‚’็ถญๆŒใ™ใ‚‹ใ“ใจใงใ€ใ“ใฎๅ•้กŒใฎ่งฃๆฑบใ‚’ๆ”ฏๆดใ—ใพใ™ใ€‚RPG ใฏใ€่ฆไปถใ€ๆฉŸ่ƒฝใ€ใƒ•ใ‚กใ‚คใƒซใ€ใ‚ณใƒณใƒใƒผใƒใƒณใƒˆใ€ไพๅญ˜้–ขไฟ‚ใ‚’ๆŽฅ็ถšใ™ใ‚‹ๆง‹้€ ๅŒ–ใ•ใ‚ŒใŸใƒžใƒƒใƒ—ใงใ™ใ€‚ + +ๅญค็ซ‹ใ—ใŸใƒ—ใƒญใƒณใƒ—ใƒˆใงใฏใชใใ€ใƒชใƒใ‚ธใƒˆใƒชใƒฌใƒ™ใƒซใฎใ‚ณใƒณใƒ†ใ‚ญใ‚นใƒˆใง AI ใ‚จใƒผใ‚ธใ‚งใƒณใƒˆใซไฝœๆฅญใ•ใ›ใŸใ„ๅ ดๅˆใซ RPG-Kit ใ‚’ไฝฟ็”จใ—ใพใ™ใ€‚ + +### RPG-Kit ใ‚’ไฝฟใ†็†็”ฑ + +| AI ใ‚ณใƒผใƒ‡ใ‚ฃใƒณใ‚ฐใ‚จใƒผใ‚ธใ‚งใƒณใƒˆใซใ‚ˆใใ‚ใ‚‹ๅ•้กŒ | RPG-Kit ใซใ‚ˆใ‚‹่งฃๆฑบ | +|---|---| +| ๆ•ฐๅ›žใฎใƒ—ใƒญใƒณใƒ—ใƒˆใฎๅพŒใซใ‚จใƒผใ‚ธใ‚งใƒณใƒˆใŒ่ฆไปถใ‚’ๅฟ˜ใ‚Œใ‚‹ | ่ฆไปถใŒ RPG ใซใ‚จใƒณใ‚ณใƒผใƒ‰ใ•ใ‚Œใพใ™ | +| ้–ข้€ฃใƒ•ใ‚กใ‚คใƒซใ‚’็†่งฃใ›ใšใซ 1 ใคใฎใƒ•ใ‚กใ‚คใƒซใ ใ‘ใ‚’็ทจ้›†ใ™ใ‚‹ | ใƒ•ใ‚กใ‚คใƒซใ€ใ‚ณใƒณใƒใƒผใƒใƒณใƒˆใ€ไพๅญ˜้–ขไฟ‚ใŒใ‚ฐใƒฉใƒ•ใงๆŽฅ็ถšใ•ใ‚Œใพใ™ | +| ็”Ÿๆˆใ•ใ‚ŒใŸใ‚ณใƒผใƒ‰ใŒๅ…ƒใฎ่จˆ็”ปใ‹ใ‚‰ใšใ‚Œใฆใ„ใ | ่จˆ็”ปๆˆๆžœ็‰ฉใจใ‚ณใƒผใƒ‰ใŒๆ•ดๅˆใ—ใŸ็Šถๆ…‹ใซไฟใŸใ‚Œใพใ™ | +| ๆ—ขๅญ˜ใƒชใƒใ‚ธใƒˆใƒชใ‚’ใ‚จใƒผใ‚ธใ‚งใƒณใƒˆใŒ็†่งฃใ—ใซใใ„ | ใ‚ณใƒผใƒ‰ใƒ™ใƒผใ‚นใ‚’ RPG ใซใ‚จใƒณใ‚ณใƒผใƒ‰ใงใใพใ™ | +| ๅฏพ่ฑกใ‚’็ตžใฃใŸ็ทจ้›†ใŒ้š ใ‚ŒใŸไพๅญ˜้–ขไฟ‚ใ‚’ๅฃŠใ™ๅฏ่ƒฝๆ€งใŒใ‚ใ‚‹ | ใ‚ฐใƒฉใƒ•่ช่ญ˜ๅž‹ใฎใ‚ณใƒณใƒ†ใ‚ญใ‚นใƒˆใง็ทจ้›†ใ•ใ‚Œใพใ™ | + +### ใƒฏใƒผใ‚ฏใƒ•ใƒญใƒผใ‚’้ธๆŠžใ™ใ‚‹ + +| ็›ฎ็š„ | ใƒฏใƒผใ‚ฏใƒ•ใƒญใƒผ | ใ“ใ“ใ‹ใ‚‰้–‹ๅง‹ | +|---|---|---| +| ่ฆไปถใ‹ใ‚‰ๆ–ฐใ—ใ„ใƒ—ใƒญใ‚ธใ‚งใ‚ฏใƒˆใ‚’ไฝœๆˆใ™ใ‚‹ | ้ †ๆ–นๅ‘ใƒฏใƒผใ‚ฏใƒ•ใƒญใƒผ | [`ใ‚ฏใ‚คใƒƒใ‚ฏใ‚นใ‚ฟใƒผใƒˆ๏ผšๆ–ฐ่ฆใƒชใƒใ‚ธใƒˆใƒช`](#quick-start-new-repository) | +| ๆ—ขๅญ˜ใฎใ‚ณใƒผใƒ‰ใƒ™ใƒผใ‚นใ‚’็†่งฃใพใŸใฏๆ›ดๆ–ฐใ™ใ‚‹ | ้€†ๆ–นๅ‘ใƒฏใƒผใ‚ฏใƒ•ใƒญใƒผ | 
[`ใ‚ฏใ‚คใƒƒใ‚ฏใ‚นใ‚ฟใƒผใƒˆ๏ผšๆ—ขๅญ˜ใƒชใƒใ‚ธใƒˆใƒช`](#quick-start-existing-repository) | +| ๆญฃ็ขบใชใƒชใƒใ‚ธใƒˆใƒช่ช่ญ˜ๅž‹็ทจ้›†ใ‚’่กŒใ† | ๅค–็ง‘็š„็ทจ้›†ใƒฏใƒผใ‚ฏใƒ•ใƒญใƒผ | [`ใ‚ฏใ‚คใƒƒใ‚ฏใ‚นใ‚ฟใƒผใƒˆ๏ผšๆ—ขๅญ˜ใƒชใƒใ‚ธใƒˆใƒช`](#quick-start-existing-repository) | + +ไปฅไธ‹ใฏใ€ใ“ใฎใƒชใƒใ‚ธใƒˆใƒช็”จใซ็”Ÿๆˆใ•ใ‚ŒใŸใ‚ฐใƒฉใƒ•ๅฏ่ฆ–ๅŒ–ใฎไธ€้ƒจใงใ™ใ€‚`/rpgkit.encode` ใ‚’ๅฎŸ่กŒใ—ใ€`rpg.html` ใ‚’้–‹ใใจใ€ๅฎŒๅ…จใชใ‚คใƒณใ‚ฟใƒฉใ‚ฏใƒ†ใ‚ฃใƒ–ใ‚ฐใƒฉใƒ•ใ‚’ๆŽข็ดขใงใใพใ™ใ€‚ + +![RPG-Kit ใƒชใƒใ‚ธใƒˆใƒชใ‚ฐใƒฉใƒ•ๅฏ่ฆ–ๅŒ–](../docs/rpgkit_visualized_graph.png) + +## ใ‚คใƒณใ‚นใƒˆใƒผใƒซ + +### ๅ‰ๆๆกไปถ + +- Python 3.12+ +- [uv](https://docs.astral.sh/uv/) +- Git +- ใ‚คใƒณใ‚นใƒˆใƒผใƒซๆธˆใฟใง่ช่จผๆธˆใฟใฎ AI ใ‚ณใƒผใƒ‡ใ‚ฃใƒณใ‚ฐใ‚จใƒผใ‚ธใ‚งใƒณใƒˆ CLI๏ผš[GitHub Copilot](https://docs.github.com/en/copilot) ใพใŸใฏ [Claude Code](https://docs.anthropic.com/en/docs/claude-code/setup) + +### RPG-Kit ใ‚’ใ‚คใƒณใ‚นใƒˆใƒผใƒซใ™ใ‚‹ + +```bash +# ๆฐธ็ถšใ‚คใƒณใ‚นใƒˆใƒผใƒซ๏ผˆๆŽจๅฅจ๏ผ‰ +uv tool install rpgkit-cli --from "git+https://github.com/microsoft/RPG-ZeroRepo.git#subdirectory=RPG-Kit" +rpgkit check + +# ไธ€ๅ›ž้™ใ‚Šใฎไฝฟ็”จ +uvx --from "git+https://github.com/microsoft/RPG-ZeroRepo.git#subdirectory=RPG-Kit" rpgkit init +``` + + + +## ใ‚ฏใ‚คใƒƒใ‚ฏใ‚นใ‚ฟใƒผใƒˆ๏ผšๆ–ฐ่ฆใƒชใƒใ‚ธใƒˆใƒช + +RPG-Kit ใซ่ฆไปถใ‚’ๆ–ฐใ—ใ„ใ‚ณใƒผใƒ‰ใƒ™ใƒผใ‚นใธๅค‰ๆ›ใ•ใ›ใŸใ„ๅ ดๅˆใฏใ€ใ“ใฎๆ‰‹้ †ใ‚’ไฝฟ็”จใ—ใพใ™ใ€‚ + +> [!WARNING] +> ็”Ÿๆˆใ•ใ‚Œใ‚‹ใ‚ณใƒผใƒ‰้‡ใŒๅคšใ„ใƒ—ใƒญใ‚ธใ‚งใ‚ฏใƒˆใงใฏใ€`/rpgkit.design_interfaces` ใจ `/rpgkit.code_gen` ใฎๅฎŸ่กŒๆ™‚้–“ใŒ้•ทใใชใ‚‹ๅ ดๅˆใŒใ‚ใ‚Šใพใ™ใ€‚ๅ…ธๅž‹็š„ใชไพ‹ใจใ—ใฆใ€ๆฉŸ่ƒฝๆ•ฐใŒ 100 ใฎๅ ดๅˆใ€ๅฎŸ่กŒๆ™‚้–“ใฏ็ด„ 30 ๅˆ†ใงใ™ใ€‚ + +1. 
ๆ–ฐใ—ใ„ใƒ—ใƒญใ‚ธใ‚งใ‚ฏใƒˆใ‚’ๅˆๆœŸๅŒ–ใ—ใพใ™๏ผš + + ```bash + rpgkit init my-project + cd my-project + ``` + + ไธ€่ˆฌ็š„ใชใƒใƒชใ‚จใƒผใ‚ทใƒงใƒณ๏ผš + + ```bash + rpgkit init my-project --ai claude --script sh + rpgkit init my-project --ai copilot + rpgkit init my-project --github-token $GITHUB_TOKEN + ``` + +2. **[ไปปๆ„]** ่ฆไปถใƒ‰ใ‚ญใƒฅใƒกใƒณใƒˆใ‚’ `my-project/docs/` ใซ้…็ฝฎใ—ใพใ™ใ€‚ + +3. ใƒ—ใƒญใ‚ธใ‚งใ‚ฏใƒˆใƒ‡ใ‚ฃใƒฌใ‚ฏใƒˆใƒชใง AI ใ‚ณใƒผใƒ‡ใ‚ฃใƒณใ‚ฐใ‚จใƒผใ‚ธใ‚งใƒณใƒˆใ‚’่ตทๅ‹•ใ—ใพใ™ใ€‚ + +4. ้ †ๆ–นๅ‘ใƒ‘ใ‚คใƒ—ใƒฉใ‚คใƒณใ‚’ๅฎŸ่กŒใ—ใพใ™๏ผš + + ```text + /rpgkit.feature_spec + /rpgkit.feature_build + /rpgkit.feature_refactor + [Optional] /rpgkit.feature_edit + /rpgkit.build_skeleton + /rpgkit.build_data_flow + /rpgkit.design_base_classes + /rpgkit.design_interfaces + /rpgkit.plan_tasks + /rpgkit.code_gen + [Optional] /rpgkit.rpg_edit + ``` + +RPG-Kit ใฏ `.rpgkit/data/rpg.json` ใ‚’ๆฎต้šŽ็š„ใซไฝœๆˆใ—ใ€ใใ‚Œใ‚’ไฝฟ็”จใ—ใฆ่ฆไปถใ€่จˆ็”ปๆˆๆžœ็‰ฉใ€็”Ÿๆˆใ•ใ‚ŒใŸใ‚ณใƒผใƒ‰ใ€ไพๅญ˜้–ขไฟ‚ๆƒ…ๅ ฑใฎๆ•ดๅˆๆ€งใ‚’ไฟใกใพใ™ใ€‚ + + + +## ใ‚ฏใ‚คใƒƒใ‚ฏใ‚นใ‚ฟใƒผใƒˆ๏ผšๆ—ขๅญ˜ใƒชใƒใ‚ธใƒˆใƒช + +ใ™ใงใซใƒชใƒใ‚ธใƒˆใƒชใŒใ‚ใ‚Šใ€AI ใ‚จใƒผใ‚ธใ‚งใƒณใƒˆใซ RPG ใ‚ณใƒณใƒ†ใ‚ญใ‚นใƒˆใ‚’ไฝฟใฃใฆ็†่งฃใพใŸใฏ็ทจ้›†ใ•ใ›ใŸใ„ๅ ดๅˆใฏใ€ใ“ใฎๆ‰‹้ †ใ‚’ไฝฟ็”จใ—ใพใ™ใ€‚ + +> [!WARNING] +> ๆฏ”่ผƒ็š„ๅคงใใชใƒ—ใƒญใ‚ธใ‚งใ‚ฏใƒˆใงใฏใ€`rpgkit init . --encode` ใจ `/rpgkit.encode` ใฎๅฎŸ่กŒๆ™‚้–“ใŒ้•ทใใชใ‚‹ๅ ดๅˆใŒใ‚ใ‚Šใพใ™ใ€‚ๅ…ธๅž‹็š„ใชไพ‹ใจใ—ใฆใ€ใ‚ฝใƒผใ‚นใ‚ณใƒผใƒ‰ใƒ•ใ‚กใ‚คใƒซๆ•ฐใŒ 200 ใฎๅ ดๅˆใ€ๅฎŸ่กŒๆ™‚้–“ใฏ็ด„ 100 ๅˆ†ใงใ™ใ€‚ + +1. ใƒชใƒใ‚ธใƒˆใƒชใƒซใƒผใƒˆใง RPG-Kit ใ‚’ๅˆๆœŸๅŒ–ใ—ใ€ๅˆๆœŸใ‚ฐใƒฉใƒ•ใ‚’ๆง‹็ฏ‰ใ—ใพใ™๏ผš + + ```bash + mkdir my-project + cp -r existing-repo/ my-project/ + cd my-project + rpgkit init . --encode + ``` + + ็ฉบใงใชใ„ใƒ‡ใ‚ฃใƒฌใ‚ฏใƒˆใƒชใฎ็ขบ่ชใƒ—ใƒญใƒณใƒ—ใƒˆใ‚’ใ‚นใ‚ญใƒƒใƒ—ใ—ใŸใ„ๅ ดๅˆ๏ผš + + ```bash + rpgkit init . --force --encode + ``` + +2. 
ใƒชใƒใ‚ธใƒˆใƒชๅ†…ใง AI ใ‚ณใƒผใƒ‡ใ‚ฃใƒณใ‚ฐใ‚จใƒผใ‚ธใ‚งใƒณใƒˆใ‚’่ตทๅ‹•ใ—ใพใ™ใ€‚ + +3. MCP ใƒ„ใƒผใƒซใจใ‚นใƒฉใƒƒใ‚ทใƒฅใ‚ณใƒžใƒณใƒ‰ใ‚’้€šใ˜ใฆใ€็”Ÿๆˆใ•ใ‚ŒใŸ RPG ใ‚’ไฝฟ็”จใ—ใพใ™๏ผš + + ```text + /rpgkit.encode # ๅฟ…่ฆใซๅฟœใ˜ใฆๅฎŒๅ…จใช RPG ใ‚’ๅ†ๆง‹็ฏ‰ + /rpgkit.update_rpg # ๆ‰‹ๅ‹•ใ‚คใƒณใ‚ฏใƒชใƒกใƒณใ‚ฟใƒซๆ›ดๆ–ฐใฎใƒ•ใ‚ฉใƒผใƒซใƒใƒƒใ‚ฏ + /rpgkit.rpg_edit # ใ‚ฐใƒฉใƒ•่ช่ญ˜ๅž‹ใ‚ณใƒผใƒ‰็ทจ้›† + ``` + +4. ใ‚ณใƒŸใƒƒใƒˆๅพŒใ€RPG-Kit hooks ใฏ `.rpgkit/data/rpg.json`ใ€`.rpgkit/data/dep_graph.json`ใ€`.rpgkit/data/rpg.html` ใ‚’ใ‚ณใƒผใƒ‰ๅค‰ๆ›ดใจๆ•ดๅˆใ•ใ›ใพใ™ใ€‚hook ใŒๅคฑๆ•—ใพใŸใฏใ‚นใ‚ญใƒƒใƒ—ใ•ใ‚ŒใŸๅ ดๅˆใฏใ€`/rpgkit.update_rpg` ใ‚’ๅฎŸ่กŒใ—ใฆใใ ใ•ใ„ใ€‚ + +## ่ฟฝๅŠ ใ•ใ‚Œใ‚‹ใ‚‚ใฎ + +`rpgkit init` ใฎๅฎŸ่กŒๅพŒใ‚‚ใ€workspace root ใฏใƒ—ใƒญใ‚ธใ‚งใ‚ฏใƒˆใƒชใƒใ‚ธใƒˆใƒชใฎใƒซใƒผใƒˆใฎใพใพใงใ™ใ€‚RPG-Kit ใฏใ€ใ‚ณใƒžใƒณใƒ‰ๅฎš็พฉใ€ใƒฉใƒณใ‚ฟใ‚คใƒ ใ‚นใ‚ฏใƒชใƒ—ใƒˆใ€MCP ่จญๅฎšใ€็”Ÿๆˆใ•ใ‚ŒใŸใ‚ฐใƒฉใƒ•ใƒ‡ใƒผใ‚ฟใ‚’ใ‚ณใƒผใƒ‰ใจไธฆในใฆ่ฟฝๅŠ ใ—ใพใ™ใ€‚ + +```text +my-project/ +โ”œโ”€โ”€ docs/ # /rpgkit.feature_spec ็”จใฎไปปๆ„ใฎ่ฆไปถใƒ‰ใ‚ญใƒฅใƒกใƒณใƒˆ +โ”œโ”€โ”€ .github/ or .claude/ # AI assistant ใ‚ณใƒžใƒณใƒ‰ๅฎš็พฉใจ่จญๅฎš +โ”œโ”€โ”€ .vscode/ # ่ฉฒๅฝ“ใ™ใ‚‹ๅ ดๅˆใฎ Copilot/VS Code MCP ่จญๅฎš +โ””โ”€โ”€ .rpgkit/ # RPG-Kit ใƒฉใƒณใ‚ฟใ‚คใƒ  + โ”œโ”€โ”€ scripts/ # ใƒ‘ใ‚คใƒ—ใƒฉใ‚คใƒณใ‚นใ‚ฏใƒชใƒ—ใƒˆใจใ‚ตใƒใƒผใƒˆใƒ‘ใƒƒใ‚ฑใƒผใ‚ธ + โ”œโ”€โ”€ data/ # rpg.json ใจ dep_graph.json ใ‚’ๅซใ‚€็”Ÿๆˆใ‚ขใƒผใƒ†ใ‚ฃใƒ•ใ‚กใ‚ฏใƒˆ + โ”œโ”€โ”€ logs/ # ใ‚นใƒ†ใƒผใ‚ธใ”ใจใฎๅฎŸ่กŒใƒญใ‚ฐ + โ””โ”€โ”€ reports/ # ็”Ÿๆˆใ•ใ‚ŒใŸใƒฌใƒ“ใƒฅใƒผใŠใ‚ˆใณ่จบๆ–ญใƒฌใƒใƒผใƒˆ +``` + +ๅฎŒๅ…จใชใƒฌใ‚คใ‚ขใ‚ฆใƒˆใจใƒ‡ใƒผใ‚ฟใƒ•ใ‚กใ‚คใƒซใƒชใƒ•ใ‚กใƒฌใƒณใ‚นใซใคใ„ใฆใฏใ€[docs/project-structure.md](docs/project-structure.md) ใ‚’ๅ‚็…งใ—ใฆใใ ใ•ใ„ใ€‚ + +## ใ‚ตใƒใƒผใƒˆใ•ใ‚Œใ‚‹ใƒ—ใƒฉใƒƒใƒˆใƒ•ใ‚ฉใƒผใƒ  + +| ใƒ—ใƒฉใƒƒใƒˆใƒ•ใ‚ฉใƒผใƒ  | Claude Code | GitHub Copilot | Codex | +| ----------------------- | ----------- | -------------- | ----- | +| CLI ไฝฟ็”จ | โœ… | โœ…(MCP ใชใ—) | 
โŒ› | +| VS Code ๆ‹กๅผตใฎไฝฟ็”จ | โœ… | โœ… | โŒ› | + +| ใ‚นใ‚ฏใƒชใƒ—ใƒˆ | Linux | Windows | Mac | +| ---------- | ----- | ------- | --- | +| sh | โœ… | โŒ› | โŒ› | +| ps | N/A | โŒ› | โŒ› | + +## ใƒ‰ใ‚ญใƒฅใƒกใƒณใƒˆ + +- [ใ‚นใƒฉใƒƒใ‚ทใƒฅใ‚ณใƒžใƒณใƒ‰ใƒชใƒ•ใ‚กใƒฌใƒณใ‚น](docs/commands.md) โ€” ใ™ในใฆใฎ `/rpgkit.*` ใ‚ณใƒžใƒณใƒ‰ใ€ๅ…ฅๅŠ›ใ€ๅ‡บๅŠ›ใ€ไพ‹ใ€‚ +- [CLI ใƒชใƒ•ใ‚กใƒฌใƒณใ‚น](docs/cli-reference.md) โ€” `rpgkit init`ใ€`rpgkit update`ใ€`rpgkit check`ใ€`rpgkit version`ใ€ใŠใ‚ˆใณใ™ในใฆใฎใ‚ชใƒ—ใ‚ทใƒงใƒณใ€‚ +- [่จญๅฎš](docs/configuration.md) โ€” AI assistant ใฎใ‚ปใƒƒใƒˆใ‚ขใƒƒใƒ—ใ€MCP ็™ป้Œฒใ€hooksใ€่‡ชๅ‹•ๆ‰ฟ่ชใ€ใƒˆใƒฉใƒ–ใƒซใ‚ทใƒฅใƒผใƒ†ใ‚ฃใƒณใ‚ฐใ€‚ +- [ใƒ—ใƒญใ‚ธใ‚งใ‚ฏใƒˆๆง‹้€ ](docs/project-structure.md) โ€” RPG-Kit ใŒไฝœๆˆใ™ใ‚‹ใƒ•ใ‚กใ‚คใƒซใจใƒ‡ใ‚ฃใƒฌใ‚ฏใƒˆใƒชใ€‚ + +## ไปŠๅพŒใฎๆฉŸ่ƒฝ + +- **ใ‚ˆใ‚Šใ‚ทใƒณใƒ—ใƒซใชใƒ‡ใ‚ณใƒผใƒ€ใƒผใ‚ณใƒžใƒณใƒ‰๏ผš** ็พๅœจใฎใƒ‡ใ‚ณใƒผใƒ€ใƒผใƒ•ใƒญใƒผใ‚’ใ‚ˆใ‚Šๅฐ‘ใชใ„ใ‚ณใƒžใƒณใƒ‰ใซ็ตฑๅˆใ—ใพใ™ใ€‚ใ“ใ‚Œใซใฏใ€ใ‚จใƒณใƒ‰ใƒ„ใƒผใ‚จใƒณใƒ‰ใฎใƒชใƒใ‚ธใƒˆใƒช็”Ÿๆˆ็”จใฎ `/rpgkit.generate_repo`ใ€ใŠใ‚ˆใณๆฉŸ่ƒฝ็”Ÿๆˆใจ RPG ่จˆ็”ป็”จใฎ `/rpgkit.generate_feature` ใจ `/rpgkit.plan` ใŒๅซใพใ‚Œใพใ™ใ€‚ +- **ๅคš่จ€่ชžใ‚ตใƒใƒผใƒˆ๏ผš** Goใ€C++ใ€Rustใ€JavaScript/TypeScript ใชใฉใฎใ‚ตใƒใƒผใƒˆใ‚’่ฟฝๅŠ ใ—ใพใ™ใ€‚ +- **ใ‚ˆใ‚Šๅคšใใฎใƒ—ใƒฉใƒƒใƒˆใƒ•ใ‚ฉใƒผใƒ ็ตฑๅˆ๏ผš** ใ•ใพใ–ใพใชใ‚ทใ‚นใƒ†ใƒ ไธŠใงใ€็•ฐใชใ‚‹ AI ใ‚ณใƒผใƒ‡ใ‚ฃใƒณใ‚ฐใ‚จใƒผใ‚ธใ‚งใƒณใƒˆๅ‘ใ‘ใซ CLI ใจ VS Code ๆ‹กๅผตใƒฏใƒผใ‚ฏใƒ•ใƒญใƒผใง RPG-Kit ใ‚’ใ‚ตใƒใƒผใƒˆใ—ใพใ™ใ€‚ + +## ใƒˆใƒฉใƒ–ใƒซใ‚ทใƒฅใƒผใƒ†ใ‚ฃใƒณใ‚ฐ + +**AI assistant CLI ใŒ่ฆ‹ใคใ‹ใ‚‰ใชใ„๏ผš** `rpgkit check` ใ‚’ๅฎŸ่กŒใ—ใ€้ธๆŠžใ—ใŸ assistant CLI ใ‚’ใ‚คใƒณใ‚นใƒˆใƒผใƒซใ—ใฆ่ช่จผใ—ใŸใ†ใˆใงใ€`rpgkit init` ใพใŸใฏ `rpgkit update` ใ‚’ๅ†ๅฎŸ่กŒใ—ใพใ™ใ€‚ + +**MCP ใƒ„ใƒผใƒซใŒ `rpg_unavailable` ใ‚’ๅ ฑๅ‘Šใ™ใ‚‹๏ผš** `/rpgkit.encode` ใ‚’ๅฎŸ่กŒใ—ใฆ `.rpgkit/data/rpg.json` ใ‚’ไฝœๆˆใ—ใพใ™ใ€‚ + +**ใ‚คใƒณใ‚ฏใƒชใƒกใƒณใ‚ฟใƒซๆ›ดๆ–ฐใซๅคฑๆ•—ใ—ใŸ๏ผš** `.rpgkit/logs/update_rpg.log` 
ใ‚’็ขบ่ชใ—ใ€ใใฎๅพŒ `/rpgkit.update_rpg` ใ‚’ๅฎŸ่กŒใ—ใพใ™ใ€‚ + +**ใƒฌใƒผใƒˆๅˆถ้™ใพใŸใฏใƒ—ใƒฉใ‚คใƒ™ใƒผใƒˆใƒชใƒใ‚ธใƒˆใƒชใ‚ขใ‚ฏใ‚ปใ‚นใซใ‚ˆใ‚Šใƒ†ใƒณใƒ—ใƒฌใƒผใƒˆใฎใƒ€ใ‚ฆใƒณใƒญใƒผใƒ‰ใซๅคฑๆ•—ใ™ใ‚‹๏ผš** `--github-token $GITHUB_TOKEN` ใ‚’ๆธกใ™ใ‹ใ€`GH_TOKEN` / `GITHUB_TOKEN` ใ‚’่จญๅฎšใ—ใพใ™ใ€‚ + +## ใƒฉใ‚คใ‚ปใƒณใ‚น + +MIT License - ่ฉณ็ดฐใฏ [LICENSE](LICENSE) ใ‚’ๅ‚็…งใ—ใฆใใ ใ•ใ„ใ€‚ + +## ่ฌ่พž + +[GitHub Spec-Kit](https://github.com/github/spec-kit) ใซๅŸบใฅใ„ใฆใ„ใพใ™ใ€‚ diff --git a/RPG-Kit/README.ko-KR.md b/RPG-Kit/README.ko-KR.md new file mode 100644 index 0000000..da4940a --- /dev/null +++ b/RPG-Kit/README.ko-KR.md @@ -0,0 +1,202 @@ +

RPG-Kit

+ +

+ English | + ็ฎ€ไฝ“ไธญๆ–‡ | + ๆ—ฅๆœฌ่ชž | + ํ•œ๊ตญ์–ด | + เคนเคฟเคจเฅเคฆเฅ€ +

+ +## AI ์ฝ”๋”ฉ ์—์ด์ „ํŠธ๊ฐ€ ์ „์ฒด ๋ฆฌํฌ์ง€ํ† ๋ฆฌ๋ฅผ ์ดํ•ดํ•˜๋„๋ก ํ•˜๊ธฐ + +AI ์ฝ”๋”ฉ ์—์ด์ „ํŠธ๋Š” ๊ฐ•๋ ฅํ•˜์ง€๋งŒ, ๋Œ€๊ฐœ ํŒŒ์ผ ๋‹จ์œ„๋กœ ์ž‘์—…ํ•ฉ๋‹ˆ๋‹ค. ํ”„๋กœ์ ํŠธ๊ฐ€ ์ปค์งˆ์ˆ˜๋ก ์š”๊ตฌ์‚ฌํ•ญ, ์•„ํ‚คํ…์ฒ˜, ์˜์กด์„ฑ, ์ด์ „ ์„ค๊ณ„ ๊ฒฐ์ •์„ ๋†“์น  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. + +RPG-Kit์€ **Repository Planning Graph (RPG)** ๋ฅผ ์œ ์ง€ํ•˜์—ฌ ์ด ๋ฌธ์ œ๋ฅผ ํ•ด๊ฒฐํ•˜๋„๋ก ๋•์Šต๋‹ˆ๋‹ค. RPG๋Š” ์š”๊ตฌ์‚ฌํ•ญ, ๊ธฐ๋Šฅ, ํŒŒ์ผ, ์ปดํฌ๋„ŒํŠธ, ์˜์กด์„ฑ์„ ์—ฐ๊ฒฐํ•˜๋Š” ๊ตฌ์กฐํ™”๋œ ์ง€๋„์ž…๋‹ˆ๋‹ค. + +๊ณ ๋ฆฝ๋œ ํ”„๋กฌํ”„ํŠธ ๋Œ€์‹  ๋ฆฌํฌ์ง€ํ† ๋ฆฌ ์ˆ˜์ค€์˜ ์ปจํ…์ŠคํŠธ๋กœ AI ์—์ด์ „ํŠธ๊ฐ€ ์ž‘์—…ํ•˜๊ธฐ๋ฅผ ์›ํ•  ๋•Œ RPG-Kit์„ ์‚ฌ์šฉํ•˜์„ธ์š”. + +### ์™œ RPG-Kit์ธ๊ฐ€์š”? + +| AI ์ฝ”๋”ฉ ์—์ด์ „ํŠธ์˜ ์ผ๋ฐ˜์ ์ธ ๋ฌธ์ œ | RPG-Kit์˜ ๋„์›€ ๋ฐฉ์‹ | +|---|---| +| ์—์ด์ „ํŠธ๊ฐ€ ๋ช‡ ๋ฒˆ์˜ ํ”„๋กฌํ”„ํŠธ ํ›„ ์š”๊ตฌ์‚ฌํ•ญ์„ ์žŠ์–ด๋ฒ„๋ฆผ | ์š”๊ตฌ์‚ฌํ•ญ์ด RPG์— ์ธ์ฝ”๋”ฉ๋ฉ๋‹ˆ๋‹ค | +| ๊ด€๋ จ ํŒŒ์ผ์„ ์ดํ•ดํ•˜์ง€ ๋ชปํ•œ ์ฑ„ ํ•œ ํŒŒ์ผ๋งŒ ํŽธ์ง‘ํ•จ | ํŒŒ์ผ, ์ปดํฌ๋„ŒํŠธ, ์˜์กด์„ฑ์ด ๊ทธ๋ž˜ํ”„์—์„œ ์—ฐ๊ฒฐ๋ฉ๋‹ˆ๋‹ค | +| ์ƒ์„ฑ๋œ ์ฝ”๋“œ๊ฐ€ ์›๋ž˜ ๊ณ„ํš์—์„œ ๋ฒ—์–ด๋‚จ | ๊ณ„ํš ์‚ฐ์ถœ๋ฌผ๊ณผ ์ฝ”๋“œ๊ฐ€ ์ •๋ ฌ๋œ ์ƒํƒœ๋กœ ์œ ์ง€๋ฉ๋‹ˆ๋‹ค | +| ๊ธฐ์กด ๋ฆฌํฌ์ง€ํ† ๋ฆฌ๋ฅผ ์—์ด์ „ํŠธ๊ฐ€ ์ดํ•ดํ•˜๊ธฐ ์–ด๋ ค์›€ | ์ฝ”๋“œ๋ฒ ์ด์Šค๋ฅผ RPG๋กœ ์ธ์ฝ”๋”ฉํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค | +| ๋Œ€์ƒ์ด ๋ช…ํ™•ํ•œ ํŽธ์ง‘์ด ์ˆจ๊ฒจ์ง„ ์˜์กด์„ฑ์„ ๊นจ๋œจ๋ฆด ์ˆ˜ ์žˆ์Œ | ๊ทธ๋ž˜ํ”„ ์ธ์‹ ์ปจํ…์ŠคํŠธ๋กœ ํŽธ์ง‘๋ฉ๋‹ˆ๋‹ค | + +### ์›Œํฌํ”Œ๋กœ ์„ ํƒ + +| ๋ชฉํ‘œ | ์›Œํฌํ”Œ๋กœ | ์‹œ์ž‘ ์œ„์น˜ | +|---|---|---| +| ์š”๊ตฌ์‚ฌํ•ญ์—์„œ ์ƒˆ ํ”„๋กœ์ ํŠธ ์ƒ์„ฑ | ์ •๋ฐฉํ–ฅ ์›Œํฌํ”Œ๋กœ | [`๋น ๋ฅธ ์‹œ์ž‘: ์ƒˆ ๋ฆฌํฌ์ง€ํ† ๋ฆฌ`](#quick-start-new-repository) | +| ๊ธฐ์กด ์ฝ”๋“œ๋ฒ ์ด์Šค ์ดํ•ด ๋˜๋Š” ์—…๋ฐ์ดํŠธ | ์—ญ๋ฐฉํ–ฅ ์›Œํฌํ”Œ๋กœ | [`๋น ๋ฅธ ์‹œ์ž‘: ๊ธฐ์กด ๋ฆฌํฌ์ง€ํ† ๋ฆฌ`](#quick-start-existing-repository) | +| ์ •๋ฐ€ํ•œ ๋ฆฌํฌ์ง€ํ† ๋ฆฌ ์ธ์‹ ํŽธ์ง‘ ์ˆ˜ํ–‰ | ์™ธ๊ณผ์  ํŽธ์ง‘ ์›Œํฌํ”Œ๋กœ | [`๋น ๋ฅธ ์‹œ์ž‘: ๊ธฐ์กด ๋ฆฌํฌ์ง€ํ† ๋ฆฌ`](#quick-start-existing-repository) | + +์•„๋ž˜๋Š” ์ด ๋ฆฌํฌ์ง€ํ† ๋ฆฌ๋ฅผ 
์œ„ํ•ด ์ƒ์„ฑ๋œ ๊ทธ๋ž˜ํ”„ ์‹œ๊ฐํ™”์˜ ์ผ๋ถ€์ž…๋‹ˆ๋‹ค. `/rpgkit.encode`๋ฅผ ์‹คํ–‰ํ•˜๊ณ  `rpg.html`์„ ์—ด์–ด ์ „์ฒด ์ธํ„ฐ๋ž™ํ‹ฐ๋ธŒ ๊ทธ๋ž˜ํ”„๋ฅผ ์‚ดํŽด๋ณด์„ธ์š”. + +![RPG-Kit ๋ฆฌํฌ์ง€ํ† ๋ฆฌ ๊ทธ๋ž˜ํ”„ ์‹œ๊ฐํ™”](../docs/rpgkit_visualized_graph.png) + +## ์„ค์น˜ + +### ํ•„์ˆ˜ ์กฐ๊ฑด + +- Python 3.12+ +- [uv](https://docs.astral.sh/uv/) +- Git +- ์„ค์น˜ ๋ฐ ์ธ์ฆ์ด ์™„๋ฃŒ๋œ AI ์ฝ”๋”ฉ ์—์ด์ „ํŠธ CLI: [GitHub Copilot](https://docs.github.com/en/copilot) ๋˜๋Š” [Claude Code](https://docs.anthropic.com/en/docs/claude-code/setup) + +### RPG-Kit ์„ค์น˜ + +```bash +# ์˜๊ตฌ ์„ค์น˜(๊ถŒ์žฅ) +uv tool install rpgkit-cli --from "git+https://github.com/microsoft/RPG-ZeroRepo.git#subdirectory=RPG-Kit" +rpgkit check + +# ์ผํšŒ์„ฑ ์‚ฌ์šฉ +uvx --from "git+https://github.com/microsoft/RPG-ZeroRepo.git#subdirectory=RPG-Kit" rpgkit init +``` + + + +## ๋น ๋ฅธ ์‹œ์ž‘: ์ƒˆ ๋ฆฌํฌ์ง€ํ† ๋ฆฌ + +RPG-Kit์ด ์š”๊ตฌ์‚ฌํ•ญ์„ ์ƒˆ ์ฝ”๋“œ๋ฒ ์ด์Šค๋กœ ๋ณ€ํ™˜ํ•˜๋„๋ก ํ•˜๋ ค๋ฉด ์ด ๊ฒฝ๋กœ๋ฅผ ์‚ฌ์šฉํ•˜์„ธ์š”. + +> [!WARNING] +> ์ƒ์„ฑ๋˜๋Š” ์ฝ”๋“œ ์–‘์ด ๋งŽ์€ ํ”„๋กœ์ ํŠธ์˜ ๊ฒฝ์šฐ, `/rpgkit.design_interfaces`์™€ `/rpgkit.code_gen`์˜ ์‹คํ–‰ ์‹œ๊ฐ„์ด ๊ธธ์–ด์งˆ ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. ๋Œ€ํ‘œ์ ์ธ ์˜ˆ๋กœ, ๊ธฐ๋Šฅ ์ˆ˜๊ฐ€ 100๊ฐœ์ธ ๊ฒฝ์šฐ ์‹คํ–‰ ์‹œ๊ฐ„์€ ์•ฝ 30๋ถ„์ž…๋‹ˆ๋‹ค. + +1. ์ƒˆ ํ”„๋กœ์ ํŠธ๋ฅผ ์ดˆ๊ธฐํ™”ํ•ฉ๋‹ˆ๋‹ค: + + ```bash + rpgkit init my-project + cd my-project + ``` + + ์ผ๋ฐ˜์ ์ธ ๋ณ€ํ˜•: + + ```bash + rpgkit init my-project --ai claude --script sh + rpgkit init my-project --ai copilot + rpgkit init my-project --github-token $GITHUB_TOKEN + ``` + +2. **[์„ ํƒ ์‚ฌํ•ญ]** ์š”๊ตฌ์‚ฌํ•ญ ๋ฌธ์„œ๋ฅผ `my-project/docs/`์— ๋„ฃ์Šต๋‹ˆ๋‹ค. + +3. ํ”„๋กœ์ ํŠธ ๋””๋ ‰ํ„ฐ๋ฆฌ์—์„œ AI ์ฝ”๋”ฉ ์—์ด์ „ํŠธ๋ฅผ ์‹คํ–‰ํ•ฉ๋‹ˆ๋‹ค. + +4. 
์ •๋ฐฉํ–ฅ ํŒŒ์ดํ”„๋ผ์ธ์„ ์‹คํ–‰ํ•ฉ๋‹ˆ๋‹ค: + + ```text + /rpgkit.feature_spec + /rpgkit.feature_build + /rpgkit.feature_refactor + [Optional] /rpgkit.feature_edit + /rpgkit.build_skeleton + /rpgkit.build_data_flow + /rpgkit.design_base_classes + /rpgkit.design_interfaces + /rpgkit.plan_tasks + /rpgkit.code_gen + [Optional] /rpgkit.rpg_edit + ``` + +RPG-Kit์€ `.rpgkit/data/rpg.json`์„ ์ ์ง„์ ์œผ๋กœ ์ƒ์„ฑํ•˜๊ณ , ์ด๋ฅผ ์‚ฌ์šฉํ•ด ์š”๊ตฌ์‚ฌํ•ญ, ๊ณ„ํš ์‚ฐ์ถœ๋ฌผ, ์ƒ์„ฑ๋œ ์ฝ”๋“œ, ์˜์กด์„ฑ ์ •๋ณด๋ฅผ ์ •๋ ฌ๋œ ์ƒํƒœ๋กœ ์œ ์ง€ํ•ฉ๋‹ˆ๋‹ค. + + + +## ๋น ๋ฅธ ์‹œ์ž‘: ๊ธฐ์กด ๋ฆฌํฌ์ง€ํ† ๋ฆฌ + +์ด๋ฏธ ๋ฆฌํฌ์ง€ํ† ๋ฆฌ๊ฐ€ ์žˆ๊ณ  AI ์—์ด์ „ํŠธ๊ฐ€ RPG ์ปจํ…์ŠคํŠธ๋กœ ์ด๋ฅผ ์ดํ•ดํ•˜๊ฑฐ๋‚˜ ํŽธ์ง‘ํ•˜๊ฒŒ ํ•˜๋ ค๋ฉด ์ด ๊ฒฝ๋กœ๋ฅผ ์‚ฌ์šฉํ•˜์„ธ์š”. + +> [!WARNING] +> ๊ทœ๋ชจ๊ฐ€ ํฐ ํ”„๋กœ์ ํŠธ์˜ ๊ฒฝ์šฐ, `rpgkit init . --encode`์™€ `/rpgkit.encode`์˜ ์‹คํ–‰ ์‹œ๊ฐ„์ด ๊ธธ์–ด์งˆ ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. ๋Œ€ํ‘œ์ ์ธ ์˜ˆ๋กœ, ์†Œ์Šค ์ฝ”๋“œ ํŒŒ์ผ ์ˆ˜๊ฐ€ 200๊ฐœ์ธ ๊ฒฝ์šฐ ์‹คํ–‰ ์‹œ๊ฐ„์€ ์•ฝ 100๋ถ„์ž…๋‹ˆ๋‹ค. + +1. ๋ฆฌํฌ์ง€ํ† ๋ฆฌ ๋ฃจํŠธ์—์„œ RPG-Kit์„ ์ดˆ๊ธฐํ™”ํ•˜๊ณ  ์ดˆ๊ธฐ ๊ทธ๋ž˜ํ”„๋ฅผ ๊ตฌ์ถ•ํ•ฉ๋‹ˆ๋‹ค: + + ```bash + mkdir my-project + cp -r existing-repo/ my-project/ + cd my-project + rpgkit init . --encode + ``` + + ๋น„์–ด ์žˆ์ง€ ์•Š์€ ๋””๋ ‰ํ„ฐ๋ฆฌ์— ๋Œ€ํ•œ ํ™•์ธ ํ”„๋กฌํ”„ํŠธ๋ฅผ ๊ฑด๋„ˆ๋›ฐ๋ ค๋ฉด: + + ```bash + rpgkit init . --force --encode + ``` + +2. ๋ฆฌํฌ์ง€ํ† ๋ฆฌ์—์„œ AI ์ฝ”๋”ฉ ์—์ด์ „ํŠธ๋ฅผ ์‹คํ–‰ํ•ฉ๋‹ˆ๋‹ค. + +3. MCP ๋„๊ตฌ์™€ slash command๋ฅผ ํ†ตํ•ด ์ƒ์„ฑ๋œ RPG๋ฅผ ์‚ฌ์šฉํ•ฉ๋‹ˆ๋‹ค: + + ```text + /rpgkit.encode # ํ•„์š”ํ•  ๋•Œ ์ „์ฒด RPG ์žฌ๊ตฌ์ถ• + /rpgkit.update_rpg # ์ˆ˜๋™ ์ฆ๋ถ„ ์—…๋ฐ์ดํŠธ ํด๋ฐฑ + /rpgkit.rpg_edit # ๊ทธ๋ž˜ํ”„ ์ธ์‹ ์ฝ”๋“œ ํŽธ์ง‘ + ``` + +4. ์ปค๋ฐ‹ ํ›„ RPG-Kit hooks๋Š” `.rpgkit/data/rpg.json`, `.rpgkit/data/dep_graph.json`, `.rpgkit/data/rpg.html`์„ ์ฝ”๋“œ ๋ณ€๊ฒฝ๊ณผ ์ •๋ ฌ๋œ ์ƒํƒœ๋กœ ์œ ์ง€ํ•ฉ๋‹ˆ๋‹ค. hook์ด ์‹คํŒจํ•˜๊ฑฐ๋‚˜ ๊ฑด๋„ˆ๋›ฐ์–ด์ง„ ๊ฒฝ์šฐ `/rpgkit.update_rpg`๋ฅผ ์‹คํ–‰ํ•˜์„ธ์š”. 
+ +## ์ถ”๊ฐ€๋˜๋Š” ํ•ญ๋ชฉ + +`rpgkit init`์„ ์‹คํ–‰ํ•œ ํ›„์—๋„ workspace root๋Š” ํ”„๋กœ์ ํŠธ ๋ฆฌํฌ์ง€ํ† ๋ฆฌ ๋ฃจํŠธ์ž…๋‹ˆ๋‹ค. RPG-Kit์€ ๋ช…๋ น ์ •์˜, ๋Ÿฐํƒ€์ž„ ์Šคํฌ๋ฆฝํŠธ, MCP ๊ตฌ์„ฑ, ์ƒ์„ฑ๋œ ๊ทธ๋ž˜ํ”„ ๋ฐ์ดํ„ฐ๋ฅผ ์ฝ”๋“œ์™€ ํ•จ๊ป˜ ์ถ”๊ฐ€ํ•ฉ๋‹ˆ๋‹ค. + +```text +my-project/ +โ”œโ”€โ”€ docs/ # /rpgkit.feature_spec์šฉ ์„ ํƒ์  ์š”๊ตฌ์‚ฌํ•ญ ๋ฌธ์„œ +โ”œโ”€โ”€ .github/ or .claude/ # AI assistant ๋ช…๋ น ์ •์˜ ๋ฐ ์„ค์ • +โ”œโ”€โ”€ .vscode/ # ํ•ด๋‹น๋˜๋Š” ๊ฒฝ์šฐ Copilot/VS Code MCP ๊ตฌ์„ฑ +โ””โ”€โ”€ .rpgkit/ # RPG-Kit ๋Ÿฐํƒ€์ž„ + โ”œโ”€โ”€ scripts/ # ํŒŒ์ดํ”„๋ผ์ธ ์Šคํฌ๋ฆฝํŠธ ๋ฐ ์ง€์› ํŒจํ‚ค์ง€ + โ”œโ”€โ”€ data/ # rpg.json ๋ฐ dep_graph.json์„ ํฌํ•จํ•œ ์ƒ์„ฑ ์•„ํ‹ฐํŒฉํŠธ + โ”œโ”€โ”€ logs/ # ๋‹จ๊ณ„๋ณ„ ์‹คํ–‰ ๋กœ๊ทธ + โ””โ”€โ”€ reports/ # ์ƒ์„ฑ๋œ ๋ฆฌ๋ทฐ ๋ฐ ์ง„๋‹จ ๋ณด๊ณ ์„œ +``` + +์ „์ฒด ๋ ˆ์ด์•„์›ƒ ๋ฐ ๋ฐ์ดํ„ฐ ํŒŒ์ผ ์ฐธ์กฐ๋Š” [docs/project-structure.md](docs/project-structure.md)๋ฅผ ์ฐธ์กฐํ•˜์„ธ์š”. + +## ์ง€์› ํ”Œ๋žซํผ + +| ํ”Œ๋žซํผ | Claude Code | GitHub Copilot | Codex | +| ---------------------- | ----------- | -------------- | ----- | +| CLI ์‚ฌ์šฉ | โœ… | โœ…(MCP ์—†์Œ) | โŒ› | +| VS Code ํ™•์žฅ ์‚ฌ์šฉ | โœ… | โœ… | โŒ› | + +| ์Šคํฌ๋ฆฝํŠธ | Linux | Windows | Mac | +| -------- | ----- | ------- | --- | +| sh | โœ… | โŒ› | โŒ› | +| ps | N/A | โŒ› | โŒ› | + +## ๋ฌธ์„œ + +- [Slash command ์ฐธ์กฐ](docs/commands.md) โ€” ๋ชจ๋“  `/rpgkit.*` ๋ช…๋ น, ์ž…๋ ฅ, ์ถœ๋ ฅ, ์˜ˆ์‹œ. +- [CLI ์ฐธ์กฐ](docs/cli-reference.md) โ€” `rpgkit init`, `rpgkit update`, `rpgkit check`, `rpgkit version` ๋ฐ ๋ชจ๋“  ์˜ต์…˜. +- [๊ตฌ์„ฑ](docs/configuration.md) โ€” AI assistant ์„ค์ •, MCP ๋“ฑ๋ก, hooks, ์ž๋™ ์Šน์ธ, ๋ฌธ์ œ ํ•ด๊ฒฐ. +- [ํ”„๋กœ์ ํŠธ ๊ตฌ์กฐ](docs/project-structure.md) โ€” RPG-Kit์ด ์ƒ์„ฑํ•˜๋Š” ํŒŒ์ผ๊ณผ ๋””๋ ‰ํ„ฐ๋ฆฌ. + +## ์˜ˆ์ • ๊ธฐ๋Šฅ + +- **๋” ๋‹จ์ˆœํ•œ ๋””์ฝ”๋” ๋ช…๋ น:** ํ˜„์žฌ ๋””์ฝ”๋” ํ๋ฆ„์„ ๋” ์ ์€ ๋ช…๋ น์œผ๋กœ ๋ณ‘ํ•ฉํ•ฉ๋‹ˆ๋‹ค. 
์—ฌ๊ธฐ์—๋Š” ์—”๋“œํˆฌ์—”๋“œ ๋ฆฌํฌ์ง€ํ† ๋ฆฌ ์ƒ์„ฑ์„ ์œ„ํ•œ `/rpgkit.generate_repo`, ๊ธฐ๋Šฅ ์ƒ์„ฑ๊ณผ RPG ๊ณ„ํš์„ ์œ„ํ•œ `/rpgkit.generate_feature` ๋ฐ `/rpgkit.plan`์ด ํฌํ•จ๋ฉ๋‹ˆ๋‹ค. +- **๋‹ค์ค‘ ์–ธ์–ด ์ง€์›:** Go, C++, Rust, JavaScript/TypeScript ๋“ฑ์— ๋Œ€ํ•œ ์ง€์›์„ ์ถ”๊ฐ€ํ•ฉ๋‹ˆ๋‹ค. +- **๋” ๋งŽ์€ ํ”Œ๋žซํผ ํ†ตํ•ฉ:** ๋‹ค์–‘ํ•œ ์‹œ์Šคํ…œ์—์„œ ์—ฌ๋Ÿฌ AI ์ฝ”๋”ฉ ์—์ด์ „ํŠธ๋ฅผ ์œ„ํ•œ CLI ๋ฐ VS Code ํ™•์žฅ ์›Œํฌํ”Œ๋กœ ์ „๋ฐ˜์— RPG-Kit์„ ์ง€์›ํ•ฉ๋‹ˆ๋‹ค. + +## ๋ฌธ์ œ ํ•ด๊ฒฐ + +**AI assistant CLI๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Œ:** `rpgkit check`๋ฅผ ์‹คํ–‰ํ•˜๊ณ , ์„ ํƒํ•œ assistant CLI๋ฅผ ์„ค์น˜ ๋ฐ ์ธ์ฆํ•œ ๋‹ค์Œ `rpgkit init` ๋˜๋Š” `rpgkit update`๋ฅผ ๋‹ค์‹œ ์‹คํ–‰ํ•˜์„ธ์š”. + +**MCP ๋„๊ตฌ๊ฐ€ `rpg_unavailable`๋ฅผ ๋ณด๊ณ ํ•จ:** `/rpgkit.encode`๋ฅผ ์‹คํ–‰ํ•˜์—ฌ `.rpgkit/data/rpg.json`์„ ์ƒ์„ฑํ•˜์„ธ์š”. + +**์ฆ๋ถ„ ์—…๋ฐ์ดํŠธ ์‹คํŒจ:** `.rpgkit/logs/update_rpg.log`๋ฅผ ํ™•์ธํ•œ ๋‹ค์Œ `/rpgkit.update_rpg`๋ฅผ ์‹คํ–‰ํ•˜์„ธ์š”. + +**rate limit ๋˜๋Š” ํ”„๋ผ์ด๋น— ๋ฆฌํฌ์ง€ํ† ๋ฆฌ ์ ‘๊ทผ์œผ๋กœ ์ธํ•ด ํ…œํ”Œ๋ฆฟ ๋‹ค์šด๋กœ๋“œ ์‹คํŒจ:** `--github-token $GITHUB_TOKEN`์„ ์ „๋‹ฌํ•˜๊ฑฐ๋‚˜ `GH_TOKEN` / `GITHUB_TOKEN`์„ ์„ค์ •ํ•˜์„ธ์š”. + +## ๋ผ์ด์„ ์Šค + +MIT License - ์ž์„ธํ•œ ๋‚ด์šฉ์€ [LICENSE](LICENSE)๋ฅผ ์ฐธ์กฐํ•˜์„ธ์š”. + +## ๊ฐ์‚ฌ์˜ ๋ง + +[GitHub Spec-Kit](https://github.com/github/spec-kit)์„ ๊ธฐ๋ฐ˜์œผ๋กœ ํ•ฉ๋‹ˆ๋‹ค. diff --git a/RPG-Kit/README.md b/RPG-Kit/README.md new file mode 100644 index 0000000..f44847c --- /dev/null +++ b/RPG-Kit/README.md @@ -0,0 +1,243 @@ +

RPG-Kit

+ +

+ English | + ็ฎ€ไฝ“ไธญๆ–‡ | + ๆ—ฅๆœฌ่ชž | + ํ•œ๊ตญ์–ด | + เคนเคฟเคจเฅเคฆเฅ€ +

+ +## Make coding agents plan before they edit + +Coding agents are strong at local edits, but repo-level tasks often fail without a stable planning structure. Requirements drift, architecture decisions disappear, multi-file generation becomes inconsistent, and updates can miss hidden dependencies. + +RPG-Kit gives Claude Code and GitHub Copilot a **persistent RPG workspace** for repository-level coding. The workspace is built around a Repository Planning Graph (RPG) that connects requirements, features, architecture, files, code entities, and dependencies. + +With RPG-Kit, agents work through graph-driven workflows: + +- **Build**: turn requirements into an RPG plan, then generate a multi-file repository. +- **Understand**: map an existing repo into RPG, then search, explore, and explain it. +- **Update**: locate affected RPG nodes, plan the edit, and update code and graph together. + +### Choose your workflow + +| Goal | Workflow | Start here | +|---|---|---| +| Build a new repository from requirements | Build workflow (requirements โ†’ RPG โ†’ code) | [`Quick Start: New Repository`](#quick-start-new-repository) | +| Understand an existing repository | Understand workflow (repository โ†’ RPG โ†’ search/explore) | [`Quick Start: Existing Repository`](#quick-start-existing-repository) | +| Update an existing repository | Update workflow (change request โ†’ affected RPG nodes โ†’ edit plan โ†’ code/RPG update) | [`Quick Start: Existing Repository`](#quick-start-existing-repository) | + +### Detailed pipeline + +New users can skip this and start from the Quick Start sections below. + +
+Full command-level workflow diagram + +```text +Forward Direction: Requirements โ†’ RPG โ†’ Code + + Phase 1: Feature Specification Phase 2: RPG Construction & Planning Phase 3 +โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” +โ”‚ feature โ”‚ โ”‚ feature โ”‚ โ”‚ feature โ”‚ โ”‚ build โ”‚ โ”‚ build โ”‚ โ”‚ design โ”‚ โ”‚ design โ”‚ โ”‚ plan โ”‚ โ”‚ โ”‚ +โ”‚ _spec โ”œโ”€โ–ถ _build โ”œโ”€โ–ถ_refactor โ”œโ”€โ–ถ skeleton โ”œโ”€โ–ถ data โ”œโ”€โ–ถ base โ”œโ”€โ–ถinterfacesโ”œโ”€โ–ถ tasks โ”œโ”€โ–ถ code_gen โ”‚ +โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ flow โ”‚ โ”‚ classes โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ (TDD) โ”‚ +โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ””โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”˜ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ””โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”˜ + feature_ feature_ โ”‚ skeleton data_flow base_ interfaces tasks source + spec/ build โ”‚ .json .json classes .json .json code + feature_ .json โ”‚ skeleton_ data_flow .json + spec.json โ”‚ summary.txt _viz.html + โ”‚ + โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ–ผโ”€โ”€โ”€โ”€โ”€โ”€โ” + โ”‚ feature_editโ”‚ optional pre-planning edits to feature_tree.json + โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ + โ•ฐโ”€โ”€โ”€โ”€โ”€ rpg.json (created โ†’ progressively enriched) โ”€โ”€โ”€โ”€โ”€โ•ฏ + โ”‚ + โ–ผ + โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” +Surgical edit workflow: Requirements -> RPG update -> Code Update โ”‚ rpg_edit โ”‚ optional synchronized RPG + code + dep_graph edits + โ””โ”€โ”€โ–ฒโ”€โ”€โ”€โ”€โ–ฒโ”€โ”€โ”˜ + โ”‚ โ”‚ +Reverse Direction: Code โ†’ RPG โ”‚ โ”‚ + โ”‚ โ”‚ 
+โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”‚ โ”‚ +โ”‚ Existing Codebaseโ”‚โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–ถโ”‚ encode โ”‚โ”€โ”€โ”€โ”€โ”€โ”€โ–ถโ”‚update_rpgโ”‚โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ”‚ +โ”‚ โ”‚ โ”‚ (full) โ”‚ โ”‚ (manual โ”‚ โ”‚ +โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ””โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”˜ โ”‚ fallback)โ”‚ โ”‚ + rpg.json โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ”‚ + dep_graph.json rpg.json / dep_graph.json โ”‚ + โ”‚ โ”‚ + โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ + โ–ฒ + โ”‚ post-commit hook normally runs incremental updates + +MCP Server: search_rpg / explore_rpg / get_node_detail / list_rpg_tree +``` + +
+ +### RPG-Kit in action + +Below is part of the graph visualization generated for this repository. Run `/rpgkit.encode` and open `.rpgkit/data/rpg.html` to explore the full interactive graph. + +![RPG-Kit repository graph visualization](../docs/rpgkit_visualized_graph.png) + +## Installation + +### Prerequisites + +- Python 3.12+ +- [uv](https://docs.astral.sh/uv/) +- Git +- An installed and authenticated AI coding agent CLI: [GitHub Copilot](https://docs.github.com/en/copilot) or [Claude Code](https://docs.anthropic.com/en/docs/claude-code/setup) + +### Install RPG-Kit + +```bash +# For persistent installation (Recommended) +uv tool install rpgkit-cli --from "git+https://github.com/microsoft/RPG-ZeroRepo.git#subdirectory=RPG-Kit" +rpgkit check + +# For one-time usage +uvx --from "git+https://github.com/microsoft/RPG-ZeroRepo.git#subdirectory=RPG-Kit" rpgkit init +``` + +## Quick Start: New Repository + +Use this path when you want RPG-Kit to turn requirements into a new codebase. + +> [!WARNING] +> For projects with a large amount of generated code, `/rpgkit.design_interfaces` and `/rpgkit.code_gen` can take a long time to run. As a typical example: 100 features take about 30 minutes. + +1. Initialize a new project: + + ```bash + rpgkit init my-project + cd my-project + ``` + + Common variants: + + ```bash + rpgkit init my-project --ai claude --script sh + rpgkit init my-project --ai copilot + rpgkit init my-project --github-token $GITHUB_TOKEN + ``` + +2. **[Optional]** place your requirement documents in `my-project/docs/`. + +3. Launch your AI coding agent in the project directory. + +4. 
Run the forward pipeline: + + ```text + /rpgkit.feature_spec + /rpgkit.feature_build + /rpgkit.feature_refactor + [Optional] /rpgkit.feature_edit + /rpgkit.build_skeleton + /rpgkit.build_data_flow + /rpgkit.design_base_classes + /rpgkit.design_interfaces + /rpgkit.plan_tasks + /rpgkit.code_gen + [Optional] /rpgkit.rpg_edit + ``` + +RPG-Kit progressively creates `.rpgkit/data/rpg.json` and uses it to keep requirements, planning artifacts, generated code, and dependency information aligned. + +## Quick Start: Existing Repository + +Use this path when you already have a repository and want an AI agent to understand or edit it with RPG context. + +> [!WARNING] +> For larger projects, `rpgkit init . --encode` and `/rpgkit.encode` can take a long time to run. As a typical example: 200 source files take about 100 minutes. + +1. Initialize RPG-Kit in the repository root and build the initial graph: + + ```bash + mkdir my-project + cp -r existing-repo/ my-project/ + cd my-project + rpgkit init . --encode + ``` + + If you want to skip the confirmation prompt for a non-empty directory: + + ```bash + rpgkit init . --force --encode + ``` + +2. Launch your AI coding agent in the repository. + +3. Use the generated RPG through MCP tools and slash commands: + + ```text + /rpgkit.encode # rebuild the full RPG when needed + /rpgkit.update_rpg # manual incremental update fallback + /rpgkit.rpg_edit # graph-aware code edit + ``` + +4. After commits, RPG-Kit hooks keep `.rpgkit/data/rpg.json`, `.rpgkit/data/dep_graph.json`, and `.rpgkit/data/rpg.html` aligned with code changes. If the hook fails or is skipped, run `/rpgkit.update_rpg`. + +## What happens after `rpgkit init` + +`rpgkit init` does not modify your source files. It adds command definitions, runtime scripts, MCP configuration, and generated graph data alongside your code. 
+ +```text +my-project/ +โ”œโ”€โ”€ docs/ # Optional requirement docs for /rpgkit.feature_spec +โ”œโ”€โ”€ .github/ or .claude/ # AI assistant command definitions and settings +โ”œโ”€โ”€ .vscode/ # Copilot/VS Code MCP configuration when applicable +โ””โ”€โ”€ .rpgkit/ # RPG-Kit runtime + โ”œโ”€โ”€ scripts/ # Pipeline scripts and support packages + โ”œโ”€โ”€ data/ # Generated artifacts, including rpg.json and dep_graph.json + โ”œโ”€โ”€ logs/ # Per-stage execution logs + โ””โ”€โ”€ reports/ # Review and diagnostic reports when generated +``` + +See [docs/project-structure.md](docs/project-structure.md) for the full layout and data file reference. + +## Supported Platforms + +| Platform | Claude Code | GitHub Copilot | Codex | +| ----------------------- | ----------- | -------------- | ----- | +| CLI usage | โœ… | โœ… (No MCP) | โŒ› | +| VS Code extension usage | โœ… | โœ… | โŒ› | + +| Script | Linux | Windows | Mac | +| ------ | ----- | ------- | --- | +| sh | โœ… | โŒ› | โŒ› | +| ps | N/A | โŒ› | โŒ› | + +## Documentation + +- [Slash command reference](docs/commands.md) โ€” every `/rpgkit.*` command, inputs, outputs, and examples. +- [CLI reference](docs/cli-reference.md) โ€” `rpgkit init`, `rpgkit update`, `rpgkit check`, `rpgkit version`, and all options. +- [Configuration](docs/configuration.md) โ€” AI assistant setup, MCP registration, hooks, auto-approval, and troubleshooting. +- [Project structure](docs/project-structure.md) โ€” files and directories created by RPG-Kit. + +## Upcoming Features + +- **Simpler generation commands:** merge the current multi-step generation flow into fewer commands, such as `/rpgkit.generate_repo`, `/rpgkit.generate_feature`, and `/rpgkit.plan`. +- **Multi-language support:** add support for Go, C++, Rust, JavaScript/TypeScript, and more. +- **More platform integrations:** support RPG-Kit across CLI and VS Code extension workflows for different AI coding agents on different systems. 
+ +## Troubleshooting + +**AI assistant CLI not found:** run `rpgkit check`, install and authenticate the selected assistant CLI, then rerun `rpgkit init` or `rpgkit update`. + +**MCP tools report `rpg_unavailable`:** run `/rpgkit.encode` to create `.rpgkit/data/rpg.json`. + +**Incremental update failed:** inspect `.rpgkit/logs/update_rpg.log`, then run `/rpgkit.update_rpg`. + +**Template download fails due to rate limits or private repo access:** pass `--github-token $GITHUB_TOKEN` or set `GH_TOKEN` / `GITHUB_TOKEN`. + +## License + +MIT License - See [LICENSE](LICENSE) for details. + +## Acknowledgements + +Based on [GitHub Spec-Kit](https://github.com/github/spec-kit). diff --git a/RPG-Kit/README.zh-CN.md b/RPG-Kit/README.zh-CN.md new file mode 100644 index 0000000..86d46c0 --- /dev/null +++ b/RPG-Kit/README.zh-CN.md @@ -0,0 +1,202 @@ +

RPG-Kit

+ +

+ English | + ็ฎ€ไฝ“ไธญๆ–‡ | + ๆ—ฅๆœฌ่ชž | + ํ•œ๊ตญ์–ด | + เคนเคฟเคจเฅเคฆเฅ€ +

+ +## ่ฎฉ AI ็ผ–็ ๆ™บ่ƒฝไฝ“็†่งฃๆ•ดไธชไป“ๅบ“ + +AI ็ผ–็ ๆ™บ่ƒฝไฝ“ๅพˆๅผบๅคง๏ผŒไฝ†ๅฎƒไปฌ้€šๅธธ้€ๆ–‡ไปถๅทฅไฝœใ€‚้š็€้กน็›ฎๅขž้•ฟ๏ผŒๅฎƒไปฌๅฏ่ƒฝไผšไธขๅคฑๅฏน้œ€ๆฑ‚ใ€ๆžถๆž„ใ€ไพ่ต–ๅ…ณ็ณปๅ’Œๆ—ขๆœ‰่ฎพ่ฎกๅ†ณ็ญ–็š„ๆŠŠๆกใ€‚ + +RPG-Kit ้€š่ฟ‡็ปดๆŠคไธ€ไธช **Repository Planning Graph (RPG)** ๆฅๅธฎๅŠฉ่งฃๅ†ณ่ฟ™ไธช้—ฎ้ข˜๏ผš่ฟ™ๆ˜ฏไธ€ๅผ ็ป“ๆž„ๅŒ–ๅœฐๅ›พ๏ผŒ่ฟžๆŽฅ้œ€ๆฑ‚ใ€ๅŠŸ่ƒฝใ€ๆ–‡ไปถใ€็ป„ไปถๅ’Œไพ่ต–ๅ…ณ็ณปใ€‚ + +ๅฝ“ไฝ ๅธŒๆœ› AI ๆ™บ่ƒฝไฝ“ๅŸบไบŽไป“ๅบ“็บงไธŠไธ‹ๆ–‡ๅทฅไฝœ๏ผŒ่€Œไธๆ˜ฏไพ่ต–ๅญค็ซ‹็š„ๆ็คบๆ—ถ๏ผŒๅฏไปฅไฝฟ็”จ RPG-Kitใ€‚ + +### ไธบไป€ไนˆ้€‰ๆ‹ฉ RPG-Kit๏ผŸ + +| AI ็ผ–็ ๆ™บ่ƒฝไฝ“็š„ๅธธ่ง้—ฎ้ข˜ | RPG-Kit ๅฆ‚ไฝ•ๅธฎๅŠฉ | +|---|---| +| ๆ™บ่ƒฝไฝ“ๅœจๅ‡ ่ฝฎๆ็คบๅŽๅฟ˜่ฎฐ้œ€ๆฑ‚ | ้œ€ๆฑ‚ไผš่ขซ็ผ–็ ่ฟ› RPG | +| ๆ™บ่ƒฝไฝ“ๅœจไธไบ†่งฃ็›ธๅ…ณๆ–‡ไปถ็š„ๆƒ…ๅ†ตไธ‹็ผ–่พ‘ๅ•ไธชๆ–‡ไปถ | ๆ–‡ไปถใ€็ป„ไปถๅ’Œไพ่ต–ๅ…ณ็ณปไผšๅœจๅ›พไธญ่ฟžๆŽฅ่ตทๆฅ | +| ็”Ÿๆˆ็š„ไปฃ็ ้€ๆธๅ็ฆปๅŽŸๅง‹่ฎกๅˆ’ | ่ง„ๅˆ’ไบง็‰ฉๅ’Œไปฃ็ ไผšไฟๆŒไธ€่‡ด | +| ็Žฐๆœ‰ไป“ๅบ“ๅพˆ้šพ่ฎฉๆ™บ่ƒฝไฝ“็†่งฃ | ๅฏไปฅๅฐ†ไปฃ็ ๅบ“็ผ–็ ไธบ RPG | +| ๆœ‰้’ˆๅฏนๆ€ง็š„็ผ–่พ‘ๅฏ่ƒฝ็ ดๅ้š่—ไพ่ต– | ็ผ–่พ‘ไผšๅŸบไบŽๅ›พๆ„Ÿ็ŸฅไธŠไธ‹ๆ–‡่ฟ›่กŒ | + +### ้€‰ๆ‹ฉไฝ ็š„ๅทฅไฝœๆต + +| ็›ฎๆ ‡ | ๅทฅไฝœๆต | ไปŽ่ฟ™้‡Œๅผ€ๅง‹ | +|---|---|---| +| ไปŽ้œ€ๆฑ‚ๅˆ›ๅปบๆ–ฐ้กน็›ฎ | ๆญฃๅ‘ๅทฅไฝœๆต | [`ๅฟซ้€Ÿๅผ€ๅง‹๏ผšๆ–ฐไป“ๅบ“`](#quick-start-new-repository) | +| ็†่งฃๆˆ–ๆ›ดๆ–ฐ็Žฐๆœ‰ไปฃ็ ๅบ“ | ๅๅ‘ๅทฅไฝœๆต | [`ๅฟซ้€Ÿๅผ€ๅง‹๏ผš็Žฐๆœ‰ไป“ๅบ“`](#quick-start-existing-repository) | +| ่ฟ›่กŒ็ฒพ็กฎ็š„ไป“ๅบ“ๆ„Ÿ็Ÿฅ็ผ–่พ‘ | ๅค–็ง‘ๅผ็ผ–่พ‘ๅทฅไฝœๆต | [`ๅฟซ้€Ÿๅผ€ๅง‹๏ผš็Žฐๆœ‰ไป“ๅบ“`](#quick-start-existing-repository) | + +ไธ‹้ขๆ˜ฏไธบๆญคไป“ๅบ“็”Ÿๆˆ็š„้ƒจๅˆ†ๅ›พๅฏ่ง†ๅŒ–็ป“ๆžœใ€‚่ฟ่กŒ `/rpgkit.encode` ๅนถๆ‰“ๅผ€ `rpg.html`๏ผŒๅณๅฏๆŽข็ดขๅฎŒๆ•ด็š„ไบคไบ’ๅผๅ›พใ€‚ + +![RPG-Kit ไป“ๅบ“ๅ›พๅฏ่ง†ๅŒ–](../docs/rpgkit_visualized_graph.png) + +## ๅฎ‰่ฃ… + +### ๅ…ˆๅ†ณๆกไปถ + +- Python 3.12+ +- [uv](https://docs.astral.sh/uv/) +- Git +- ๅทฒๅฎ‰่ฃ…ๅนถๅฎŒๆˆ่บซไปฝ้ชŒ่ฏ็š„ AI ็ผ–็ ๆ™บ่ƒฝไฝ“ CLI๏ผš[GitHub Copilot](https://docs.github.com/en/copilot) ๆˆ– [Claude 
Code](https://docs.anthropic.com/en/docs/claude-code/setup) + +### ๅฎ‰่ฃ… RPG-Kit + +```bash +# ๆŒไน…ๅฎ‰่ฃ…๏ผˆๆŽจ่๏ผ‰ +uv tool install rpgkit-cli --from "git+https://github.com/microsoft/RPG-ZeroRepo.git#subdirectory=RPG-Kit" +rpgkit check + +# ไธ€ๆฌกๆ€งไฝฟ็”จ +uvx --from "git+https://github.com/microsoft/RPG-ZeroRepo.git#subdirectory=RPG-Kit" rpgkit init +``` + + + +## ๅฟซ้€Ÿๅผ€ๅง‹๏ผšๆ–ฐไป“ๅบ“ + +ๅฝ“ไฝ ๆƒณ่ฎฉ RPG-Kit ๅฐ†้œ€ๆฑ‚่ฝฌๆขไธบๆ–ฐไปฃ็ ๅบ“ๆ—ถ๏ผŒไฝฟ็”จๆญค่ทฏๅพ„ใ€‚ + +> [!WARNING] +> ๅฏนไบŽ็”Ÿๆˆไปฃ็ ้‡ๆฏ”่พƒๅคง็š„้กน็›ฎ๏ผŒ`/rpgkit.design_interfaces` ๅ’Œ `/rpgkit.code_gen` ็š„่ฟ่กŒๆ—ถ้—ดไผšๆฏ”่พƒ้•ฟใ€‚ไธ€ไธชๅ…ธๅž‹็š„ไพ‹ๅญ๏ผš็‰นๅพๆ•ฐไธบ100๏ผŒ่ฟ่กŒๆ—ถ้—ดๅคง็บฆ30ๅˆ†้’Ÿใ€‚ + +1. ๅˆๅง‹ๅŒ–ๆ–ฐ้กน็›ฎ๏ผš + + ```bash + rpgkit init my-project + cd my-project + ``` + + ๅธธ่งๅ˜ไฝ“๏ผš + + ```bash + rpgkit init my-project --ai claude --script sh + rpgkit init my-project --ai copilot + rpgkit init my-project --github-token $GITHUB_TOKEN + ``` + +2. **[ๅฏ้€‰]** ๅฐ†ไฝ ็š„้œ€ๆฑ‚ๆ–‡ๆกฃๆ”พๅ…ฅ `my-project/docs/`ใ€‚ + +3. ๅœจ้กน็›ฎ็›ฎๅฝ•ไธญๅฏๅŠจไฝ ็š„ AI ็ผ–็ ๆ™บ่ƒฝไฝ“ใ€‚ + +4. ่ฟ่กŒๆญฃๅ‘ๆตๆฐด็บฟ๏ผš + + ```text + /rpgkit.feature_spec + /rpgkit.feature_build + /rpgkit.feature_refactor + [Optional] /rpgkit.feature_edit + /rpgkit.build_skeleton + /rpgkit.build_data_flow + /rpgkit.design_base_classes + /rpgkit.design_interfaces + /rpgkit.plan_tasks + /rpgkit.code_gen + [Optional] /rpgkit.rpg_edit + ``` + +RPG-Kit ไผš้€ๆญฅๅˆ›ๅปบ `.rpgkit/data/rpg.json`๏ผŒๅนถไฝฟ็”จๅฎƒๆฅไฟๆŒ้œ€ๆฑ‚ใ€่ง„ๅˆ’ไบง็‰ฉใ€็”Ÿๆˆ็š„ไปฃ็ ๅ’Œไพ่ต–ไฟกๆฏไธ€่‡ดใ€‚ + + + +## ๅฟซ้€Ÿๅผ€ๅง‹๏ผš็Žฐๆœ‰ไป“ๅบ“ + +ๅฝ“ไฝ ๅทฒ็ปๆœ‰ไธ€ไธชไปฃ็ ไป“ๅบ“๏ผŒๅนถๅธŒๆœ› AI ๆ™บ่ƒฝไฝ“ๅ€ŸๅŠฉ RPG ไธŠไธ‹ๆ–‡็†่งฃๆˆ–็ผ–่พ‘ๅฎƒๆ—ถ๏ผŒไฝฟ็”จๆญค่ทฏๅพ„ใ€‚ + +> [!WARNING] +> ๅฏนไบŽๆฏ”่พƒๅคง็š„้กน็›ฎ๏ผŒ`rpgkit init . --encode` ๅ’Œ `/rpgkit.encode` ็š„่ฟ่กŒๆ—ถ้—ดๅฏ่ƒฝไผšๆฏ”่พƒ้•ฟใ€‚ไธ€ไธชๅ…ธๅž‹็š„ไพ‹ๅญ๏ผšๆบไปฃ็ ๆ–‡ไปถๆ•ฐไธบ200๏ผŒ่ฟ่กŒๆ—ถ้—ด100ๅˆ†้’Ÿใ€‚ + +1. 
ๅœจไป“ๅบ“ๆ น็›ฎๅฝ•ๅˆๅง‹ๅŒ– RPG-Kit๏ผŒๅนถๆž„ๅปบๅˆๅง‹ๅ›พ๏ผš + + ```bash + mkdir my-project + cp -r existing-repo/ my-project/ + cd my-project + rpgkit init . --encode + ``` + + ๅฆ‚ๆžœไฝ ๆƒณ่ทณ่ฟ‡้ž็ฉบ็›ฎๅฝ•็š„็กฎ่ฎคๆ็คบ๏ผš + + ```bash + rpgkit init . --force --encode + ``` + +2. ๅœจไป“ๅบ“ไธญๅฏๅŠจไฝ ็š„ AI ็ผ–็ ๆ™บ่ƒฝไฝ“ใ€‚ + +3. ้€š่ฟ‡ MCP ๅทฅๅ…ทๅ’Œๆ–œๆ ๅ‘ฝไปคไฝฟ็”จ็”Ÿๆˆ็š„ RPG๏ผš + + ```text + /rpgkit.encode # ้œ€่ฆๆ—ถ้‡ๅปบๅฎŒๆ•ด RPG + /rpgkit.update_rpg # ๆ‰‹ๅŠจๅขž้‡ๆ›ดๆ–ฐๅ…œๅบ• + /rpgkit.rpg_edit # ๅ›พๆ„Ÿ็Ÿฅไปฃ็ ็ผ–่พ‘ + ``` + +4. ๆไบคๅŽ๏ผŒRPG-Kit hooks ไผšไฟๆŒ `.rpgkit/data/rpg.json`ใ€`.rpgkit/data/dep_graph.json` ๅ’Œ `.rpgkit/data/rpg.html` ไธŽไปฃ็ ๆ”นๅŠจไธ€่‡ดใ€‚ๅฆ‚ๆžœ hook ๅคฑ่ดฅๆˆ–่ขซ่ทณ่ฟ‡๏ผŒ่ฏท่ฟ่กŒ `/rpgkit.update_rpg`ใ€‚ + +## ๆ–ฐๅขžๅ†…ๅฎน + +่ฟ่กŒ `rpgkit init` ๅŽ๏ผŒworkspace root ไป็„ถๆ˜ฏไฝ ็š„้กน็›ฎไป“ๅบ“ๆ น็›ฎๅฝ•ใ€‚RPG-Kit ไผšๅฐ†ๅ‘ฝไปคๅฎšไน‰ใ€่ฟ่กŒๆ—ถ่„šๆœฌใ€MCP ้…็ฝฎๅ’Œ็”Ÿๆˆ็š„ๅ›พๆ•ฐๆฎไธŽไปฃ็ ไธ€่ตทๆทปๅŠ ๅˆฐ้กน็›ฎไธญใ€‚ + +```text +my-project/ +โ”œโ”€โ”€ docs/ # /rpgkit.feature_spec ็š„ๅฏ้€‰้œ€ๆฑ‚ๆ–‡ๆกฃ +โ”œโ”€โ”€ .github/ or .claude/ # AI assistant ๅ‘ฝไปคๅฎšไน‰ๅ’Œ่ฎพ็ฝฎ +โ”œโ”€โ”€ .vscode/ # ้€‚็”จๆ—ถ็š„ Copilot/VS Code MCP ้…็ฝฎ +โ””โ”€โ”€ .rpgkit/ # RPG-Kit ่ฟ่กŒๆ—ถ + โ”œโ”€โ”€ scripts/ # ๆตๆฐด็บฟ่„šๆœฌๅ’Œๆ”ฏๆŒๅŒ… + โ”œโ”€โ”€ data/ # ็”Ÿๆˆไบง็‰ฉ๏ผŒๅŒ…ๆ‹ฌ rpg.json ๅ’Œ dep_graph.json + โ”œโ”€โ”€ logs/ # ๆฏไธช้˜ถๆฎต็š„ๆ‰ง่กŒๆ—ฅๅฟ— + โ””โ”€โ”€ reports/ # ็”Ÿๆˆ็š„่ฏ„ๅฎกๅ’Œ่ฏŠๆ–ญๆŠฅๅ‘Š +``` + +ๅฎŒๆ•ด็›ฎๅฝ•ๅธƒๅฑ€ๅ’Œๆ•ฐๆฎๆ–‡ไปถๅ‚่€ƒ่ง [docs/project-structure.md](docs/project-structure.md)ใ€‚ + +## ๆ”ฏๆŒ็š„ๅนณๅฐ + +| ๅนณๅฐ | Claude Code | GitHub Copilot | Codex | +| ----------------------- | ----------- | -------------- | ----- | +| CLI ไฝฟ็”จ | โœ… | โœ…(ๆ—  MCP) | โŒ› | +| VS Code ๆ‰ฉๅฑ•ไฝฟ็”จ | โœ… | โœ… | โŒ› | + +| ่„šๆœฌ | Linux | Windows | Mac | +| ---- | ----- | ------- | --- | +| sh | โœ… | โŒ› | โŒ› | +| ps | N/A | โŒ› | โŒ› | + +## ๆ–‡ๆกฃ + +- [ๆ–œๆ ๅ‘ฝไปคๅ‚่€ƒ](docs/commands.md) โ€” ๆฏไธช 
`/rpgkit.*` ๅ‘ฝไปคใ€่พ“ๅ…ฅใ€่พ“ๅ‡บๅ’Œ็คบไพ‹ใ€‚
+- [CLI ๅ‚่€ƒ](docs/cli-reference.md) โ€” `rpgkit init`ใ€`rpgkit update`ใ€`rpgkit check`ใ€`rpgkit version` ไปฅๅŠๆ‰€ๆœ‰้€‰้กนใ€‚
+- [้…็ฝฎ](docs/configuration.md) โ€” AI assistant ่ฎพ็ฝฎใ€MCP ๆณจๅ†Œใ€hooksใ€่‡ชๅŠจๆ‰นๅ‡†ๅ’Œๆ•…้šœๆŽ’้™คใ€‚
+- [้กน็›ฎ็ป“ๆž„](docs/project-structure.md) โ€” RPG-Kit ๅˆ›ๅปบ็š„ๆ–‡ไปถๅ’Œ็›ฎๅฝ•ใ€‚
+
+## ๅณๅฐ†ๆŽจๅ‡บ็š„ๅŠŸ่ƒฝ
+
+- **ๆ›ด็ฎ€ๅ•็š„็”Ÿๆˆๅ‘ฝไปค๏ผš** ๅฐ†ๅฝ“ๅ‰ๅคšๆญฅ็”Ÿๆˆๆต็จ‹ๅˆๅนถไธบๆ›ดๅฐ‘็š„ๅ‘ฝไปค๏ผŒๅŒ…ๆ‹ฌ็”จไบŽ็ซฏๅˆฐ็ซฏไป“ๅบ“็”Ÿๆˆ็š„ `/rpgkit.generate_repo`๏ผŒไปฅๅŠ็”จไบŽๅŠŸ่ƒฝ็”Ÿๆˆๅ’Œ RPG ่ง„ๅˆ’็š„ `/rpgkit.generate_feature` ๅŠ  `/rpgkit.plan`ใ€‚
+- **ๅคš่ฏญ่จ€ๆ”ฏๆŒ๏ผš** ๅขžๅŠ ๅฏน Goใ€C++ใ€Rustใ€JavaScript/TypeScript ็ญ‰่ฏญ่จ€็š„ๆ”ฏๆŒใ€‚
+- **ๆ›ดๅคšๅนณๅฐ้›†ๆˆ๏ผš** ๆ”ฏๆŒ RPG-Kit ๅœจไธๅŒ็ณป็ปŸไธŠไธŽไธๅŒ AI ็ผ–็ ๆ™บ่ƒฝไฝ“็š„ CLI ๅ’Œ VS Code ๆ‰ฉๅฑ•ๅทฅไฝœๆต้…ๅˆไฝฟ็”จใ€‚
+
+## ๆ•…้šœๆŽ’้™ค
+
+**ๆ‰พไธๅˆฐ AI assistant CLI๏ผš** ่ฟ่กŒ `rpgkit check`๏ผŒๅฎ‰่ฃ…ๅนถ่ฎค่ฏๆ‰€้€‰ assistant CLI๏ผŒ็„ถๅŽ้‡ๆ–ฐ่ฟ่กŒ `rpgkit init` ๆˆ– `rpgkit update`ใ€‚
+
+**MCP ๅทฅๅ…ทๆŠฅๅ‘Š `rpg_unavailable`๏ผš** ่ฟ่กŒ `/rpgkit.encode` ๆฅๅˆ›ๅปบ `.rpgkit/data/rpg.json`ใ€‚
+
+**ๅขž้‡ๆ›ดๆ–ฐๅคฑ่ดฅ๏ผš** ๆฃ€ๆŸฅ `.rpgkit/logs/update_rpg.log`๏ผŒ็„ถๅŽ่ฟ่กŒ `/rpgkit.update_rpg`ใ€‚
+
+**็”ฑไบŽ้€Ÿ็އ้™ๅˆถๆˆ–็งๆœ‰ไป“ๅบ“่ฎฟ้—ฎๅฏผ่‡ดๆจกๆฟไธ‹่ฝฝๅคฑ่ดฅ๏ผš** ไผ ๅ…ฅ `--github-token $GITHUB_TOKEN`๏ผŒๆˆ–่ฎพ็ฝฎ `GH_TOKEN` / `GITHUB_TOKEN`ใ€‚
+
+## ่ฎธๅฏ่ฏ
+
+MIT License - ่ฏฆ่ง [LICENSE](LICENSE)ใ€‚
+
+## ่‡ด่ฐข
+
+ๅŸบไบŽ [GitHub Spec-Kit](https://github.com/github/spec-kit)ใ€‚
diff --git a/RPG-Kit/docs/cli-reference.md b/RPG-Kit/docs/cli-reference.md
new file mode 100644
index 0000000..bb6ab41
--- /dev/null
+++ b/RPG-Kit/docs/cli-reference.md
@@ -0,0 +1,111 @@
+# CLI Reference
+
+This document covers the `rpgkit` command-line interface. Use the CLI to install templates, initialize projects, update RPG-Kit files, and verify local tool availability. 
+
+## `rpgkit init`
+
+Initialize a new project from the latest template, or add RPG-Kit to an existing repository.
+
+```bash
+rpgkit init <project-name> [options]
+rpgkit init --here [options]
+rpgkit init . [options]
+```
+
+### Options
+
+| Option | Description |
+| ------ | ----------- |
+| `--ai <assistant>` | AI assistant: `copilot` or `claude` |
+| `--script <type>` | Script type: `sh` (POSIX) or `ps` (PowerShell) |
+| `--here` | Initialize in current directory |
+| `--force` | Skip confirmation for non-empty current directory |
+| `--no-git` | Skip git initialization |
+| `--no-mcp` | Skip MCP server configuration |
+| `--ignore-agent-tools` | Skip checks for AI agent CLI tools |
+| `--github-token <token>` | GitHub token for private repos or higher rate limits |
+| `--pre` | Download the latest pre-release template |
+| `--skip-tls` | Skip SSL/TLS verification |
+| `--encode/--no-encode` | Run or skip initial RPG encoding at the end of init |
+| `--debug` | Show verbose diagnostic output |
+
+### Supported AI Assistants
+
+| Agent | Folder | Description | Status |
+| ----- | ------ | ----------- | ------ |
+| `copilot` | `.github/`, `.vscode/` | GitHub Copilot | Verified |
+| `claude` | `.claude/` | Claude Code | Verified |
+
+RPG-Kit currently supports only **GitHub Copilot** and **Claude Code** in the CLI. Additional agents may be adapted in future releases.
+
+### Examples
+
+```bash
+rpgkit init my-project
+rpgkit init my-project --ai claude --script sh
+rpgkit init . --force
+rpgkit init . --encode
+rpgkit init . --force --encode
+rpgkit init --here --ai copilot
+rpgkit init --here --github-token $GITHUB_TOKEN
+```
+
+## `rpgkit update`
+
+Update RPG-Kit template files, scripts, command definitions, MCP configuration, gitignore rules, and hooks in an existing project. The AI assistant is auto-detected from existing project configuration when possible. 
+
+```bash
+rpgkit update
+rpgkit update --ai claude
+rpgkit update --pre
+rpgkit update --no-mcp
+rpgkit update --github-token $GITHUB_TOKEN
+```
+
+### Options
+
+| Option | Description |
+| ------ | ----------- |
+| `--ai <assistant>` | AI assistant, auto-detected if not specified |
+| `--script <type>` | Script type: `sh` (POSIX) or `ps` (PowerShell) |
+| `--github-token <token>` | GitHub token for private repos or higher rate limits |
+| `--pre` | Download the latest pre-release template |
+| `--no-mcp` | Skip MCP server configuration |
+| `--skip-tls` | Skip SSL/TLS verification |
+| `--debug` | Show verbose diagnostic output |
+
+## `rpgkit check`
+
+Verify that required tools are installed.
+
+```bash
+rpgkit check
+```
+
+Run this after installation to confirm Python, Git, uv, and the selected AI assistant CLI are available.
+
+## `rpgkit version`
+
+Display version and system information.
+
+```bash
+rpgkit version
+```
+
+## Network and Release Options
+
+```bash
+rpgkit init my-project --github-token $GITHUB_TOKEN
+rpgkit init my-project --pre
+rpgkit init my-project --skip-tls
+rpgkit init my-project --debug
+```
+
+| Option | Description |
+| ------ | ----------- |
+| `--github-token <token>` | Uses a GitHub token for API requests, useful for private repos or rate limits |
+| `--pre` | Downloads the latest pre-release template instead of the latest stable release |
+| `--skip-tls` | Skips SSL/TLS verification; use only for constrained environments |
+| `--debug` | Prints verbose diagnostic output for network and extraction failures |
+
+`GH_TOKEN` and `GITHUB_TOKEN` are also recognized for GitHub API requests. 
diff --git a/RPG-Kit/docs/commands.md b/RPG-Kit/docs/commands.md
new file mode 100644
index 0000000..e94858f
--- /dev/null
+++ b/RPG-Kit/docs/commands.md
@@ -0,0 +1,495 @@
+# /rpgkit Commands Reference
+
+RPG-Kit provides 13 slash commands that work in three paths:
+
+- **Forward pipeline:** Requirements โ†’ Repository Planning Graph (RPG) โ†’ Code
+- **Reverse encoder:** Existing code โ†’ RPG
+- **Surgical edit:** Natural-language changes applied to code, RPG, and dependency graph together
+
+## Command Overview
+
+### Phase 1: Feature Specification
+
+| Command | Description |
+| ------- | ----------- |
+| `/rpgkit.feature_spec <description>` | Create structured feature specifications from user input or `docs/` files |
+| `/rpgkit.feature_build` | Generate and expand the feature tree from specifications |
+| `/rpgkit.feature_refactor` | Refactor feature tree into modular component architecture |
+| `/rpgkit.feature_edit <instruction>` | Edit feature tree nodes before skeleton planning โ€” optional |
+
+### Phase 2: RPG Construction and Planning
+
+| Command | Description |
+| ------- | ----------- |
+| `/rpgkit.build_skeleton` | Build repository file skeleton from component architecture; creates `.rpgkit/data/rpg.json` |
+| `/rpgkit.build_data_flow` | Build inter-component data flow DAG and update the RPG |
+| `/rpgkit.design_base_classes` | Design shared base classes and data structures |
+| `/rpgkit.design_interfaces` | Design function/class interfaces with type hints and docstrings |
+| `/rpgkit.plan_tasks` | Plan dependency-ordered implementation task batches |
+
+### Phase 3: Code Generation and Surgical Edits
+
+| Command | Description |
+| ------- | ----------- |
+| `/rpgkit.code_gen` | TDD-based implementation with iterative test-code-fix cycles |
+| `/rpgkit.rpg_edit <instruction>` | Surgical edit of RPG graph, code, and dependency graph from a natural-language instruction โ€” optional |
+
+### RPG Encoder: Code to RPG
+
+| Command | Description |
+| ------- | ----------- |
+| 
`/rpgkit.encode` | Encode an existing repository into `.rpgkit/data/rpg.json` | +| `/rpgkit.update_rpg` | Manually run incremental RPG update when the automatic hook is skipped or fails | + +Both directions produce the same RPG structure at `.rpgkit/data/rpg.json`, enabling AI agents to query the graph via the **MCP server** (`search_rpg`, `explore_rpg`, `get_node_detail`, `list_rpg_tree`). See [configuration.md](configuration.md) for MCP details. + +--- + +## Phase 1: Feature Specification + +### `/rpgkit.feature_spec` + +Create structured feature specifications from user input or documentation files. + +**Input modes:** + +- **Direct input:** provide a description after the command +- **Auto-detect:** omit input to auto-detect `docs/*.md` files + +**Output:** + +```text +.rpgkit/data/feature_spec/ +โ”œโ”€โ”€ evidence/ # Source evidence files +โ”‚ โ”œโ”€โ”€ user_input.md # From direct user input, or +โ”‚ โ”œโ”€โ”€ 01_project_charter.md +โ”‚ โ””โ”€โ”€ ... +โ”œโ”€โ”€ feature_spec.md # Meta + Background + NFR +โ””โ”€โ”€ features/ # Feature tree documents + โ”œโ”€โ”€ FT-001.md + โ”œโ”€โ”€ FT-002.md + โ””โ”€โ”€ ... +``` + +Also generates `.rpgkit/data/feature_spec.json`. + +**Examples:** + +```text +/rpgkit.feature_spec Build a CLI tool for managing Docker containers +/rpgkit.feature_spec # Auto-detect docs/ files +``` + +--- + +### `/rpgkit.feature_build` + +Generate and iteratively refine the feature tree from `.rpgkit/data/feature_spec.json`. + +**Input:** `.rpgkit/data/feature_spec.json` + +**Output:** `.rpgkit/data/feature_build.json` + +**Current workflow:** + +1. **Validate status** โ€” runs `.rpgkit/scripts/feature_build_validation.py` to verify that `feature_spec.json` exists and decide whether this is a first build or an expansion. +2. **Build or expand** โ€” runs `.rpgkit/scripts/feature_build.py --mode step1`. + - If `feature_build.json` does not exist, RPG-Kit builds the feature tree from the specification and iterates until requirements are covered. 
+ - If `feature_build.json` already exists, RPG-Kit switches to beyond-spec expansion mode and adds production-relevant features not described by the original spec. +3. **Review** โ€” validates coverage, duplicates, and MIU constraints. Coverage review uses a default threshold of `98.0` and up to `3` review iterations. +4. **Optional user-guided expansion** โ€” the agent can ask whether to suggest additional expansion directions, then run `--mode suggest-directions` and `--mode step2 --direction `. + +The spec-driven expansion loop has a hard safety cap of 20 iterations; the model self-terminates when it determines the spec is covered. + +**Examples:** + +```text +/rpgkit.feature_build +``` + +--- + +### `/rpgkit.feature_refactor` + +Refactor the feature tree into a modular component architecture. + +**Input:** `.rpgkit/data/feature_build.json` + +**Output:** `.rpgkit/data/feature_tree.json` + +**Process:** + +1. **Plan** โ€” analyze domains and plan subtree structure. +2. **Assign** โ€” iteratively assign features to planned subtrees. The default assignment budget is 10 iterations and stops early when assignment reaches at least 99%. + +**Example:** + +```text +/rpgkit.feature_refactor +``` + +--- + +### `/rpgkit.feature_edit` + +Edit feature tree nodes before repository planning begins. + +**Input/Output:** `.rpgkit/data/feature_tree.json` + +**Supported edits:** add, delete, modify, expand, move, or merge feature tree nodes. + +**Process:** + +1. **Plan** โ€” generate an edit plan from the user's instruction. +2. **Execute** โ€” apply the planned changes. +3. **Review** โ€” verify and auto-fix if needed, up to 3 rounds. 
+ +**Examples:** + +```text +/rpgkit.feature_edit Delete the 'cloud integration' component +/rpgkit.feature_edit Add logging features under 'cli operations' +/rpgkit.feature_edit Expand the 'security' component with encryption options +/rpgkit.feature_edit Merge 'analytics telemetry' into 'monitoring observability' +``` + +--- + +## Phase 2: RPG Construction and Planning + +### `/rpgkit.build_skeleton` + +Build the repository file skeleton from the component architecture. This is where the forward pipeline first creates the RPG. + +**Input:** `.rpgkit/data/feature_tree.json` + +**Output:** + +- `.rpgkit/data/skeleton.json` โ€” file skeleton +- `.rpgkit/data/skeleton_summary.txt` โ€” human-readable skeleton summary +- `.rpgkit/data/rpg.json` โ€” initial Repository Planning Graph with file and feature nodes + +**Process:** + +1. **Directory design** โ€” design directory structure for each component. +2. **File assignment** โ€” assign features to source files. The default assignment budget is 10 iterations. + +**Examples:** + +```text +/rpgkit.build_skeleton +/rpgkit.build_skeleton Prefer flat directory structure +``` + +--- + +### `/rpgkit.build_data_flow` + +Build inter-component data flow as a directed acyclic graph (DAG). + +**Input:** `.rpgkit/data/skeleton.json`, `.rpgkit/data/feature_tree.json` + +**Output:** + +- `.rpgkit/data/data_flow.json` โ€” data flow DAG +- `.rpgkit/data/data_flow_viz.html` โ€” interactive visualization +- Updates `.rpgkit/data/rpg.json` โ€” adds data-flow edges + +**Process:** + +1. **Pre-check** โ€” verifies whether data flow is missing, valid, or mismatched with the skeleton. +2. **Iteration choice** โ€” asks for max iterations: + - `Y` uses the default of 5 iterations. + - A number sets a custom iteration budget. +3. **DAG design** โ€” runs `.rpgkit/scripts/build_data_flow.py --max-iterations `. +4. **Validation** โ€” runs `.rpgkit/scripts/check_data_flow.py --verbose`. +5. 
**Visualization** โ€” runs `.rpgkit/scripts/generate_viz.py` when a new data flow is built. + +**Example:** + +```text +/rpgkit.build_data_flow +/rpgkit.build_data_flow Make the ingestion layer independent from reporting +``` + +--- + +### `/rpgkit.design_base_classes` + +Design shared base classes and global data structures to improve modularity and reuse. + +**Input:** `.rpgkit/data/skeleton.json`, `.rpgkit/data/data_flow.json` + +**Output:** + +- `.rpgkit/data/base_classes.json` โ€” base class and global data structure definitions +- Updates `.rpgkit/data/rpg.json` โ€” adds base-class relationship edges + +**Process:** + +1. **Functional base classes** โ€” design behavioral abstractions. +2. **Global data structures** โ€” design shared data formats. + +**Options:** + +| Input | Description | +| ----- | ----------- | +| `Y` | Use defaults, 5 iterations | +| Number | Set a custom iteration count | + +**Example:** + +```text +/rpgkit.design_base_classes +``` + +--- + +### `/rpgkit.design_interfaces` + +Design function and class interfaces with type hints and docstrings for all planned repository files. + +**Input:** `.rpgkit/data/skeleton.json`, `.rpgkit/data/data_flow.json`, `.rpgkit/data/base_classes.json` + +**Output:** + +- `.rpgkit/data/interfaces.json` โ€” function/class interface definitions +- Updates `.rpgkit/data/rpg.json` โ€” adds fine-grained dependency edges such as inheritance, invocation, and references + +**Process:** + +1. Read skeleton, data flow, and base classes for context. +2. Process components in dependency order from the data flow DAG. +3. Design functions and classes with type-hinted signatures. +4. Map each unit to the features it implements. + +**Example:** + +```text +/rpgkit.design_interfaces +``` + +--- + +### `/rpgkit.plan_tasks` + +Plan implementation tasks from interface definitions, organized into dependency-ordered batches. 
+ +**Input:** `.rpgkit/data/interfaces.json`, `.rpgkit/data/data_flow.json`, `.rpgkit/data/rpg.json` + +**Output:** `.rpgkit/data/tasks.json` + +**Process:** + +1. Analyze dependencies between units using the RPG. +2. Sort units topologically. +3. Group units into implementation batches. +4. Add auxiliary file tasks such as `requirements.txt`, `main.py`, `README.md`, and `.gitignore`. + +**Example:** + +```text +/rpgkit.plan_tasks +``` + +--- + +## Phase 3: Code Generation and Surgical Edits + +### `/rpgkit.code_gen` + +Execute TDD-based code implementation with iterative test-code-fix cycles. + +**Input:** `.rpgkit/data/tasks.json`, `.rpgkit/data/interfaces.json`, `.rpgkit/data/base_classes.json`, `.rpgkit/data/data_flow.json`, `.rpgkit/data/rpg.json` + +**Output:** complete tested source code, `.rpgkit/data/code_gen_state.jsonl`, and updated `.rpgkit/data/rpg.json` + +**Batch modes:** + +| Mode | Description | +| ---- | ----------- | +| `S` | Single-batch mode: one batch at a time | +| `F` | File-merge mode: merge batches per file, optionally limited by max units | + +**TDD cycle:** + +1. Initialize the codebase if needed. +2. Create a branch from `main` for the next batch. +3. Dispatch a sub-agent to write tests, implement code, run pytest, and fix failures. +4. Independently verify the batch. +5. Merge successful batches into `main`; preserve failed branches for inspection. +6. Continue autonomously until all tasks are processed. +7. Run final test and global review. + +**Auxiliary files:** + +| File | Test method | +| ---- | ----------- | +| `requirements.txt` | Import validation in an isolated virtual environment | +| `main.py` | Execution test, usually `--help` | +| `README.md` | No direct test | +| `.gitignore` | No direct test | + +**Example:** + +```text +/rpgkit.code_gen +``` + +--- + +### `/rpgkit.rpg_edit` + +Apply a natural-language edit to code, RPG, and dependency graph in sync. 
+ +This command is independent from `/rpgkit.feature_edit` and `/rpgkit.update_rpg`. It does not edit `feature_tree.json`; it uses the current RPG feature graph as the authoritative entry point for code modifications. + +**Input:** edit instruction after the command + +**Input files:** `.rpgkit/data/rpg.json`, `.rpgkit/data/dep_graph.json` + +**Generated files:** + +- `.rpgkit/data/rpg_edit_impact.json` — impact analysis output +- `.rpgkit/data/rpg_edit_plan.json` — user-confirmed edit plan +- `.rpgkit/data/rpg_edit_code_result.json` — code application result + +**Workflow:** + +1. **Pre-check** — runs `.rpgkit/scripts/rpg_edit/validate.py --json` and stops if the RPG or dependency graph is unavailable. +2. **Locate target nodes** — runs `.rpgkit/scripts/rpg_edit/locate.py --query "<instruction>" --json` and selects existing nodes or nearest parent nodes for new features. +3. **Analyze impact** — runs `.rpgkit/scripts/rpg_edit/impact.py --node-id ... --json` to identify affected nodes, callers, callees, and files. +4. **Optional visual reconnaissance** — for UI/layout/style edits, probes the app with the browser helper when available. +5. **Mandatory code reconnaissance** — reads affected files and searches related patterns before producing a plan. +6. **Generate and confirm plan** — writes `.rpgkit/data/rpg_edit_plan.json` and asks the user to apply, cancel, revise, or inspect a node. +7. **Apply on a branch** — creates a `rpg-edit/<name>` branch only after a clean working-tree preflight. +8. **RPG-first apply** — updates RPG feature changes first, then dispatches code changes, refreshes `dep_graph.json`, and folds graph updates into the branch commit. +9. **Test and review** — runs smoke tests and impact review. +10. **Merge or preserve** — merges into `main` only after tests pass; failed runs leave the branch for inspection. 
+ +**Examples:** + +```text +/rpgkit.rpg_edit Add a last_login field to the User model and update it on login +/rpgkit.rpg_edit Add rate limiting to all API endpoints +/rpgkit.rpg_edit Refactor auth into separate registration and login modules +``` + +--- + +## RPG Encoder: Code to RPG + +The encoder works in the reverse direction from the forward pipeline. It takes an existing codebase and produces the same Repository Planning Graph structure used by RPG-Kit's planning, editing, and MCP tooling. + +### `/rpgkit.encode` + +Encode the current repository into an RPG from scratch. + +**Output:** + +- `.rpgkit/data/rpg.json` โ€” Repository Planning Graph +- `.rpgkit/data/dep_graph.json` โ€” code dependency graph used for incremental sync and edits + +**Process:** + +1. **Pre-check** โ€” runs `.rpgkit/scripts/rpg_encoder/check_encode.py --json`. +2. **Full encode** โ€” runs `.rpgkit/scripts/rpg_encoder/run_encode.py --json`. +3. **Next steps** โ€” suggests `/rpgkit.update_rpg` for incremental updates and MCP tools for exploration. + +If `rpg.json` already exists, the command asks whether to full re-encode, switch to `/rpgkit.update_rpg`, or quit. + +**Example:** + +```text +/rpgkit.encode +``` + +--- + +### `/rpgkit.update_rpg` + +Manually trigger an incremental RPG update when the automatic hook did not run or when the user wants an immediate foreground update. + +Under normal use, RPG-Kit installs a post-commit hook that updates the RPG in the background after each commit. This command is the manual fallback. + +**Input:** existing `.rpgkit/data/rpg.json` and a git repository with at least two commits + +**Output:** updated `.rpgkit/data/rpg.json` and `.rpgkit/data/dep_graph.json` + +**Process:** + +1. **Pre-check** โ€” runs `.rpgkit/scripts/rpg_encoder/check_encode.py --json` and stops if `rpg.json` is missing or corrupt. +2. **Commit baseline check** โ€” verifies `HEAD~1` exists. If there is no previous commit, run `/rpgkit.encode` instead. +3. 
**Incremental update** โ€” runs `.rpgkit/scripts/update_graphs.py update-rpg --json`, comparing the current workspace against `HEAD~1`, the same baseline used by the hook. +4. **Report result** โ€” displays node/edge deltas, functional areas, alignment status, and output path. + +Use this command when: + +- The post-commit hook failed or was skipped. +- `.rpgkit/logs/update_rpg.log` shows an error. +- The RPG seems stale and you want to force a synchronous update. + +**Example:** + +```text +/rpgkit.update_rpg +``` + +--- + +## MCP Server Tools + +RPG-Kit registers an MCP server named `rpg-tools` so AI agents can query `.rpgkit/data/rpg.json` during chat. The server exposes four read-only tools: + +| Tool | Description | +| ---- | ----------- | +| `search_rpg` | Search code entities or features by keyword, path, class, function, or feature name | +| `explore_rpg` | Traverse dependencies and call chains from a starting node | +| `get_node_detail` | Fetch full details for a function, class, file, or feature node | +| `list_rpg_tree` | Render the functional architecture as a tree | + +If `.rpgkit/data/rpg.json` is not available yet, the tools return an `rpg_unavailable` response that asks the agent to run `/rpgkit.encode`. + +See [configuration.md](configuration.md) for MCP registration, auto-approval, hooks, and initialization options. 
+ +--- + +## Data Files + +All intermediate data is stored in `.rpgkit/data/`: + +| File | Produced by | Description | +| ---- | ----------- | ----------- | +| `feature_spec/` | `feature_spec` | Evidence and feature specification documents | +| `feature_spec.json` | `feature_spec` | Structured feature specification | +| `feature_build.json` | `feature_build` | Expanded feature tree | +| `feature_tree.json` | `feature_refactor` / `feature_edit` | Component architecture | +| `skeleton.json` | `build_skeleton` | File skeleton | +| `skeleton_summary.txt` | `build_skeleton` | Human-readable skeleton summary | +| `rpg.json` | `build_skeleton` / `encode`, then updated by later commands | Repository Planning Graph | +| `dep_graph.json` | `encode` / `update_rpg` / `rpg_edit` | Code dependency graph used for incremental sync and edits | +| `data_flow.json` | `build_data_flow` | Inter-component data flow DAG | +| `data_flow_viz.html` | `build_data_flow` | Data flow visualization | +| `base_classes.json` | `design_base_classes` | Shared base class definitions | +| `interfaces.json` | `design_interfaces` | Function/class interface definitions | +| `tasks.json` | `plan_tasks` | Dependency-ordered implementation batches | +| `code_gen_state.jsonl` | `code_gen` | Code generation progress state, append-only JSONL | +| `rpg_edit_impact.json` | `rpg_edit` | Impact analysis for a surgical edit | +| `rpg_edit_plan.json` | `rpg_edit` | Confirmed surgical edit plan | +| `rpg_edit_code_result.json` | `rpg_edit` | Code application result for a surgical edit | +| `trajectory/` | All scripts | Execution trajectory logs | + +### `rpg.json` โ€” The Repository Planning Graph + +`rpg.json` is the central artifact that ties the pipeline together. It can be created in either direction: + +1. **Forward:** `/rpgkit.build_skeleton` creates it from `feature_tree.json`; later planning and generation commands enrich it. +2. 
**Reverse:** `/rpgkit.encode` creates it from an existing codebase; `/rpgkit.update_rpg` keeps it aligned after commits. + +Subsequent commands update the same file: + +1. **`build_data_flow`** โ€” adds data-flow edges. +2. **`design_base_classes`** โ€” adds base-class relationship edges. +3. **`design_interfaces`** โ€” adds fine-grained dependency edges. +4. **`code_gen`** โ€” updates implementation status as code is generated. +5. **`rpg_edit`** โ€” applies targeted feature graph edits together with code and dependency graph changes. diff --git a/RPG-Kit/docs/configuration.md b/RPG-Kit/docs/configuration.md new file mode 100644 index 0000000..9b5d36b --- /dev/null +++ b/RPG-Kit/docs/configuration.md @@ -0,0 +1,247 @@ +# Configuration + +This document covers RPG-Kit configuration that is useful after installation: AI assistant setup, MCP registration, auto-approval, hooks, and initial encoding. + +## AI Assistant CLI Requirements + +RPG-Kit slash commands are executed by an AI coding agent. Before running `rpgkit init`, install and authenticate at least one supported AI assistant CLI. + +Currently verified assistants: + +| Agent | `--ai` value | Generated configuration | Requirement | +| ----- | ------------ | ----------------------- | ----------- | +| GitHub Copilot | `copilot` | `.github/`, `.vscode/` | Copilot CLI available and authenticated | +| Claude Code | `claude` | `.claude/` | Claude Code CLI available and authenticated | + +Use `rpgkit check` to verify required local tools. + +```bash +rpgkit check +``` + +If the selected AI assistant is not found, install and authenticate it, then rerun `rpgkit init` or `rpgkit update`. + +## Initialization Options + +### AI assistant selection + +```bash +rpgkit init my-project --ai claude +rpgkit init my-project --ai copilot +``` + +If `--ai` is omitted in an interactive terminal, RPG-Kit prompts for a supported assistant. 
+ +### Script type + +```bash +rpgkit init my-project --script sh +rpgkit init my-project --script ps +``` + +`sh` installs POSIX shell-oriented command snippets. `ps` installs PowerShell-oriented snippets. + +### MCP registration + +By default, `rpgkit init` registers the RPG-Kit MCP server for the selected assistant. + +```bash +rpgkit init my-project +``` + +Pass `--no-mcp` to skip MCP registration: + +```bash +rpgkit init my-project --no-mcp +rpgkit update --no-mcp +``` + +Skipping MCP means the slash-command pipeline still works, but the AI assistant will not get the `rpg-tools` graph-query tools automatically. + +### Initial encode + +The MCP tools query `.rpgkit/data/rpg.json`. For existing codebases, that file is created by the encoder. + +`rpgkit init` supports: + +```bash +rpgkit init --here --encode +rpgkit init --here --no-encode +``` + +Behavior: + +- `--encode` runs the encoder at the end of init without prompting. +- `--no-encode` skips the encoder prompt. +- If neither flag is provided, RPG-Kit may prompt in an interactive terminal when Python code is present. + +You can always run the encoder later from the AI assistant: + +```text +/rpgkit.encode +``` + +## MCP Server + +RPG-Kit's MCP server is named `rpg-tools`. It reads `.rpgkit/data/rpg.json` and exposes read-only graph-query tools to the AI assistant. + +| Tool | Purpose | +| ---- | ------- | +| `search_rpg` | Search code entities or features by keyword, path, function, class, or feature name | +| `explore_rpg` | Traverse dependencies and call chains from a starting node | +| `get_node_detail` | Fetch details for a specific node, optionally including source code | +| `list_rpg_tree` | Render the functional architecture as a tree | + +If `.rpgkit/data/rpg.json` does not exist yet, the tools return an `rpg_unavailable` response with a next step telling the agent to run `/rpgkit.encode`. 
+ +## Assistant Configuration Files + +### Claude Code + +For Claude Code, RPG-Kit writes command definitions and settings under `.claude/`: + +```text +.claude/ +โ”œโ”€โ”€ commands/ # /rpgkit.* command definitions +โ””โ”€โ”€ settings.json # permissions and MCP auto-approval +``` + +The settings file grants project-scoped permissions needed by RPG-Kit commands, including access to the `rpg-tools` MCP server. Review `.claude/settings.json` if your team wants stricter local permission prompts. + +### GitHub Copilot / VS Code + +For Copilot, RPG-Kit writes agent instructions under `.github/` and VS Code MCP configuration under `.vscode/`: + +```text +.github/ +โ”œโ”€โ”€ agents/ # rpgkit.* agent definitions +โ””โ”€โ”€ prompts/ # companion prompts +.vscode/ +โ””โ”€โ”€ mcp.json # rpg-tools registration +``` + +Open the project in VS Code after initialization so the workspace MCP configuration is available to Copilot. + +## Auto-approval and Scope + +RPG-Kit pre-authorizes the `rpg-tools` MCP server where the selected assistant supports project-scoped permissions. The goal is to avoid prompting on every graph query during chat. + +Scope rules: + +- Configuration is written into the project that ran `rpgkit init` or `rpgkit update`. +- User-level assistant settings are not modified. +- Passing `--no-mcp` skips MCP registration and related auto-approval entries. + +## Git Hooks and Incremental Updates + +RPG-Kit installs local git hooks to keep the RPG aligned with code changes. + +The important hook behavior is: + +- After commits, RPG-Kit can run an incremental update in the background. +- The update refreshes `.rpgkit/data/rpg.json` and `.rpgkit/data/dep_graph.json`. +- Logs are written to `.rpgkit/logs/update_rpg.log`. + +Manual fallback: + +```text +/rpgkit.update_rpg +``` + +Use `/rpgkit.update_rpg` when: + +- The hook failed. +- The hook was skipped. +- You want to force a foreground update and inspect the result. 
+ +If the RPG seems significantly stale or corrupted, run a full encode instead: + +```text +/rpgkit.encode +``` + +## Updating an Existing RPG-Kit Project + +Run `rpgkit update` from the project root to refresh scripts, command definitions, MCP configuration, gitignore rules, and hooks. + +```bash +rpgkit update +rpgkit update --ai claude +rpgkit update --pre +rpgkit update --no-mcp +``` + +`rpgkit update` auto-detects the existing assistant configuration when possible. + +## Network and Release Options + +```bash +rpgkit init my-project --github-token $GITHUB_TOKEN +rpgkit init my-project --pre +rpgkit init my-project --skip-tls +rpgkit init my-project --debug +``` + +| Option | Description | +| ------ | ----------- | +| `--github-token <token>` | Uses a GitHub token for API requests, useful for private repos or rate limits | +| `--pre` | Downloads the latest pre-release template instead of the latest stable release | +| `--skip-tls` | Skips SSL/TLS verification; use only for constrained environments | +| `--debug` | Prints verbose diagnostic output for network and extraction failures | + +`GH_TOKEN` and `GITHUB_TOKEN` are also recognized for GitHub API requests. + +## Troubleshooting + +### AI assistant CLI not found + +Run: + +```bash +rpgkit check +``` + +Install and authenticate the missing assistant CLI, or rerun init with the assistant you want: + +```bash +rpgkit init my-project --ai claude +rpgkit init my-project --ai copilot +``` + +### MCP tools say `rpg_unavailable` + +The MCP server is configured, but `.rpgkit/data/rpg.json` has not been created yet. Run: + +```text +/rpgkit.encode +``` + +### Incremental update failed + +Check: + +```bash +tail -n 200 .rpgkit/logs/update_rpg.log +``` + +Then run: + +```text +/rpgkit.update_rpg +``` + +If the graph is corrupted or too stale, run `/rpgkit.encode` for a full rebuild. 
+ +### Template download hits rate limits or private repo access errors + +Use a token: + +```bash +rpgkit init my-project --github-token $GITHUB_TOKEN +``` + +or set an environment variable: + +```bash +export GH_TOKEN=your_token +``` diff --git a/RPG-Kit/docs/project-structure.md b/RPG-Kit/docs/project-structure.md new file mode 100644 index 0000000..8a3e466 --- /dev/null +++ b/RPG-Kit/docs/project-structure.md @@ -0,0 +1,154 @@ +# Project Structure + +## Workspace == Repo + +RPG-Kit installs alongside your project code: the directory you run `rpgkit init` in, also called the workspace root, **is** the project repository root. There is no separate `repo/` subdirectory. This means: + +- `rpgkit init my-project` creates `my-project/` containing both your source code (`src/`, `tests/`, `docs/`) and RPG-Kit's runtime files (`.rpgkit/`, `.claude/`, `.github/`, `.vscode/`, depending on the selected agent). +- `rpgkit init --here` inside an existing git repository adds RPG-Kit on top of the existing code without moving the repository. +- A single `.git` repository tracks user-owned code and any RPG-Kit files the user chooses to commit. Runtime data under `.rpgkit/data/` is gitignored by default. + +## After `rpgkit init` + +Running `rpgkit init` downloads a template and creates a structure like this: + +```text +my-project/ +โ”œโ”€โ”€ docs/ # Optional requirement docs for /rpgkit.feature_spec +โ”‚ โ”œโ”€โ”€ project_charter.md # Auto-detected when no description is provided +โ”‚ โ””โ”€โ”€ ... 
+โ”œโ”€โ”€ .claude/ # Claude Code configuration when --ai claude +โ”‚ โ”œโ”€โ”€ commands/ # /rpgkit.* command definitions +โ”‚ โ”‚ โ”œโ”€โ”€ rpgkit.feature_spec.md +โ”‚ โ”‚ โ”œโ”€โ”€ rpgkit.feature_build.md +โ”‚ โ”‚ โ”œโ”€โ”€ rpgkit.feature_refactor.md +โ”‚ โ”‚ โ”œโ”€โ”€ rpgkit.feature_edit.md +โ”‚ โ”‚ โ”œโ”€โ”€ rpgkit.build_skeleton.md +โ”‚ โ”‚ โ”œโ”€โ”€ rpgkit.build_data_flow.md +โ”‚ โ”‚ โ”œโ”€โ”€ rpgkit.design_base_classes.md +โ”‚ โ”‚ โ”œโ”€โ”€ rpgkit.design_interfaces.md +โ”‚ โ”‚ โ”œโ”€โ”€ rpgkit.plan_tasks.md +โ”‚ โ”‚ โ”œโ”€โ”€ rpgkit.code_gen.md +โ”‚ โ”‚ โ”œโ”€โ”€ rpgkit.rpg_edit.md +โ”‚ โ”‚ โ”œโ”€โ”€ rpgkit.encode.md +โ”‚ โ”‚ โ””โ”€โ”€ rpgkit.update_rpg.md +โ”‚ โ””โ”€โ”€ settings.json # Permissions and MCP auto-approval +โ”œโ”€โ”€ .github/ # Copilot configuration when --ai copilot +โ”‚ โ”œโ”€โ”€ agents/ # rpgkit.* agent definitions +โ”‚ โ””โ”€โ”€ prompts/ # companion prompts +โ”œโ”€โ”€ .vscode/ # Copilot/VS Code configuration when applicable +โ”‚ โ”œโ”€โ”€ mcp.json # MCP server registration +โ”‚ โ””โ”€โ”€ tasks.json # Optional workspace tasks +โ””โ”€โ”€ .rpgkit/ + โ”œโ”€โ”€ scripts/ # Pipeline scripts and support packages + โ”‚ โ”œโ”€โ”€ feature_spec_to_json.py # Feature specification + โ”‚ โ”œโ”€โ”€ feature_build.py + โ”‚ โ”œโ”€โ”€ feature_build_validation.py + โ”‚ โ”œโ”€โ”€ feature_refactor.py + โ”‚ โ”œโ”€โ”€ feature_refactor_validation.py + โ”‚ โ”œโ”€โ”€ feature_edit.py + โ”‚ โ”œโ”€โ”€ feature_edit_validation.py + โ”‚ โ”œโ”€โ”€ build_skeleton.py # RPG construction + โ”‚ โ”œโ”€โ”€ check_skeleton.py + โ”‚ โ”œโ”€โ”€ summary_skeleton.py + โ”‚ โ”œโ”€โ”€ build_data_flow.py + โ”‚ โ”œโ”€โ”€ check_data_flow.py + โ”‚ โ”œโ”€โ”€ generate_viz.py + โ”‚ โ”œโ”€โ”€ design_base_classes.py + โ”‚ โ”œโ”€โ”€ check_base_classes.py + โ”‚ โ”œโ”€โ”€ design_interfaces.py + โ”‚ โ”œโ”€โ”€ check_interfaces.py + โ”‚ โ”œโ”€โ”€ plan_tasks.py + โ”‚ โ”œโ”€โ”€ check_tasks.py + โ”‚ โ”œโ”€โ”€ init_codebase.py # Code generation + โ”‚ โ”œโ”€โ”€ run_batch.py # TDD batch executor, final test, 
global review + โ”‚ โ”œโ”€โ”€ check_code_gen.py + โ”‚ โ”œโ”€โ”€ update_graphs.py # Incremental RPG and dependency graph updates + โ”‚ โ”œโ”€โ”€ mcp_server.py # rpg-tools MCP server + โ”‚ โ”œโ”€โ”€ code_gen/ # Code generation subpackage + โ”‚ โ”œโ”€โ”€ common/ # Shared utilities and path definitions + โ”‚ โ”œโ”€โ”€ feature/ # Feature processing + โ”‚ โ”œโ”€โ”€ func_design/ # Function/interface design agents + โ”‚ โ”œโ”€โ”€ skeleton/ # Skeleton building + โ”‚ โ”œโ”€โ”€ rpg/ # RPG models, services, graph query engine + โ”‚ โ”œโ”€โ”€ rpg_edit/ # Surgical RPG/code edit pipeline + โ”‚ โ””โ”€โ”€ rpg_encoder/ # Reverse encoder + โ”‚ โ”œโ”€โ”€ check_encode.py # Pre-check rpg.json state + โ”‚ โ”œโ”€โ”€ run_encode.py # Full encode + โ”‚ โ”œโ”€โ”€ run_update_rpg.py # Incremental update implementation + โ”‚ โ”œโ”€โ”€ rpg_encoding.py # RPG encoding pipeline + โ”‚ โ”œโ”€โ”€ rpg_evolution.py # Incremental RPG evolution + โ”‚ โ”œโ”€โ”€ semantic_parsing.py # Semantic feature extraction + โ”‚ โ””โ”€โ”€ refactor_tree.py # Feature tree refactoring + โ”œโ”€โ”€ data/ # Runtime artifacts, populated by commands + โ”œโ”€โ”€ logs/ # Per-stage logs + โ””โ”€โ”€ reports/ # Review and diagnostic reports when generated +``` + +The agent configuration directory varies by the selected AI assistant and release package. For the verified CLI path, `--ai claude` installs `.claude/commands/`, while `--ai copilot` installs `.github/agents/`, `.github/prompts/`, and `.vscode/mcp.json`. + +Command definitions are installed into the AI-agent-specific folder. Normal users should not need to edit `.rpgkit/scripts/` or `.rpgkit/data/` manually. 
+ +## Generated Data Files + +As you run `/rpgkit.*` commands, `.rpgkit/data/` is progressively populated: + +| Generated file | Command | Description | +| -------------- | ------- | ----------- | +| `feature_spec/` | `feature_spec` | Evidence and feature specification documents | +| `feature_spec.json` | `feature_spec` | Structured feature specification | +| `feature_build.json` | `feature_build` | Expanded feature tree | +| `feature_tree.json` | `feature_refactor` / `feature_edit` | Component architecture | +| `skeleton.json` | `build_skeleton` | File skeleton | +| `skeleton_summary.txt` | `build_skeleton` | Human-readable skeleton summary | +| `rpg.json` | `build_skeleton` / `encode`, then updated by later commands | Repository Planning Graph | +| `dep_graph.json` | `encode` / `update_rpg` / `rpg_edit` | Code dependency graph used for incremental sync and edits | +| `data_flow.json` | `build_data_flow` | Inter-component data flow DAG | +| `data_flow_viz.html` | `build_data_flow` | Data flow visualization | +| `base_classes.json` | `design_base_classes` | Shared base class definitions | +| `interfaces.json` | `design_interfaces` | Function/class interface definitions | +| `tasks.json` | `plan_tasks` | Implementation task batches | +| `code_gen_state.jsonl` | `code_gen` | Code generation progress state, append-only JSONL | +| `rpg_edit_impact.json` | `rpg_edit` | Impact analysis for a surgical edit | +| `rpg_edit_plan.json` | `rpg_edit` | Confirmed surgical edit plan | +| `rpg_edit_code_result.json` | `rpg_edit` | Code application result for a surgical edit | +| `trajectory/` | All scripts | Execution trajectory logs | + +## `rpg.json` โ€” The Repository Planning Graph + +`rpg.json` is the central graph artifact used by the forward pipeline, reverse encoder, MCP tools, incremental update hooks, and `/rpgkit.rpg_edit`. + +It can be created in either direction: + +1. **Forward pipeline:** `/rpgkit.build_skeleton` creates `rpg.json` from `feature_tree.json`. +2. 
**Reverse encoder:** `/rpgkit.encode` creates `rpg.json` from an existing codebase. + +Later commands enrich or maintain the same file: + +1. **`build_data_flow`** โ€” adds data-flow edges between components. +2. **`design_base_classes`** โ€” adds base-class relationship edges. +3. **`design_interfaces`** โ€” adds fine-grained dependency edges such as inheritance, invocation, and references. +4. **`code_gen`** โ€” updates implementation status as code is generated. +5. **`update_rpg`** โ€” incrementally updates the RPG after commits when the hook is skipped or needs to be run manually. +6. **`rpg_edit`** โ€” applies targeted feature graph edits together with code and dependency graph changes. + +## `dep_graph.json` โ€” Code Dependency Graph + +`dep_graph.json` stores the code-level dependency graph used by the encoder, incremental update path, and surgical edit path. It is maintained alongside `rpg.json` so RPG-Kit can keep feature-level structure and code-level dependencies aligned. + +Typical producers and updaters: + +- `/rpgkit.encode` creates the initial dependency graph when encoding an existing codebase. +- The post-commit hook and `/rpgkit.update_rpg` refresh it after code changes. +- `/rpgkit.rpg_edit` refreshes it after applying targeted code edits. + +## Runtime Logs and Reports + +Runtime logs are written under `.rpgkit/logs/`, for example: + +- `.rpgkit/logs/encode.log` +- `.rpgkit/logs/update_rpg.log` +- `.rpgkit/logs/feature_build.log` +- `.rpgkit/logs/build_data_flow.log` + +Execution traces are written under `.rpgkit/data/trajectory/`. Review or diagnostic artifacts may be written under `.rpgkit/reports/` when a command generates them. 
diff --git a/RPG-Kit/pyproject.toml b/RPG-Kit/pyproject.toml new file mode 100644 index 0000000..545a3a5 --- /dev/null +++ b/RPG-Kit/pyproject.toml @@ -0,0 +1,41 @@ +[project] +name = "rpgkit-cli" +version = "0.1.2" +description = "RPG-Kit CLI - A tool to generate feature trees for repository planning and code generation." +requires-python = ">=3.12" +dependencies = [ + "typer", + "rich", + "httpx[socks]", + "platformdirs", + "readchar", + "truststore>=0.10.4", + "pydantic>=2.0.0", + "pytest", + "tree-sitter", + "tree-sitter-json", + "networkx", + "rank_bm25", + "rapidfuzz", + "libcst", + "json5", + "tiktoken", + "tqdm", + "openai>=1.0.0", + "anthropic>=0.20.0", + "mcp>=1.0.0", + "pyyaml>=6.0" +] + +[project.scripts] +rpgkit = "rpgkit_cli:main" + +[project.urls] +Repository = "https://github.com/microsoft/RPG-ZeroRepo" + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.hatch.build.targets.wheel] +packages = ["src/rpgkit_cli"] diff --git a/RPG-Kit/scripts/build_data_flow.py b/RPG-Kit/scripts/build_data_flow.py new file mode 100644 index 0000000..1294e81 --- /dev/null +++ b/RPG-Kit/scripts/build_data_flow.py @@ -0,0 +1,385 @@ +#!/usr/bin/env python3 +"""Build Data Flow Script - Implementation Level Step 2. 
+ +Function: Design inter-component data flow as a directed acyclic graph (DAG) +- Reads skeleton.json to get component information +- Designs how data flows between components (what data, types, transformations) +- Validates the data flow graph is acyclic +- Generates subtree processing order for later steps +- Adds data flow dependencies as edges to repo_rpg.json + +Input: .rpgkit/skeleton.json (file structure with component info) +Output: .rpgkit/data_flow.json (data flow edges and subtree order) + .rpgkit/repo_rpg.json (updated with data flow edges) +""" + +import json +import logging +import argparse +from pathlib import Path +from typing import Dict, Any, Optional + +# Import trajectory module +from common.trajectory import Trajectory, load_or_create_trajectory +from common import ( + get_skeleton_tree_string, + extract_functional_areas_from_skeleton, + extract_component_directories, + print_unicode_table, + get_repo_info_from_files, +) + +# Import the DataFlowAgent +from func_design.data_flow_agent import DataFlowAgent + +# Import RPG models for adding edges to repo_rpg.json +from rpg import EdgeType + +# Import centralized paths +from common.paths import SKELETON_FILE, DATA_FLOW_FILE, REPO_RPG_FILE +from common import get_project_background_context + + +# ============================================================================ +# RPG Update Function +# ============================================================================ + +def update_rpg_with_data_flow(data_flow_data: Dict[str, Any], rpg_path: Path): + """Update RPG with data flow edges. 
+ + Args: + data_flow_data: Result dict containing data_flow list + rpg_path: Path to the repo_rpg.json file + """ + if not rpg_path.exists(): + logging.info(f"Skipping repo_rpg.json update: file not found at {rpg_path}") + return + + data_flow = data_flow_data.get("data_flow", []) + if not data_flow: + return + + from rpg.service import RPGService + + try: + svc = RPGService.load(rpg_path) + except Exception as e: + logging.error(f"Failed to load RPG: {e}") + return + + # Cleanup old edges first + svc.refresh_stage_edges("build_data_flow") + + added = 0 + + for edge_data in data_flow: + source_name = edge_data.get("source", "") + target_name = edge_data.get("target", "") + data_id = edge_data.get("data_id", "") + data_type = edge_data.get("data_type", "") + + # Find source node + src_node = svc.find_functional_area_by_name(source_name) + if not src_node: + logging.warning(f"Source component not found: {source_name}") + continue + + # Find target node + dst_node = svc.find_functional_area_by_name(target_name) + if not dst_node: + logging.warning(f"Target component not found: {target_name}") + continue + + # Add data flow edge with dedup + was_added = svc.add_dependency_edge( + src_node, dst_node, + EdgeType.REFERENCES, + "build_data_flow", + description=edge_data.get("transformation", ""), + content=f"data_id={data_id}, data_type={data_type}", + ) + if was_added: + added += 1 + logging.info(f"Added data flow edge: {source_name} -> {target_name} ({data_id})") + else: + logging.info(f"Edge already exists (by signature): {source_name} -> {target_name}") + + svc.save(rpg_path) + if added > 0: + print(f"[OK] Added {added} data flow edges to: {rpg_path}") + else: + print(f"No new data flow edges to add to: {rpg_path}") + + +# ============================================================================ +# Data Flow Builder +# ============================================================================ + +class DataFlowBuilder: + """Build data flow using DataFlowAgent.""" 
+ + def __init__( + self, + max_iterations: int = 5, + trajectory: Optional[Trajectory] = None + ): + self.max_iterations = max_iterations + self.trajectory = trajectory + self.logger = logging.getLogger(__name__) + self._current_step_id: Optional[int] = None + + def build(self, skeleton: Dict[str, Any]) -> Dict[str, Any]: + """Build data flow from skeleton. + + Args: + skeleton: The skeleton.json data + + Returns: + Dict containing data_flow, subtree_order, components, etc. + """ + # Get repository info + repo_name, repo_info = get_repo_info_from_files() + + # Enrich repo_info with project background / technology context + project_background = get_project_background_context() + if project_background and project_background.strip(): + repo_info = f"{repo_info}\n\n{project_background}" + + # Extract functional areas (components) from skeleton + functional_areas = extract_functional_areas_from_skeleton(skeleton) + component_dirs = extract_component_directories(skeleton) + + if len(functional_areas) < 2: + self.logger.warning("Less than 2 components found, skipping data flow design") + return { + "data_flow": [], + "subtree_order": functional_areas, + "components": functional_areas, + "warning": "Not enough components for data flow" + } + + print("\n" + "=" * 70) + print("DATA FLOW DESIGN") + print("=" * 70) + print(f"Repository: {repo_name}") + print(f"Components: {len(functional_areas)}") + for area in functional_areas: + dir_info = f" [{component_dirs.get(area, '')}]" if area in component_dirs else "" + print(f" - {area}{dir_info}") + print("=" * 70) + + # Record step start + if self.trajectory: + step = self.trajectory.add_step( + "design_data_flow", + f"Design data flow for {len(functional_areas)} components" + ) + self._current_step_id = step.step_id + self.trajectory.start_step(step.step_id) + + # Get skeleton tree for context + skeleton_tree = get_skeleton_tree_string(skeleton, max_depth=3) + + # Initialize agent and run + agent = DataFlowAgent( + 
max_iterations=self.max_iterations, + logger=self.logger, + trajectory=self.trajectory, + step_id=self._current_step_id + ) + + result = agent.build_data_flow( + repo_name=repo_name, + repo_info=repo_info, + functional_areas=functional_areas, + component_dirs=component_dirs, + skeleton_tree=skeleton_tree + ) + + # Add components to result + result["components"] = functional_areas + + # Update trajectory + if self.trajectory and self._current_step_id: + if result.get("success"): + self.trajectory.complete_step( + self._current_step_id, + {"edge_count": len(result.get("data_flow", []))} + ) + else: + self.trajectory.fail_step( + self._current_step_id, + result.get("error", "Unknown error") + ) + + return result + + def print_summary(self, result: Dict[str, Any]) -> None: + """Print summary of data flow design.""" + print("\n" + "=" * 60) + print("DATA FLOW DESIGN SUMMARY") + print("=" * 60) + + components = result.get("components", []) + data_flow = result.get("data_flow", []) + subtree_order = result.get("subtree_order", []) + + print(f"\nComponents: {len(components)}") + print(f"Data Flow Edges: {len(data_flow)}") + + if subtree_order: + print(f"\nSubtree Processing Order:") + for i, comp in enumerate(subtree_order, 1): + print(f" {i}. 
{comp}") + + if data_flow: + rows = [] + for edge in data_flow: + source = str(edge.get("source", ""))[:20] + target = str(edge.get("target", ""))[:20] + data_id = str(edge.get("data_id", ""))[:25] + data_type = str(edge.get("data_type", ""))[:20] + rows.append([source, "โ†’", target, data_id, data_type]) + + print_unicode_table( + headers=["Source", "", "Target", "Data ID", "Data Type"], + rows=rows, + title="Data Flow Edges" + ) + + if result.get("error"): + print(f"\n[WARNING] Error: {result['error']}") + if result.get("warning"): + print(f"\n[WARNING] Warning: {result['warning']}") + + print("=" * 60) + + +# ============================================================================ +# Main Entry Point +# ============================================================================ + +def main(): + """Main entry point.""" + parser = argparse.ArgumentParser( + description="Build inter-component data flow graph" + ) + parser.add_argument( + "--skeleton", "-s", + type=str, + default=str(SKELETON_FILE), + help=f"Input skeleton file (default: {SKELETON_FILE})" + ) + parser.add_argument( + "--output", "-o", + type=str, + default=str(DATA_FLOW_FILE), + help=f"Output data flow file (default: {DATA_FLOW_FILE})" + ) + parser.add_argument( + "--repo-rpg", "-r", + type=str, + default=str(REPO_RPG_FILE), + help=f"Repo RPG file to update with data flow edges (default: {REPO_RPG_FILE})" + ) + parser.add_argument( + "--max-iterations", "-m", + type=int, + default=5, + help="Max iterations for valid design (default: 5)" + ) + parser.add_argument( + "--verbose", "-v", + action="store_true", + help="Enable verbose logging" + ) + parser.add_argument( + "--no-trajectory", + action="store_true", + help="Disable trajectory recording" + ) + + args = parser.parse_args() + + # Setup logging + log_level = logging.DEBUG if args.verbose else logging.INFO + logging.basicConfig( + level=log_level, + format="%(asctime)s - %(levelname)s - %(message)s" + ) + logger = logging.getLogger(__name__) 
+ + # Load input + input_path = Path(args.skeleton) + if not input_path.exists(): + logger.error(f"Input file not found: {input_path}") + print(f"ERROR: Input file not found: {input_path}") + print("Please run /rpgkit.build_skeleton first.") + return 1 + + with open(input_path, "r", encoding="utf-8") as f: + skeleton = json.load(f) + + # Initialize trajectory + trajectory = None + if not args.no_trajectory: + trajectory = load_or_create_trajectory("build_data_flow") + + if trajectory.is_resumable(): + print(f"\n[WARNING] Found in-progress execution from {trajectory.started_at}") + print(f" Resume point: {trajectory.resume_point.step_name}") + print(" (Use --no-trajectory to start fresh)") + + trajectory.start(metadata={ + "input_file": str(input_path), + "output_file": str(args.output), + "max_iterations": args.max_iterations + }) + + try: + # Build data flow + builder = DataFlowBuilder( + max_iterations=args.max_iterations, + trajectory=trajectory + ) + + result = builder.build(skeleton) + + # Save output + output_path = Path(args.output) + output_path.parent.mkdir(parents=True, exist_ok=True) + + with open(output_path, "w", encoding="utf-8") as f: + json.dump(result, f, indent=2, ensure_ascii=False) + + logger.info(f"[OK] Data flow saved to: {output_path}") + builder.print_summary(result) + print(f"\n[OK] Data flow saved to: {output_path}") + + # Add data flow edges to repo_rpg.json + update_rpg_with_data_flow(result, Path(args.repo_rpg)) + + if not result.get("success", True) and "error" in result: + if trajectory: + trajectory.fail(result["error"]) + return 1 + + # Mark trajectory as complete + if trajectory: + trajectory.complete(metadata={ + "components": len(result.get("components", [])), + "edges": len(result.get("data_flow", [])) + }) + print(f"[OK] Trajectory saved to: {trajectory.trajectory_file}") + + return 0 + + except Exception as e: + logger.error(f"Build failed: {e}") + if trajectory: + trajectory.fail(str(e)) + raise + + +if __name__ == 
"__main__": + exit(main()) diff --git a/RPG-Kit/scripts/build_skeleton.py b/RPG-Kit/scripts/build_skeleton.py new file mode 100644 index 0000000..0cf9b71 --- /dev/null +++ b/RPG-Kit/scripts/build_skeleton.py @@ -0,0 +1,716 @@ +#!/usr/bin/env python3 +"""Build Skeleton Script - Implementation Level Step 1. + +Function: Design repository file structure from component architecture +- Step 1: Build RPG (Repository Program Graph) from component data +- Step 2: Generate directory structure mapping components to directories +- Step 3: Assign features to specific Python files using professional prompts + +Input: .rpgkit/feature_tree.json (component list from refactor step) +Output: .rpgkit/skeleton.json (tree-structured file skeleton with feature assignments) + .rpgkit/repo_rpg.json (intermediate RPG structure) +""" + +import json +import logging +import argparse +from pathlib import Path +from typing import Dict, Any, Optional + +# Import trajectory module +from common.trajectory import Trajectory, load_or_create_trajectory + +# Import required modules +from rpg import RPG +from rpg.builder import create_initial_rpg, load_refactor_feature_data, get_rpg_statistics +from skeleton.skeleton_models import RepoSkeleton, DirectoryNode, normalize_path +from skeleton.file_designer import FileDesigner + +# Import centralized paths +from common.paths import ( + FEATURE_TREE_FILE, + SKELETON_FILE, + REPO_RPG_FILE, +) +from common import print_unicode_table +from pathlib import Path as PPath +from rpg import NodeMetaData +from skeleton.skeleton_prompts import extract_features_from_subtree + + +# ============================================================================ +# Utility Functions +# ============================================================================ + +def convert_skeleton_to_rpgkit_format(skeleton: RepoSkeleton, rpg: RPG) -> Dict[str, Any]: + """Convert skeleton format to RPG-Kit's expected format. 
+ + This ensures compatibility with existing validation and summary scripts. + """ + + def convert_node(node): + """Convert skeleton node to RPG-Kit format recursively.""" + result = { + "type": "directory" if node.is_dir else "file", + "name": node.name, + "path": node.path, + } + + if node.is_dir: + result["children"] = [convert_node(child) for child in node.children()] + else: + result["feature_paths"] = getattr(node, 'feature_paths', []) + # Find component name from feature paths + if hasattr(node, 'feature_paths') and node.feature_paths: + # Extract component from first feature path + first_feature = node.feature_paths[0] + if '/' in first_feature: + component = first_feature.split('/')[0] + result["component"] = component + + return result + + # Build RPG-Kit compatible output + output = { + "repository_name": rpg.repo_name, + "repository_purpose": rpg.repo_info, + "root": convert_node(skeleton.root), + "statistics": { + "total_components": len([n for n in rpg.nodes.values() if n.level == 1]), + "total_features": sum(len(f.feature_paths) for f in skeleton.get_all_file_nodes()), + "total_files": len(skeleton.get_all_file_nodes()), + "total_directories": len([n for n in skeleton.path_to_node.values() if n.is_dir]), + } + } + + return output + + +# ============================================================================ +# Skeleton Builder +# ============================================================================ + +class SkeletonBuilder: + """Skeleton builder.""" + + def __init__(self, max_iterations: int = 10, trajectory: Trajectory = None): + self.max_iterations = max_iterations + self.logger = logging.getLogger(__name__) + self.trajectory = trajectory + + # Build state + self.repo_name = "" + self.repo_data = {} + self.rpg = None + self.skeleton = None + self.file_designer = None + + # Statistics + self.stats = { + "total_features": 0, + "assigned_features": 0, + "total_files": 0, + "total_components": 0, + "llm_calls": 0 + } + + # Trajectory step 
tracking + self._current_step_id: Optional[int] = None + + def build(self, input_data: Dict[str, Any]) -> Dict[str, Any]: + """Execute complete skeleton building workflow.""" + self.repo_data = input_data + self.repo_name = input_data.get("repository_name", "project") + components = input_data.get("components", []) + + if not components: + return {"error": "No components found in input"} + + print("\n" + "=" * 70) + print("SKELETON BUILDING") + print("=" * 70) + print(f"Repository: {self.repo_name}") + print(f"Components: {len(components)}") + + # Count total features + self.stats["total_components"] = len(components) + self.stats["total_features"] = sum( + self._count_features_in_component(comp.get("refactored_subtree", {})) + for comp in components + ) + print(f"Total Features: {self.stats['total_features']}") + print("=" * 70) + + # Initialize trajectory metadata + if self.trajectory: + self.trajectory.metadata.update({ + "repository_name": self.repo_name, + "component_count": len(components), + "total_features": self.stats["total_features"] + }) + self.trajectory.save() + + try: + # Step 1: Build RPG from component data + print("\n[Step 1] Building RPG (Repository Program Graph)...") + step1 = None + if self.trajectory: + step1 = self.trajectory.add_step("build_rpg", "Build RPG from component architecture") + self.trajectory.start_step(step1.step_id) + self._current_step_id = step1.step_id + + if not self._step1_build_rpg(): + if self.trajectory and step1: + self.trajectory.fail_step(step1.step_id, "RPG building failed") + return {"error": "RPG building failed"} + + if self.trajectory and step1: + rpg_stats = get_rpg_statistics(self.rpg) + self.trajectory.complete_step(step1.step_id, { + "rpg_statistics": rpg_stats + }) + + # Step 2: Generate skeleton using FileDesigner + print("\n[Step 2] Generating skeleton with FileDesigner...") + step2 = None + if self.trajectory: + step2 = self.trajectory.add_step("file_design", "Generate skeleton using FileDesigner") + 
self.trajectory.start_step(step2.step_id) + self._current_step_id = step2.step_id + + if not self._step2_file_design(): + if self.trajectory and step2: + self.trajectory.fail_step(step2.step_id, "File design failed") + return {"error": "File design failed"} + + if self.trajectory and step2: + skeleton_stats = self.skeleton.get_statistics() + self.trajectory.complete_step(step2.step_id, { + "skeleton_statistics": skeleton_stats + }) + + # Step 2.5: Update RPG paths from skeleton + print("\n[Step 2.5] Updating RPG paths from skeleton...") + paths_updated = self._update_rpg_paths_from_skeleton() + print(f" [OK] Updated {paths_updated} nodes with path information") + + # Step 3: Convert and save results + print("\n[Step 3] Converting to RPG-Kit format...") + result = self._build_result() + + # Save updated RPG (with directory assignments) + self.rpg.save_json(str(REPO_RPG_FILE), indent=2) + print(f" [OK] Updated RPG saved to: {REPO_RPG_FILE}") + + self._print_summary() + + return result + + except Exception as e: + self.logger.error(f"Skeleton building failed: {e}") + if self.trajectory: + self.trajectory.fail_step(self._current_step_id or 0, f"Build failed: {e}") + return {"error": str(e)} + + def _step1_build_rpg(self) -> bool: + """Step 1: Build RPG from component data.""" + try: + self.rpg = create_initial_rpg(self.repo_data) + + # Save RPG to intermediate file + self.rpg.save_json(str(REPO_RPG_FILE), indent=2) + + # Print RPG statistics + stats = get_rpg_statistics(self.rpg) + print(" [OK] RPG built successfully:") + print(f" - Total nodes: {stats['total_nodes']}") + print(f" - Node types: {dict(stats['node_types'])}") + print(f" - Level distribution: {dict(stats['levels'])}") + print(f" [OK] RPG saved to: {REPO_RPG_FILE}") + + return True + + except Exception as e: + self.logger.error(f"RPG building failed: {e}") + return False + + def _step2_file_design(self) -> bool: + """Step 2: Generate skeleton using FileDesigner.""" + try: + # Initialize FileDesigner with 
trajectory support + self.file_designer = FileDesigner( + rpg=self.rpg, + max_iterations=self.max_iterations, + trajectory=self.trajectory, + step_id=self._current_step_id + ) + + # Run file design process + self.skeleton, updated_rpg, design_results = self.file_designer.run() + + if not design_results.get("success", False): + self.logger.error("FileDesigner failed") + return False + + # Update RPG with changes from FileDesigner + self.rpg = updated_rpg + + # Update statistics + self.stats.update({ + "assigned_features": design_results.get("features_assigned", 0), + "total_files": design_results.get("files_created", 0), + "llm_calls": design_results.get("statistics", {}).get("llm_calls_made", 0), + "validation_retries": design_results.get("statistics", {}).get("validation_retries", 0) + }) + + print(" [OK] Skeleton generated successfully:") + print(f" - Components processed: {design_results.get('components_processed', 0)}") + print(f" - Features assigned: {self.stats['assigned_features']}") + print(f" - Files created: {self.stats['total_files']}") + print(f" - LLM calls made: {self.stats['llm_calls']}") + if self.stats.get('validation_retries', 0) > 0: + print(f" - Validation retries: {self.stats['validation_retries']}") + + return True + + except Exception as e: + self.logger.error(f"File design failed: {e}") + return False + + def _build_result(self) -> Dict[str, Any]: + """Build the final result dictionary in RPG-Kit format.""" + # Convert to RPG-Kit compatible format + result = convert_skeleton_to_rpgkit_format(self.skeleton, self.rpg) + + # Add statistics + result["statistics"].update({ + "rpg_nodes": len(self.rpg.nodes), + "rpg_edges": len(self.rpg.edges), + "llm_calls_made": self.stats["llm_calls"], + "validation_retries": self.stats.get("validation_retries", 0), + }) + + # Get component directories from FileDesigner (updated via RPG) + component_directories = {} + + # First try to get from RPG nodes (which have been updated with paths) + for node in 
self.rpg.nodes.values(): + if node.level == 1 and node.name != self.rpg.repo_name: + if node.meta and hasattr(node.meta, 'path') and node.meta.path: + # Use the path stored in RPG node metadata + component_directories[node.name] = node.meta.path + else: + # Fallback: try to infer directory from skeleton + for skeleton_node in self.skeleton.path_to_node.values(): + if (isinstance(skeleton_node, DirectoryNode) and + node.name.lower().replace(' ', '_') in skeleton_node.path.lower()): + component_directories[node.name] = skeleton_node.path + break + + result["component_directories"] = component_directories + + return result + + def _update_rpg_paths_from_skeleton(self) -> int: + """Update RPG node paths from skeleton file assignments. + + For each feature node in RPG, find the corresponding file in skeleton + and update the node's meta.path accordingly. + + Returns: + Number of nodes updated with path information + """ + if not self.skeleton or not self.rpg: + return 0 + + updated_count = 0 + + # Build feature_path -> file_path mapping from skeleton + feature_to_file = {} + for file_node in self.skeleton.get_all_file_nodes(): + file_path = file_node.path + for feature_path in file_node.feature_paths: + feature_to_file[feature_path] = file_path + + # Update component (L1) nodes with directory paths + component_dirs = {} + for file_path, feature_path in [(f.path, fp) + for f in self.skeleton.get_all_file_nodes() + for fp in f.feature_paths]: + if '/' in feature_path: + component_name = feature_path.split('/')[0] + # Get the directory from file path + dir_path = str(PPath(file_path).parent) + if component_name not in component_dirs: + component_dirs[component_name] = dir_path + + # Update RPG nodes + for node in self.rpg.nodes.values(): + if node.level == 0: + # Repo root + if not (node.meta and node.meta.path): + if not node.meta: + node.meta = NodeMetaData() + node.meta.path = "." 
+ updated_count += 1 + elif node.level == 1: + # Component/functional_area level + if node.name in component_dirs: + if not node.meta: + node.meta = NodeMetaData() + if not node.meta.path: + node.meta.path = component_dirs[node.name] + updated_count += 1 + else: + # Feature nodes - use feature path to find file + feature_path = node.feature_path() + if feature_path in feature_to_file: + file_path = feature_to_file[feature_path] + if not node.meta: + node.meta = NodeMetaData() + if not node.meta.path: + node.meta.path = file_path + updated_count += 1 + + if updated_count > 0: + self.logger.info(f"Updated {updated_count} RPG nodes with path information") + + return updated_count + + def _count_features_in_component(self, subtree: Any) -> int: + """Count features in component subtree.""" + if isinstance(subtree, dict): + total = 0 + for key, value in subtree.items(): + if key == "description": + continue + total += self._count_features_in_component(value) + return total + elif isinstance(subtree, list): + return len([item for item in subtree if item]) + else: + return 1 if subtree else 0 + + def _print_summary(self): + """Print build summary.""" + print("\n" + "=" * 70) + print("SKELETON BUILDING COMPLETE") + print("=" * 70) + print(f"Total Components: {self.stats['total_components']}") + print(f"Total Features: {self.stats['total_features']}") + print(f"Assigned Features: {self.stats['assigned_features']}") + print(f"Total Files: {self.stats['total_files']}") + print(f"LLM Calls Made: {self.stats['llm_calls']}") + + if self.skeleton: + skeleton_stats = self.skeleton.get_statistics() + print(f"Skeleton Nodes: {skeleton_stats['total_nodes']}") + print(f"__init__.py Files: {skeleton_stats.get('init_files', 0)}") + + # Print file summary + if self.skeleton: + files = self.skeleton.get_all_file_nodes() + if files: + rows = [] + for f in sorted(files, key=lambda x: x.path)[:20]: # Show first 20 + rows.append([f.path, len(f.feature_paths)]) + + if rows: + 
print_unicode_table( + headers=["File Path", "Features"], + rows=rows, + title="File Assignments (Top 20)" + ) + if len(files) > 20: + print(f" ... and {len(files) - 20} more files") + + +# ============================================================================ +# Patch Mode: Incremental Feature Assignment +# ============================================================================ + +def patch_missing(input_data: Dict[str, Any]) -> Dict[str, Any]: + """Incrementally assign missing features to the existing skeleton. + + Loads the existing skeleton.json and rpg.json, detects which features + from the feature tree are not yet assigned, and runs a targeted LLM + assignment for only those features โ€” reusing existing directory structure. + + Returns a result dict with type "patch", "skip", or "error". + """ + if not SKELETON_FILE.exists(): + return {"error": "No existing skeleton found. Run build first."} + if not REPO_RPG_FILE.exists(): + return {"error": "No RPG file found. Run build first."} + + skeleton = RepoSkeleton.load_json(str(SKELETON_FILE)) + rpg = RPG.from_json(str(REPO_RPG_FILE)) + + # Collect all features from input, grouped by component + all_input_features: set = set() + features_by_component: Dict[str, list] = {} + for comp in input_data.get("components", []): + comp_name = comp["name"] + subtree = comp.get("refactored_subtree", {}) + comp_features = extract_features_from_subtree(subtree, comp_name) + features_by_component[comp_name] = comp_features + all_input_features.update(comp_features) + + # Collect features already assigned in skeleton + existing_features: set = set() + for file_node in skeleton.get_all_file_nodes(): + existing_features.update(file_node.feature_paths) + + # Find missing features per component + missing_by_component: Dict[str, list] = {} + for comp_name, comp_features in features_by_component.items(): + missing = [f for f in comp_features if f not in existing_features] + if missing: + missing_by_component[comp_name] = 
missing + + total_missing = sum(len(v) for v in missing_by_component.values()) + if total_missing == 0: + print("[OK] No missing features. Skeleton is already complete.") + return {"type": "skip", "message": "All features already assigned"} + + print(f"\n[Patch] {total_missing} missing features across {len(missing_by_component)} components:") + for comp_name, features in missing_by_component.items(): + print(f" - {comp_name}: {len(features)} missing") + + # Extract dir_assignments from RPG L1 nodes (set during original build) + dir_assignments: Dict[str, str] = {} + for node in rpg.nodes.values(): + if node.level == 1 and node.name != rpg.repo_name: + if node.meta and node.meta.path: + dir_assignments[node.name] = node.meta.path + + missing_without_dir = [c for c in missing_by_component if c not in dir_assignments] + if missing_without_dir: + return { + "error": ( + f"No directory assignments in RPG for: {missing_without_dir}. " + "Cannot patch โ€” run full build first." + ) + } + + # Run patch via FileDesigner (skips directory structure generation) + file_designer = FileDesigner(rpg=rpg) + new_assignments = file_designer.patch(missing_by_component, dir_assignments) + + if not new_assignments: + return {"error": "Patch produced no assignments"} + + # Merge new assignments into existing skeleton. + # insert_file() OVERWRITES feature_paths on existing files, so handle manually. 
+ merged_count = 0 + new_file_count = 0 + for assignment in new_assignments: + file_path = assignment["file_path"] + features = assignment["features"] + norm = normalize_path(file_path) + + if norm in skeleton.path_to_node: + existing_node = skeleton.path_to_node[norm] + if hasattr(existing_node, "feature_paths"): + existing_node.feature_paths.extend(features) + merged_count += len(features) + else: + skeleton.insert_file(file_path, "", features) + new_file_count += 1 + + skeleton.add_init_files() + + # Update RPG paths for any newly assigned nodes + feature_to_file: Dict[str, str] = {} + for file_node in skeleton.get_all_file_nodes(): + for fp in file_node.feature_paths: + feature_to_file[fp] = file_node.path + + for node in rpg.nodes.values(): + if node.level > 1: + fp = node.feature_path() + if fp in feature_to_file and not (node.meta and node.meta.path): + if not node.meta: + node.meta = NodeMetaData() + node.meta.path = feature_to_file[fp] + + # Re-convert and save + result = convert_skeleton_to_rpgkit_format(skeleton, rpg) + result["statistics"].update({ + "rpg_nodes": len(rpg.nodes), + "rpg_edges": len(rpg.edges), + "llm_calls_made": file_designer.stats["llm_calls_made"], + }) + + component_directories: Dict[str, str] = {} + for node in rpg.nodes.values(): + if node.level == 1 and node.name != rpg.repo_name: + if node.meta and node.meta.path: + component_directories[node.name] = node.meta.path + result["component_directories"] = component_directories + + with open(str(SKELETON_FILE), "w", encoding="utf-8") as f: + json.dump(result, f, indent=2, ensure_ascii=False) + rpg.save_json(str(REPO_RPG_FILE), indent=2) + + print(f"\n[OK] Patch complete:") + print(f" - Missing features patched: {total_missing}") + print(f" - New files created: {new_file_count}") + print(f" - Features merged into existing files: {merged_count}") + + return { + "type": "patch", + "total_missing_patched": total_missing, + "new_files_created": new_file_count, + "features_merged": 
merged_count, + } + + +# ============================================================================ +# Main Entry Point +# ============================================================================ + +def main(): + """Main entry point.""" + parser = argparse.ArgumentParser( + description="Build repository skeleton from component architecture" + ) + parser.add_argument( + "--input", "-i", + type=str, + default=str(FEATURE_TREE_FILE), + help=f"Input file (default: {FEATURE_TREE_FILE})" + ) + parser.add_argument( + "--output", "-o", + type=str, + default=str(SKELETON_FILE), + help=f"Output file (default: {SKELETON_FILE})" + ) + parser.add_argument( + "--max-iterations", "-m", + type=int, + default=10, + help="Max iterations per component (default: 10)" + ) + parser.add_argument( + "--verbose", "-v", + action="store_true", + help="Enable verbose logging" + ) + parser.add_argument( + "--no-trajectory", + action="store_true", + help="Disable trajectory recording" + ) + parser.add_argument( + "--patch", + action="store_true", + help="Patch mode: only assign missing features to existing skeleton" + ) + + args = parser.parse_args() + + # Setup logging + log_level = logging.DEBUG if args.verbose else logging.INFO + logging.basicConfig( + level=log_level, + format="%(asctime)s - %(levelname)s - %(message)s" + ) + logger = logging.getLogger(__name__) + + # Load input + input_path = Path(args.input) + if not input_path.exists(): + logger.error(f"Input file not found: {input_path}") + print(f"ERROR: Input file not found: {input_path}") + print("Please run /rpgkit.refactor_feature first.") + return 1 + + try: + input_data = load_refactor_feature_data(input_path) + except Exception as e: + logger.error(f"Failed to load input data: {e}") + print(f"ERROR: {e}") + return 1 + + # --patch mode: incremental assignment of missing features only + if args.patch: + try: + result = patch_missing(input_data) + if "error" in result: + print(f"ERROR: {result['error']}") + return 1 + return 
0 + except Exception as e: + logger.error(f"Patch failed: {e}") + raise + + # Initialize trajectory + trajectory = None + if not args.no_trajectory: + trajectory = load_or_create_trajectory("build_skeleton") + + # Check if there's an in-progress execution + if trajectory.is_resumable(): + print(f"\n[WARNING] Found in-progress execution from {trajectory.started_at}") + print(f" Resume point: {trajectory.resume_point.step_name}") + print(" (Use --no-trajectory to start fresh)") + # For now, we don't implement resume - just start fresh + + trajectory.start(metadata={ + "input_file": str(input_path), + "output_file": str(args.output), + "max_iterations": args.max_iterations + }) + + try: + # Build skeleton + builder = SkeletonBuilder( + max_iterations=args.max_iterations, + trajectory=trajectory + ) + + result = builder.build(input_data) + + # Check for errors + if "error" in result: + logger.error(f"Build failed: {result['error']}") + if trajectory: + trajectory.fail(result["error"]) + return 1 + + # Save output + output_path = Path(args.output) + output_path.parent.mkdir(parents=True, exist_ok=True) + + with open(output_path, "w", encoding="utf-8") as f: + json.dump(result, f, indent=2, ensure_ascii=False) + + logger.info(f"[OK] Skeleton saved to: {output_path}") + print(f"\n[OK] Skeleton saved to: {output_path}") + + # Save RPG as well + if REPO_RPG_FILE.exists(): + print(f"[OK] RPG saved to: {REPO_RPG_FILE}") + + # Mark trajectory as complete + if trajectory: + trajectory.complete(metadata={ + "total_features": builder.stats["total_features"], + "assigned_features": builder.stats["assigned_features"], + "total_files": builder.stats["total_files"] + }) + print(f"[OK] Trajectory saved to: {trajectory.trajectory_file}") + + return 0 + + except Exception as e: + logger.error(f"Build failed: {e}") + if trajectory: + trajectory.fail(str(e)) + raise + + +if __name__ == "__main__": + exit(main()) \ No newline at end of file diff --git 
def load_json(file_path: Path) -> Dict[str, Any]:
    """Load a JSON file, returning {} when it is missing or malformed.

    Args:
        file_path: Path to the JSON document.

    Returns:
        Parsed dict, or an empty dict when the file does not exist or does
        not contain valid JSON. Other OS errors (e.g. permissions) propagate.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            return json.load(f)
    except (json.JSONDecodeError, FileNotFoundError):
        return {}


def validate_base_classes_structure(data: Dict[str, Any]) -> Tuple[bool, List[str]]:
    """Validate the structure of a base_classes.json payload.

    Checks that 'base_classes' is a list of dicts with non-empty
    file_path/code/scope fields and syntactically valid Python 'code'.
    When present, 'data_structures' entries are validated the same way
    (requiring code/subtree, a non-'global' subtree, and a non-empty
    'data_flow_types' list).

    Args:
        data: Parsed contents of base_classes.json.

    Returns:
        Tuple (is_valid, errors) where errors lists human-readable problems.
    """
    errors = []

    base_classes = data.get("base_classes", [])

    if not isinstance(base_classes, list):
        errors.append("'base_classes' must be a list")
        return False, errors

    for i, bc in enumerate(base_classes):
        if not isinstance(bc, dict):
            errors.append(f"Base class {i}: must be a dictionary")
            continue

        # Required fields: file_path, code, and scope
        for field in ["file_path", "code", "scope"]:
            if field not in bc:
                errors.append(f"Base class {i}: missing required field '{field}'")
            elif not bc[field]:
                errors.append(f"Base class {i}: field '{field}' is empty")

        # Validate Python syntax of the embedded source. Empty code is
        # skipped here because the missing/empty-field error above already
        # covers that case.
        code = bc.get("code", "")
        if code:
            is_valid, error = validate_python_syntax(code)
            if not is_valid:
                # Prefer an explicit 'name'; otherwise derive one from the code
                name = bc.get("name", "")
                if not name:
                    class_names = extract_class_names(code)
                    name = class_names[0] if class_names else "unknown"
                errors.append(f"Base class {i} ({name}): syntax error - {error}")

    # Also validate data_structures if present
    data_structures = data.get("data_structures", [])
    if data_structures and not isinstance(data_structures, list):
        errors.append("'data_structures' must be a list")
    elif isinstance(data_structures, list):
        for i, ds in enumerate(data_structures):
            if not isinstance(ds, dict):
                errors.append(f"Data structure {i}: must be a dictionary")
                continue

            # code and subtree are required; file_path is optional (assigned later)
            for field in ["code", "subtree"]:
                if field not in ds:
                    errors.append(f"Data structure {i}: missing required field '{field}'")
                elif not ds[field]:
                    errors.append(f"Data structure {i}: field '{field}' is empty")

            # subtree must NOT be 'global'. Guard against non-string values,
            # which previously raised AttributeError on .lower().
            subtree = ds.get("subtree", "")
            if isinstance(subtree, str) and subtree.lower() == "global":
                errors.append(f"Data structure {i}: subtree cannot be 'global'")

            # data_flow_types is required and must be non-empty
            df_types = ds.get("data_flow_types", [])
            if not isinstance(df_types, list) or not df_types:
                errors.append(f"Data structure {i}: 'data_flow_types' must be a non-empty list")

            code = ds.get("code", "")
            if code:
                is_valid, error = validate_python_syntax(code)
                if not is_valid:
                    # No explicit name field here; derive one from the code
                    class_names = extract_class_names(code)
                    name = class_names[0] if class_names else "unknown"
                    errors.append(f"Data structure {i} ({name}): syntax error - {error}")

    return len(errors) == 0, errors
+ + Returns dict with: + - state: "error" | "init" | "update" + - message: description + - details: additional info + """ + # Check if base_classes.json exists + if not base_classes_path.exists(): + return { + "state": "init", + "message": "base_classes.json not found - need to run design_base_classes", + "details": {} + } + + # Load and validate + try: + with open(base_classes_path, 'r', encoding='utf-8') as f: + data = json.load(f) + except json.JSONDecodeError as e: + return { + "state": "error", + "message": f"Invalid JSON in base_classes.json: {e}", + "details": {} + } + + # Check for error field + if "error" in data: + return { + "state": "error", + "message": f"Base classes has error: {data['error']}", + "details": {} + } + + # Validate structure and syntax + is_valid, errors = validate_base_classes_structure(data) + if not is_valid: + return { + "state": "error", + "message": "Base classes structure or syntax is invalid", + "details": {"errors": errors} + } + + # Gather details + base_classes = data.get("base_classes", []) + class_names = data.get("class_names", []) + data_structures = data.get("data_structures", []) + ds_class_names = data.get("data_structure_names", []) + + # Collect file paths from base_classes + file_paths = [bc.get("file_path", "") for bc in base_classes if bc.get("file_path")] + # Collect subtrees from data_structures (file_path may not be assigned yet) + ds_subtrees = [ds.get("subtree", "") for ds in data_structures if ds.get("subtree")] + ds_file_paths = [ds.get("file_path", "") for ds in data_structures if ds.get("file_path")] + + return { + "state": "update", + "message": "Base classes are valid", + "details": { + "file_count": len(base_classes), + "class_count": len(class_names), + "file_paths": file_paths, + "class_names": class_names, + "data_structure_count": len(data_structures), + "data_structure_names": ds_class_names, + "data_structure_subtrees": ds_subtrees, + "data_structure_file_paths": ds_file_paths, + } + } + + +def 
def print_state(result: Dict[str, Any]) -> None:
    """Print state information for a base-classes inspection result."""

    def _listing(header: str, items: List[Any], limit: int) -> None:
        # Shared render pattern: header, up to `limit` bullets, then a
        # "... and N more" suffix when the list is truncated.
        if not items:
            return
        print(header)
        for entry in items[:limit]:
            print(f" - {entry}")
        if len(items) > limit:
            print(f" ... and {len(items) - limit} more")

    state = result["state"]
    details = result.get("details", {})
    icon = {"error": "[FAIL]", "init": "[-]", "update": "[OK]"}.get(state, "[?]")

    print(f"\n{icon} State: {state.upper()}")
    print(f" {result['message']}")

    if state == "error" and "errors" in details:
        # Header is printed even for an empty list (matches original behavior).
        print("\n Errors:")
        for err in details["errors"][:10]:
            print(f" - {err}")
        if len(details.get("errors", [])) > 10:
            print(f" ... and {len(details['errors']) - 10} more")

    elif state == "update":
        if "file_count" in details:
            print(f"\n Base Class Files: {details['file_count']}")
        if "class_count" in details:
            print(f" Base Classes: {details['class_count']}")
        if details.get("data_structure_count"):
            print(f" Data Structure Files: {details['data_structure_count']}")
            print(f" Data Structures: {len(details.get('data_structure_names', []))}")

        _listing("\n Base Class File Paths:", details.get("file_paths", []), 5)
        _listing("\n Base Classes:", details.get("class_names", []), 10)
        _listing("\n Data Flow Data Structures:", details.get("data_structure_names", []), 10)


def main():
    """CLI entry point: inspect base_classes.json and report its state."""
    parser = argparse.ArgumentParser(
        description="Check base classes state"
    )
    parser.add_argument(
        "--input",
        type=Path,
        default=BASE_CLASSES_FILE,
        help="Base classes file to check"
    )
    parser.add_argument(
        "--json",
        action="store_true",
        help="Output as JSON"
    )
    parser.add_argument(
        "--verbose", "-v",
        action="store_true",
        help="Include detailed class list and syntax check results"
    )

    args = parser.parse_args()
    outcome = inspect_state(args.input)

    # In verbose mode, include raw base_classes data
    if args.verbose and outcome.get("state") == "update":
        raw = load_json(args.input)
        if raw:
            outcome["base_classes"] = raw.get("base_classes", [])

    if args.json:
        print(json.dumps(outcome, indent=2))
    else:
        print("\n" + "=" * 50)
        print("BASE CLASSES CHECK")
        print("=" * 50)
        print_state(outcome)

    # Exit code mirrors the state: non-zero only for hard errors.
    return 1 if outcome["state"] == "error" else 0


if __name__ == "__main__":
    exit(main())
+""" + +import json +import argparse +from pathlib import Path +from typing import Dict, Any, List, Tuple + +# Import centralized paths and state loader +from common.paths import ( + TASKS_FILE, + CODE_GEN_STATE_FILE as STATE_FILE, + get_scripts_dir, + REPO_DIR, +) +from common.execution_state import load_code_gen_state +from common.execution_state import load_code_gen_state as _load_state, save_code_gen_state as _save_state +from common.execution_state import complete_batch as _complete_batch + + +def validate_tasks_file(tasks_path: Path) -> Tuple[bool, List[str], int]: + """Validate that tasks.json exists and count tasks. + + Returns: (valid, errors, total_tasks) + """ + errors = [] + total_tasks = 0 + + if not tasks_path.exists(): + errors.append(f"Tasks file not found: {tasks_path}") + return False, errors, 0 + + try: + with open(tasks_path, 'r', encoding='utf-8') as f: + data = json.load(f) + except json.JSONDecodeError as e: + errors.append(f"Invalid JSON in tasks file: {e}") + return False, errors, 0 + + # Count tasks + planned_tasks_dict = data.get("planned_tasks_dict", {}) + + for subtree, files_dict in planned_tasks_dict.items(): + for file_path, tasks_list in files_dict.items(): + total_tasks += len(tasks_list) + + if total_tasks == 0: + errors.append("No tasks found in tasks.json") + return False, errors, 0 + + return True, errors, total_tasks + + +def load_state(state_path: Path) -> Dict[str, Any]: + """Load code gen state from file via centralized loader. + + Returns a raw dict for backward compatibility with the rest of this script. + Returns empty dict if file doesn't exist or is a fresh (empty) state. 
+ """ + state_obj = load_code_gen_state(state_path) + state_dict = state_obj.to_dict() + # A fresh CodeGenState (no file) has no completed/failed tasks and no current batch + # Treat it as "no state" to trigger the "init" path + if (not state_dict.get("completed_task_ids") + and not state_dict.get("failed_task_ids") + and not state_dict.get("current_batch_id") + and not state_dict.get("initialized")): + return {} + return state_dict + + +def get_all_task_ids(tasks_path: Path) -> List[str]: + """Get all task IDs from tasks.json.""" + task_ids = [] + + try: + with open(tasks_path, 'r', encoding='utf-8') as f: + data = json.load(f) + + planned_tasks_dict = data.get("planned_tasks_dict", {}) + subtree_order = data.get("subtree_order", list(planned_tasks_dict.keys())) + + for subtree in subtree_order: + if subtree not in planned_tasks_dict: + continue + + files_dict = planned_tasks_dict[subtree] + for file_path, tasks_list in files_dict.items(): + for task_data in tasks_list: + if "task_id" in task_data: + task_ids.append(task_data["task_id"]) + except Exception: + pass + + return task_ids + + +def determine_state( + tasks_path: Path, + state_path: Path +) -> Dict[str, Any]: + """Determine the current execution state. + + Returns dict with: + - type: "error" | "init" | "in_progress" | "continue" | "complete" + - message: Human-readable message + - current_batch: Current batch info (if in_progress) + - next_batch: Next batch to process (if continue) + - stats: Statistics about progress + """ + result = { + "type": "error", + "message": "", + "current_batch": None, + "next_batch": None, + "stats": {} + } + + # Validate tasks file + valid, errors, total_tasks = validate_tasks_file(tasks_path) + + if not valid: + result["type"] = "error" + result["message"] = "; ".join(errors) + result["next_action"] = "Fix the reported issues. If tasks.json is missing, run /rpgkit.plan_tasks first." 
+ return result + + # Get all task IDs + all_task_ids = get_all_task_ids(tasks_path) + + # Load state + state = load_state(state_path) + + scripts = get_scripts_dir() + + if not state: + # No state file - ready to start + result["type"] = "init" + result["message"] = f"Ready to start code generation ({total_tasks} tasks)" + result["next_batch"] = all_task_ids[0] if all_task_ids else None + result["stats"] = { + "total_tasks": total_tasks, + "completed": 0, + "failed": 0, + "remaining": total_tasks + } + result["next_action"] = ( + f"Run: python3 {scripts}/init_codebase.py --json to initialize the repository, " + f"then run: python3 {scripts}/run_batch.py --next --json to start the first batch." + ) + result["workflow_hint"] = ( + "run_batch.py --next dispatches a sub-agent that autonomously " + "writes tests โ†’ code โ†’ runs pytest โ†’ fixes โ†’ repeats (up to 5x)" + ) + return result + + # Parse state + completed_ids = set(state.get("completed_task_ids", [])) + failed_ids = set(state.get("failed_task_ids", [])) + current_batch_id = state.get("current_batch_id") + current_batch_state = state.get("current_batch_state") + + # Calculate progress + completed = len(completed_ids) + failed = len(failed_ids) + processed = completed + failed + remaining = total_tasks - processed + + result["stats"] = { + "total_tasks": total_tasks, + "completed": completed, + "failed": failed, + "remaining": remaining, + "success_rate": (completed / total_tasks * 100) if total_tasks > 0 else 0 + } + + # Determine state + if current_batch_id and current_batch_state: + # Consistency check: if the current batch's task is already in completed + # or failed lists, the batch was actually finished but current_batch_id + # was not properly cleared (stale state). Auto-recover by clearing it. 
+ if current_batch_id in completed_ids or current_batch_id in failed_ids: + try: + _gs = _load_state(state_path) + _gs.current_batch_id = None + _gs.current_batch_state = None + _save_state(_gs, state_path) + # Reload and recalculate all derived variables + state = load_state(state_path) + completed_ids = set(state.get("completed_task_ids", [])) + failed_ids = set(state.get("failed_task_ids", [])) + current_batch_id = state.get("current_batch_id") + current_batch_state = state.get("current_batch_state") + completed = len(completed_ids) + failed = len(failed_ids) + processed = completed + failed + remaining = total_tasks - processed + result["stats"] = { + "total_tasks": total_tasks, + "completed": completed, + "failed": failed, + "remaining": remaining, + "success_rate": (completed / total_tasks * 100) if total_tasks > 0 else 0 + } + except Exception: + pass # If cleanup fails, proceed with stale state + + if current_batch_id and current_batch_state: + # A batch is in progress + phase = current_batch_state.get("phase", "unknown") + + # Auto-recover: if tests passed (phase=complete) but complete_batch was + # never called, finalize the batch now to prevent it being counted as failed. + # Note: this only updates state tracking; git commit/merge and RPG updates + # are skipped since code changes were already committed during the TDD loop. 
+ if phase == "complete": + try: + _complete_batch(current_batch_id, True, state_path) + # Reload state after auto-recovery + state = load_state(state_path) + completed_ids = set(state.get("completed_task_ids", [])) + failed_ids = set(state.get("failed_task_ids", [])) + completed = len(completed_ids) + failed = len(failed_ids) + processed = completed + failed + remaining = total_tasks - processed + result["stats"] = { + "total_tasks": total_tasks, + "completed": completed, + "failed": failed, + "remaining": remaining, + "success_rate": (completed / total_tasks * 100) if total_tasks > 0 else 0 + } + result["auto_recovered"] = True + result["auto_recovered_batch"] = current_batch_id + # Fall through to the next-batch / complete logic below + except Exception as e: + # If auto-recovery fails, report as in_progress so the agent + # can fall back to ``run_batch.py --resume`` which re-runs the + # batch and lets the orchestrator's own completion path finalise it. + result["type"] = "in_progress" + result["message"] = f"Batch in progress: {current_batch_id}" + result["current_batch"] = { + "batch_id": current_batch_id, + "iteration": current_batch_state.get("iteration", 0), + "phase": phase, + "file_path": current_batch_state.get("file_path", ""), + "max_iterations": current_batch_state.get("max_iterations", 5), + "merged_mode": len(current_batch_state.get("merged_task_ids", [])) > 1, + "merged_task_count": len(current_batch_state.get("merged_task_ids", [])), + } + result["auto_recovery_error"] = str(e) + result["next_action"] = ( + f"Tests passed but auto-recovery failed ({e}). " + f"Run: python3 {scripts}/run_batch.py --resume --json to retry." 
+ ) + return result + else: + result["type"] = "in_progress" + result["message"] = f"Batch in progress: {current_batch_id}" + result["current_batch"] = { + "batch_id": current_batch_id, + "iteration": current_batch_state.get("iteration", 0), + "phase": phase, + "file_path": current_batch_state.get("file_path", ""), + "max_iterations": current_batch_state.get("max_iterations", 5), + "merged_mode": len(current_batch_state.get("merged_task_ids", [])) > 1, + "merged_task_count": len(current_batch_state.get("merged_task_ids", [])), + } + if phase == "failed": + result["next_action"] = ( + f"Batch {current_batch_id} has failed. " + f"Run: python3 {scripts}/run_batch.py --retry {current_batch_id} --json " + f"to retry, or python3 {scripts}/run_batch.py --next --json to skip " + f"it and move on." + ) + else: + result["next_action"] = ( + f"Resume the current batch (phase: {phase}). " + f"Run: python3 {scripts}/run_batch.py --resume --json" + ) + result["workflow_hint"] = ( + "run_batch.py --resume dispatches a sub-agent that autonomously " + "writes tests โ†’ code โ†’ runs pytest โ†’ fixes โ†’ repeats (up to 5x)" + ) + return result + + # Find next batch + next_batch = None + for batch_id in all_task_ids: + if batch_id not in completed_ids and batch_id not in failed_ids: + next_batch = batch_id + break + + if next_batch: + result["type"] = "continue" + result["message"] = f"Ready to continue ({remaining} tasks remaining)" + result["next_batch"] = next_batch + result["next_action"] = ( + f"Run: python3 {scripts}/run_batch.py --next --json " + f"to start the next batch." + ) + result["workflow_hint"] = ( + "run_batch.py --next dispatches a sub-agent that autonomously " + "writes tests โ†’ code โ†’ runs pytest โ†’ fixes โ†’ repeats (up to 5x)" + ) + else: + result["type"] = "complete" + if failed > 0: + result["message"] = f"All tasks processed: {completed} completed, {failed} failed" + else: + result["message"] = f"All {completed} tasks completed successfully!" 
+ + # Check stage files to determine which post-completion step is next + logs_dir = Path(scripts).parent / "logs" + ft_file = logs_dir / "codegen_final_test.json" + gr_file = logs_dir / "codegen_global_review.json" + + ft_passed = False + if ft_file.exists(): + try: + ft_data = json.loads(ft_file.read_text(encoding="utf-8")) + ft_passed = ft_data.get("success", False) + except Exception: + pass + + gr_passed = False + if gr_file.exists(): + try: + gr_data = json.loads(gr_file.read_text(encoding="utf-8")) + gr_passed = gr_data.get("success", False) + except Exception: + pass + + if not ft_passed: + result["next_action"] = ( + f"Run: python3 {scripts}/run_batch.py --final-test --json" + ) + elif not gr_passed: + result["next_action"] = ( + f"Final test passed. Run: python3 {scripts}/run_batch.py --global-review --json" + ) + else: + result["next_action"] = ( + "All steps complete (batches + final test + global review). " + "Display the final summary to the user." + ) + + # Artifact verification: check that special task outputs actually exist. + # This prevents false "complete" when tasks were marked done without + # actually generating the expected files. 
+ missing_artifacts = [] + repo_root = REPO_DIR + + # Check for main_entry task artifact + main_entry_ids = [tid for tid in completed_ids if tid.startswith("")] + if main_entry_ids and not (repo_root / "main.py").exists(): + missing_artifacts.append("main.py (from task)") + + # Check for requirements task artifact + req_ids = [tid for tid in completed_ids if tid.startswith("")] + if req_ids and not (repo_root / "requirements.txt").exists(): + missing_artifacts.append("requirements.txt (from task)") + + if missing_artifacts: + result["type"] = "incomplete" + result["missing_artifacts"] = missing_artifacts + result["message"] = ( + f"All tasks marked complete but {len(missing_artifacts)} expected " + f"artifact(s) missing: {', '.join(missing_artifacts)}" + ) + result["next_action"] = ( + f"WARNING: The following files were expected but not found: " + f"{', '.join(missing_artifacts)}. " + f"These tasks may have been marked complete without actual generation. " + f"Re-run the affected tasks or generate the files manually." 
+ ) + + return result + + +def print_status(result: Dict[str, Any], json_output: bool = False) -> None: + """Print the status in human-readable or JSON format.""" + if json_output: + print(json.dumps(result, indent=2)) + return + + state_type = result["type"] + + print("\nโ•”โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•—") + print("โ•‘ CODE GENERATION STATUS โ•‘") + print("โ•šโ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•") + + # Status badge + badges = { + "error": "ERROR", + "init": "READY TO START", + "in_progress": "IN PROGRESS", + "continue": "READY TO CONTINUE", + "complete": "COMPLETE" + } + + print(f"\n Status: {badges.get(state_type, state_type)}") + print(f" {result['message']}") + + # Stats + stats = result.get("stats", {}) + if stats: + print("\n Progress:") + print(f" - Total tasks: {stats.get('total_tasks', 0)}") + print(f" - Completed: {stats.get('completed', 0)}") + print(f" - Failed: {stats.get('failed', 0)}") + print(f" - Remaining: {stats.get('remaining', 0)}") + if stats.get('success_rate'): + print(f" - Success rate: {stats['success_rate']:.1f}%") + + # Current batch info + if result.get("current_batch"): + batch = result["current_batch"] + print("\n Current Batch:") + print(f" - ID: {batch.get('batch_id', 'unknown')}") + print(f" - File: {batch.get('file_path', 'unknown')}") + print(f" - Phase: {batch.get('phase', 'unknown')}") + print(f" - Iteration: {batch.get('iteration', 0)}/{batch.get('max_iterations', 5)}") + + # Next batch info + if result.get("next_batch"): + print(f"\n Next Batch: {result['next_batch']}") + + # Guidance + print("\n " + "โ”€" * 60) + + if state_type == "error": + print(" Fix the errors above before proceeding.") + print(" Run 
def print_status(result: Dict[str, Any], json_output: bool = False) -> None:
    """Print the status in human-readable or JSON format.

    Args:
        result: Output of determine_state().
        json_output: When True, dump the raw result as JSON and return.
    """
    if json_output:
        print(json.dumps(result, indent=2))
        return

    state_type = result["type"]

    print("\nโ•”โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•—")
    print("โ•‘ CODE GENERATION STATUS โ•‘")
    print("โ•šโ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•")

    # Status badge. "incomplete" is emitted by determine_state() when
    # expected artifacts are missing — previously unhandled here.
    badges = {
        "error": "ERROR",
        "init": "READY TO START",
        "in_progress": "IN PROGRESS",
        "continue": "READY TO CONTINUE",
        "complete": "COMPLETE",
        "incomplete": "INCOMPLETE"
    }

    print(f"\n Status: {badges.get(state_type, state_type)}")
    print(f" {result['message']}")

    # Stats
    stats = result.get("stats", {})
    if stats:
        print("\n Progress:")
        print(f" - Total tasks: {stats.get('total_tasks', 0)}")
        print(f" - Completed: {stats.get('completed', 0)}")
        print(f" - Failed: {stats.get('failed', 0)}")
        print(f" - Remaining: {stats.get('remaining', 0)}")
        if stats.get('success_rate'):
            print(f" - Success rate: {stats['success_rate']:.1f}%")

    # Current batch info
    if result.get("current_batch"):
        batch = result["current_batch"]
        print("\n Current Batch:")
        print(f" - ID: {batch.get('batch_id', 'unknown')}")
        print(f" - File: {batch.get('file_path', 'unknown')}")
        print(f" - Phase: {batch.get('phase', 'unknown')}")
        print(f" - Iteration: {batch.get('iteration', 0)}/{batch.get('max_iterations', 5)}")

    # Next batch info
    if result.get("next_batch"):
        print(f"\n Next Batch: {result['next_batch']}")

    # Guidance
    print("\n " + "โ”€" * 60)

    if state_type == "error":
        print(" Fix the errors above before proceeding.")
        print(" Run /rpgkit.plan_tasks to generate tasks.json")
    elif state_type == "init":
        print(" Run /rpgkit.code_gen to start code generation")
    elif state_type == "in_progress":
        print(" Run /rpgkit.code_gen to continue current batch")
    elif state_type == "continue":
        print(" Run /rpgkit.code_gen to process next batch")
    elif state_type == "complete":
        print(" All done! Review the generated code.")
    elif state_type == "incomplete":
        # Previously fell through silently; surface the missing artifacts.
        print(" Some expected artifacts are missing:")
        for artifact in result.get("missing_artifacts", []):
            print(f" - {artifact}")


def main():
    """CLI entry point: report code-generation state.

    Exit codes: 0 success, 1 hard error, 2 finished-with-failures or
    missing artifacts.
    """
    parser = argparse.ArgumentParser(
        description="Check code generation state"
    )
    parser.add_argument(
        "--tasks", "-t",
        type=Path,
        default=TASKS_FILE,
        help=f"Input tasks file (default: {TASKS_FILE})"
    )
    parser.add_argument(
        "--state", "-s",
        type=Path,
        default=STATE_FILE,
        help=f"Input state file (default: {STATE_FILE})"
    )
    parser.add_argument(
        "--json",
        action="store_true",
        help="Output as JSON"
    )

    args = parser.parse_args()

    result = determine_state(args.tasks, args.state)
    print_status(result, json_output=args.json)

    # Return exit code based on state
    if result["type"] == "error":
        return 1
    if result["type"] == "incomplete":
        # Missing artifacts: a zero exit would mislead CI/agent callers.
        return 2
    if result["type"] == "complete":
        # Check if there were failures
        if result.get("stats", {}).get("failed", 0) > 0:
            return 2
    return 0


if __name__ == "__main__":
    exit(main())
def load_json(file_path: Path) -> Dict[str, Any]:
    """Load JSON file safely; missing or malformed files yield {}."""
    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            return json.load(handle)
    except (json.JSONDecodeError, FileNotFoundError):
        return {}


def get_components_from_skeleton(skeleton: Dict[str, Any]) -> Set[str]:
    """Extract component names from skeleton.

    Components come from each file node's explicit 'component' field and
    from the first segment of every 'feature_paths' entry.
    """
    found: Set[str] = set()
    # Iterative DFS over the tree (set semantics make order irrelevant).
    pending = [skeleton.get("root", skeleton)]
    while pending:
        node = pending.pop()
        kind = node.get("type")
        if kind == "file":
            for feature in node.get("feature_paths", []):
                if "/" in feature:
                    found.add(feature.split("/")[0])
            owner = node.get("component", "")
            if owner:
                found.add(owner)
        elif kind == "directory":
            pending.extend(node.get("children", []))
    return found


def get_components_from_data_flow(data_flow: Dict[str, Any]) -> Set[str]:
    """Extract component names from data flow.

    Union of the 'components' list, 'subtree_order', and every non-empty
    edge source/target in 'data_flow'.
    """
    found: Set[str] = set(data_flow.get("components", []))
    found.update(data_flow.get("subtree_order", []))
    for edge in data_flow.get("data_flow", []):
        for endpoint in ("source", "target"):
            name = edge.get(endpoint, "")
            if name:
                found.add(name)
    return found
def validate_data_flow_structure(data_flow: Dict[str, Any]) -> Tuple[bool, List[str]]:
    """Validate data flow structure.

    Checks edge field completeness, rejects self-loops, and reports the
    first cycle found in the source->target graph.
    """
    problems: List[str] = []

    edges = data_flow.get("data_flow", [])
    if not isinstance(edges, list):
        problems.append("'data_flow' must be a list")
        return False, problems

    required = ("source", "target", "data_id", "data_type", "transformation")
    for idx, edge in enumerate(edges):
        if not isinstance(edge, dict):
            problems.append(f"Edge {idx}: must be a dictionary")
            continue

        for field in required:
            if field not in edge:
                problems.append(f"Edge {idx}: missing required field '{field}'")
            elif not edge[field]:
                problems.append(f"Edge {idx}: field '{field}' is empty")

        if edge.get("source") == edge.get("target"):
            problems.append(f"Edge {idx}: self-loop detected ({edge.get('source')} -> {edge.get('source')})")

    # Build the adjacency map for cycle detection.
    adjacency = defaultdict(list)
    for edge in edges:
        src = edge.get("source", "")
        dst = edge.get("target", "")
        if src and dst:
            adjacency[src].append(dst)

    seen = set()
    on_path = set()

    def _probe(node: str, trail: List[str]) -> Tuple[bool, List[str]]:
        # Depth-first search; `on_path` tracks the current recursion stack
        # so a back-edge into it identifies a cycle.
        seen.add(node)
        on_path.add(node)
        for nxt in adjacency.get(node, []):
            if nxt not in seen:
                hit, cycle = _probe(nxt, trail + [nxt])
                if hit:
                    return True, cycle
            elif nxt in on_path:
                return True, trail + [nxt]
        on_path.remove(node)
        return False, []

    for start in adjacency:
        if start not in seen:
            hit, cycle = _probe(start, [start])
            if hit:
                problems.append(f"Cycle detected: {' -> '.join(cycle)}")
                break

    return len(problems) == 0, problems
def cross_validate_components(
    skeleton_components: Set[str],
    data_flow_components: Set[str]
) -> Tuple[bool, Dict[str, Any]]:
    """Cross-validate components between skeleton and data flow.

    Returns:
        (is_consistent, details) — consistent only when both sets match.
    """
    only_skeleton = skeleton_components - data_flow_components
    only_data_flow = data_flow_components - skeleton_components

    details = {
        "skeleton_components": len(skeleton_components),
        "data_flow_components": len(data_flow_components),
        "matched": len(skeleton_components & data_flow_components),
        "in_skeleton_only": sorted(only_skeleton),
        "in_data_flow_only": sorted(only_data_flow),
    }
    return len(only_skeleton) == 0 and len(only_data_flow) == 0, details


def inspect_state(data_flow_path: Path, skeleton_path: Path) -> Dict[str, Any]:
    """Inspect current state and determine action needed.

    Returns dict with:
    - state: "error" | "init" | "warning" | "update"
    - message: description
    - details: additional info
    """
    # No data flow yet: the build step still needs to run.
    if not data_flow_path.exists():
        return {
            "state": "init",
            "message": "data_flow.json not found - need to run build_data_flow",
            "details": {}
        }

    try:
        with open(data_flow_path, 'r', encoding='utf-8') as handle:
            flow = json.load(handle)
    except json.JSONDecodeError as exc:
        return {
            "state": "error",
            "message": f"Invalid JSON in data_flow.json: {exc}",
            "details": {}
        }

    # An upstream step may have recorded an explicit error marker.
    if "error" in flow:
        return {
            "state": "error",
            "message": f"Data flow has error: {flow['error']}",
            "details": {}
        }

    ok, problems = validate_data_flow_structure(flow)
    if not ok:
        return {
            "state": "error",
            "message": "Data flow structure is invalid",
            "details": {"errors": problems}
        }

    edge_total = len(flow.get("data_flow", []))

    # Cross-validate with the skeleton when one is available.
    if skeleton_path.exists():
        try:
            with open(skeleton_path, 'r', encoding='utf-8') as handle:
                skeleton = json.load(handle)

            from_skeleton = get_components_from_skeleton(skeleton)
            from_flow = get_components_from_data_flow(flow)

            consistent, comparison = cross_validate_components(
                from_skeleton, from_flow
            )

            if not consistent:
                return {
                    "state": "warning",
                    "message": "Component mismatch between skeleton and data flow",
                    "details": comparison
                }

            return {
                "state": "update",
                "message": "Data flow is valid and consistent",
                "details": {
                    "edge_count": edge_total,
                    "component_count": len(from_flow),
                    "subtree_order": flow.get("subtree_order", [])
                }
            }

        except Exception as exc:
            # Skeleton load failed; report the data flow as valid on its own.
            return {
                "state": "update",
                "message": f"Data flow is valid (skeleton check skipped: {exc})",
                "details": {
                    "edge_count": edge_total,
                    "component_count": len(get_components_from_data_flow(flow))
                }
            }

    # No skeleton to compare against.
    return {
        "state": "update",
        "message": "Data flow is valid (no skeleton to cross-validate)",
        "details": {
            "edge_count": edge_total,
            "component_count": len(get_components_from_data_flow(flow))
        }
    }
def print_state(result: Dict[str, Any]) -> None:
    """Print state information for a data-flow inspection result."""
    state = result["state"]
    details = result.get("details", {})

    badge = {
        "error": "[FAIL]",
        "init": "[-]",
        "warning": "[WARNING]",
        "update": "[OK]",
    }.get(state, "[?]")

    print(f"\n{badge} State: {state.upper()}")
    print(f" {result['message']}")

    if state == "error" and "errors" in details:
        print("\n Errors:")
        for issue in details["errors"][:10]:
            print(f" - {issue}")
        if len(details.get("errors", [])) > 10:
            print(f" ... and {len(details['errors']) - 10} more")

    elif state == "warning":
        # Two symmetric mismatch listings, capped at 5 entries each.
        sections = (
            ("\n Components in skeleton but not in data flow:", details.get("in_skeleton_only")),
            ("\n Components in data flow but not in skeleton:", details.get("in_data_flow_only")),
        )
        for header, names in sections:
            if names:
                print(header)
                for name in names[:5]:
                    print(f" - {name}")

    elif state == "update":
        if "edge_count" in details:
            print(f"\n Data Flow Edges: {details['edge_count']}")
        if "component_count" in details:
            print(f" Components: {details['component_count']}")
        if details.get("subtree_order"):
            print(f" Subtree Order: {' โ†’ '.join(details['subtree_order'][:5])}")
            if len(details.get("subtree_order", [])) > 5:
                print(f" ... and {len(details['subtree_order']) - 5} more")


def main():
    """CLI entry point: inspect data_flow.json and report its state."""
    parser = argparse.ArgumentParser(
        description="Check data flow state"
    )
    parser.add_argument(
        "--data-flow", "-d",
        type=Path,
        default=DATA_FLOW_FILE,
        help="Data flow file to check"
    )
    parser.add_argument(
        "--skeleton", "-s",
        type=Path,
        default=SKELETON_FILE,
        help="Skeleton file for cross-validation"
    )
    parser.add_argument(
        "--json",
        action="store_true",
        help="Output as JSON"
    )
    parser.add_argument(
        "--verbose", "-v",
        action="store_true",
        help="Include detailed edge list and component information"
    )

    args = parser.parse_args()
    outcome = inspect_state(args.data_flow, args.skeleton)

    # In verbose mode, include all edges and component details
    if args.verbose and outcome.get("state") == "update":
        raw = load_json(args.data_flow)
        if raw:
            outcome["edges"] = raw.get("data_flow", [])
            outcome["subtree_order"] = raw.get("subtree_order", [])

    if args.json:
        print(json.dumps(outcome, indent=2))
    else:
        print("\n" + "=" * 50)
        print("DATA FLOW CHECK")
        print("=" * 50)
        print_state(outcome)

        # Print verbose details
        if args.verbose and outcome.get("state") == "update":
            edge_rows = outcome.get("edges", [])
            if edge_rows:
                print("\nData Flow Edges:")
                for edge in edge_rows:
                    print(f" {edge.get('source', '?')} โ†’ {edge.get('target', '?')}: {edge.get('data_id', '?')} ({edge.get('data_type', '?')})")

            order = outcome.get("subtree_order", [])
            if order:
                print(f"\nSubtree Order: {' โ†’ '.join(order)}")

    # Exit code mirrors the state: non-zero only for hard errors.
    return 1 if outcome["state"] == "error" else 0


if __name__ == "__main__":
    exit(main())
+""" + +import json +import argparse +from pathlib import Path +from typing import Dict, Any, List, Tuple, Set + +# Import centralized paths +from common.paths import SKELETON_FILE, INTERFACES_FILE, REPO_RPG_FILE + + +def validate_skeleton(skeleton_path: Path) -> Tuple[bool, List[str]]: + """Validate that skeleton.json exists and is valid.""" + errors = [] + + if not skeleton_path.exists(): + errors.append(f"Input file not found: {skeleton_path}") + return False, errors + + try: + with open(skeleton_path, 'r', encoding='utf-8') as f: + data = json.load(f) + except json.JSONDecodeError as e: + errors.append(f"Invalid JSON: {e}") + return False, errors + + # Check required structure + if "root" not in data: + errors.append("Missing required field: 'root'") + return False, errors + + root = data["root"] + if not isinstance(root, dict): + errors.append("'root' must be an object") + return False, errors + + if root.get("type") != "directory": + errors.append("'root.type' must be 'directory'") + return False, errors + + return True, errors + + +def get_files_from_skeleton(skeleton_path: Path) -> List[Dict[str, Any]]: + """Extract all files from skeleton tree.""" + with open(skeleton_path, 'r', encoding='utf-8') as f: + data = json.load(f) + + def collect_files(node: Dict[str, Any]) -> List[Dict[str, Any]]: + files = [] + if node.get("type") == "file": + files.append({ + "path": node.get("path", ""), + "feature_paths": node.get("feature_paths", []), + "component": node.get("component", "") + }) + else: + for child in node.get("children", []): + files.extend(collect_files(child)) + return files + + return collect_files(data.get("root", {})) + + +def get_all_features_from_skeleton(skeleton_path: Path) -> Set[str]: + """Extract all feature paths from skeleton.json.""" + files = get_files_from_skeleton(skeleton_path) + features = set() + for f in files: + features.update(f.get("feature_paths", [])) + return features + + +def get_all_features_from_interfaces(interfaces_path: 
def cross_validate_features(skeleton_features: Set[str], interfaces_features: Set[str]) -> Dict[str, Any]:
    """Cross-validate features between skeleton and interfaces.

    Returns dict with:
        - in_skeleton_not_interfaces: features in skeleton but not in interfaces
        - in_interfaces_not_skeleton: features in interfaces but not in skeleton
        - matched_count: number of matched features
        - warnings: list of warning objects
        - is_consistent: True when there are no mismatches
    """
    in_skeleton_not_interfaces = skeleton_features - interfaces_features
    in_interfaces_not_skeleton = interfaces_features - skeleton_features
    matched = skeleton_features & interfaces_features

    warnings = []
    for feat in sorted(in_skeleton_not_interfaces):
        warnings.append({
            "type": "missing_in_interfaces",
            "feature": feat,
            "message": f"Feature '{feat}' exists in skeleton.json but not mapped in interfaces.json"
        })
    for feat in sorted(in_interfaces_not_skeleton):
        warnings.append({
            "type": "missing_in_skeleton",
            "feature": feat,
            "message": f"Feature '{feat}' mapped in interfaces.json but not in skeleton.json"
        })

    return {
        "in_skeleton_not_interfaces": sorted(in_skeleton_not_interfaces),
        "in_interfaces_not_skeleton": sorted(in_interfaces_not_skeleton),
        "matched_count": len(matched),
        "skeleton_feature_count": len(skeleton_features),
        "interfaces_feature_count": len(interfaces_features),
        "warnings": warnings,
        "is_consistent": len(warnings) == 0
    }


def validate_interfaces(interfaces_path: Path, skeleton_path: Path) -> Tuple[bool, List[str], Dict[str, Any]]:
    """Validate interfaces.json structure and content.

    Returns:
        (is_valid, errors, stats). ``stats`` reports component/file/unit
        counts, the number of mapped features, and
        ``files_missing_from_interfaces`` — skeleton files carrying features
        that no interface entry designs (previously computed and silently
        discarded).
    """
    errors: List[str] = []
    stats: Dict[str, Any] = {
        "components": 0,
        "files": 0,
        "units": 0,
        "features_mapped": 0
    }

    if not interfaces_path.exists():
        errors.append(f"Output file not found: {interfaces_path}")
        return False, errors, stats

    try:
        with open(interfaces_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except json.JSONDecodeError as e:
        errors.append(f"Invalid JSON: {e}")
        return False, errors, stats

    # Check required structure - support both "subtrees" and "components".
    containers = data.get("subtrees", data.get("components", {}))
    if not containers:
        errors.append("Missing required field: 'subtrees' or 'components'")
        return False, errors, stats

    if not isinstance(containers, dict):
        errors.append("'subtrees'/'components' must be an object")
        return False, errors, stats

    stats["components"] = len(containers)

    # Files the skeleton says should be designed (those carrying features).
    expected_files: Set[str] = set()
    if skeleton_path.exists():
        expected_files = {
            f["path"] for f in get_files_from_skeleton(skeleton_path)
            if f.get("feature_paths")
        }

    all_features: Set[str] = set()
    designed_files: Set[str] = set()

    for comp_name, comp_data in containers.items():
        if not isinstance(comp_data, dict):
            errors.append(f"Component '{comp_name}' must be an object")
            continue

        # Support both "interfaces" (reference format) and "files" (old format).
        file_container = comp_data.get("interfaces", comp_data.get("files", {}))
        if not isinstance(file_container, dict):
            errors.append(f"Component '{comp_name}.interfaces/files' must be an object")
            continue

        for file_path, file_data in file_container.items():
            stats["files"] += 1
            designed_files.add(file_path)

            if not isinstance(file_data, dict):
                errors.append(f"File '{file_path}' data must be an object")
                continue

            units = file_data.get("units", [])
            if not isinstance(units, list):
                errors.append(f"File '{file_path}.units' must be a list")
            else:
                stats["units"] += len(units)

            units_to_features = file_data.get("units_to_features", {})
            if not isinstance(units_to_features, dict):
                errors.append(f"File '{file_path}.units_to_features' must be an object")
            else:
                # Fix (PERF102): unit names were iterated but unused.
                for features in units_to_features.values():
                    if isinstance(features, list):
                        all_features.update(features)

            if not isinstance(file_data.get("units_to_code", {}), dict):
                errors.append(f"File '{file_path}.units_to_code' must be an object")

    stats["features_mapped"] = len(all_features)
    # Fix: coverage gap used to be computed then thrown away with `pass`;
    # surface the count so callers can report incomplete coverage.
    stats["files_missing_from_interfaces"] = len(expected_files - designed_files)

    return len(errors) == 0, errors, stats


def validate_rpg_feature_paths(rpg_path: Path) -> Dict[str, Any]:
    """Validate that feature nodes in the RPG have proper meta.path assignments.

    Returns:
        Dict with:
            - features_with_path: count of features with valid meta.path
            - features_without_path: count of features missing meta.path
            - same_unit_edges: count of SAME_UNIT edges
            - warnings: list of validation warnings
            - is_valid: overall validity flag
    """
    result: Dict[str, Any] = {
        "features_with_path": 0,
        "features_without_path": 0,
        "same_unit_edges": 0,
        "warnings": [],
        "is_valid": True
    }

    if not rpg_path.exists():
        result["warnings"].append(f"RPG file not found: {rpg_path}")
        result["is_valid"] = False
        return result

    try:
        with open(rpg_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except json.JSONDecodeError as e:
        result["warnings"].append(f"Invalid RPG JSON: {e}")
        result["is_valid"] = False
        return result

    # Count SAME_UNIT edges.
    for edge in data.get("edges", []):
        if edge.get("relation") == "same_unit":
            result["same_unit_edges"] += 1

    def check_node(node_data: Dict[str, Any]) -> None:
        """Recursively count feature nodes with/without meta.path."""
        meta = node_data.get("meta", {})
        if node_data.get("node_type") == "feature":
            if meta and meta.get("path"):
                result["features_with_path"] += 1
            else:
                result["features_without_path"] += 1
                result["warnings"].append(
                    f"Feature '{node_data.get('name', '')}' missing meta.path"
                )
        for child in node_data.get("children", []):
            check_node(child)

    root = data.get("root")
    if root:
        check_node(root)

    # Any feature lacking a path makes the RPG invalid for downstream steps.
    if result["features_without_path"] > 0:
        result["is_valid"] = False

    return result
def check_state(input_path: Path, output_path: Path) -> Dict[str, Any]:
    """Check the current state and return execution guidance.

    Args:
        input_path: Path to skeleton.json (the input artifact).
        output_path: Path to interfaces.json (the output artifact).

    Returns:
        Dict with a ``type`` of "error"/"init"/"warning"/"update", a human
        message, validation errors, stats and cross-validation details.
    """
    result: Dict[str, Any] = {
        "type": "error",
        "message": "",
        "input_exists": input_path.exists(),
        "input_valid": False,
        "output_exists": output_path.exists(),
        "output_valid": False,
        "validation_errors": [],
        "stats": {},
        "cross_validation": None
    }

    # Check input (skeleton.json).
    if not result["input_exists"]:
        result["message"] = f"Input file not found: {input_path}. Please run /rpgkit.build_skeleton first."
        return result

    input_valid, input_errors = validate_skeleton(input_path)
    result["input_valid"] = input_valid
    if not input_valid:
        result["message"] = "Invalid skeleton.json"
        result["validation_errors"] = input_errors
        return result

    # Get skeleton features for cross-validation.
    skeleton_features = get_all_features_from_skeleton(input_path)
    result["input_statistics"] = {
        "total_features": len(skeleton_features)
    }

    # Check output (interfaces.json).
    if not result["output_exists"]:
        result["type"] = "init"
        result["message"] = "Ready to design interfaces. No existing interfaces.json found."
        return result

    output_valid, output_errors, stats = validate_interfaces(output_path, input_path)
    result["output_valid"] = output_valid
    result["stats"] = stats
    if not output_valid:
        result["type"] = "init"
        result["message"] = "Existing interfaces.json is invalid. Will regenerate."
        result["validation_errors"] = output_errors
        return result

    # Cross-validate features between skeleton and interfaces.
    interfaces_features = get_all_features_from_interfaces(output_path)
    cross_validation = cross_validate_features(skeleton_features, interfaces_features)
    result["cross_validation"] = cross_validation

    # Validate RPG feature paths.
    rpg_validation = validate_rpg_feature_paths(REPO_RPG_FILE)
    result["rpg_validation"] = rpg_validation

    # Determine type based on cross-validation and RPG validation.
    if not cross_validation["is_consistent"]:
        warning_count = len(cross_validation["warnings"])
        result["type"] = "warning"
        result["message"] = f"interfaces.json exists but has {warning_count} feature mismatches with skeleton."
    elif not rpg_validation["is_valid"]:
        missing_count = rpg_validation["features_without_path"]
        result["type"] = "warning"
        result["message"] = f"interfaces.json valid but {missing_count} features in RPG missing meta.path."
    else:
        result["type"] = "update"
        result["message"] = (f"Valid interfaces.json exists with {stats['units']} units across {stats['files']} files. "
                             f"RPG has {rpg_validation['features_with_path']} features with paths, "
                             f"{rpg_validation['same_unit_edges']} SAME_UNIT edges.")

    return result


def main() -> int:
    """CLI entry point for the interfaces check.

    Returns:
        Process exit code: 1 when the state is "error", else 0.
    """
    parser = argparse.ArgumentParser(
        description="Check interfaces.json validity and state"
    )
    parser.add_argument(
        "--input", "-i",
        type=Path,
        default=SKELETON_FILE,
        help=f"Input skeleton file (default: {SKELETON_FILE})"
    )
    parser.add_argument(
        "--output", "-o",
        type=Path,
        default=INTERFACES_FILE,
        help=f"Output interfaces file to check (default: {INTERFACES_FILE})"
    )
    parser.add_argument(
        "--json",
        action="store_true",
        help="Output as JSON only"
    )

    args = parser.parse_args()

    result = check_state(args.input, args.output)

    if args.json:
        print(json.dumps(result, indent=2))
    else:
        print(f"\nState: {result['type']}")
        print(f"Message: {result['message']}")

        if result['validation_errors']:
            print("\nValidation Errors:")
            for err in result['validation_errors']:
                print(f"  - {err}")

        if result['stats']:
            print("\nStatistics:")
            for key, value in result['stats'].items():
                print(f"  {key}: {value}")

    # Fix: previously returned 0 unconditionally; error states now exit 1,
    # matching the sibling check scripts so CI can detect failures.
    return 1 if result["type"] == "error" else 0


if __name__ == "__main__":
    # Fix: use SystemExit rather than the interactive-only ``exit()`` builtin.
    raise SystemExit(main())
+""" + +import json +import argparse +from pathlib import Path +from typing import Any, Dict, List, Set, Tuple + +# Import centralized paths +from common.paths import FEATURE_TREE_FILE as INPUT_FILE, SKELETON_FILE as OUTPUT_FILE + + +def load_json(path: Path) -> Dict[str, Any] | None: + """Load JSON file safely.""" + try: + with path.open("r", encoding="utf-8") as f: + data = json.load(f) + if isinstance(data, dict) and len(data) > 0: + return data + except Exception: + pass + return None + + +# ============================================================================ +# Feature Path Extraction from refactor_feature.json +# ============================================================================ + +def get_all_feature_paths_from_subtree(subtree: Dict[str, Any], prefix: str = "") -> List[str]: + """Extract all feature paths from a refactored_subtree. + + Features are the leaf nodes in the tree structure. + This mirrors the logic in build_skeleton.py. + """ + paths = [] + if isinstance(subtree, dict): + for key, value in subtree.items(): + # Skip 'description' keys as they are metadata + if key in ("description",): + continue + + new_prefix = f"{prefix}/{key}" if prefix else key + + if key == "children": + # 'children' is a container, recurse into it + paths.extend(get_all_feature_paths_from_subtree(value, prefix)) + elif isinstance(value, dict): + # Check if this is a leaf node (only has 'description') + if set(value.keys()) <= {"description"}: + paths.append(new_prefix) + else: + # Has more content, recurse + paths.extend(get_all_feature_paths_from_subtree(value, new_prefix)) + elif isinstance(value, list): + # List of leaf features + for item in value: + if isinstance(item, str): + paths.append(f"{new_prefix}/{item}" if new_prefix else item) + elif isinstance(item, dict): + paths.extend(get_all_feature_paths_from_subtree(item, new_prefix)) + else: + paths.append(new_prefix) + elif isinstance(subtree, list): + for item in subtree: + if isinstance(item, str): 
+ paths.append(f"{prefix}/{item}" if prefix else item) + elif isinstance(item, dict): + paths.extend(get_all_feature_paths_from_subtree(item, prefix)) + + return paths + + +def get_features_from_refactor(data: Dict[str, Any]) -> Tuple[Set[str], Dict[str, List[str]]]: + """Extract all feature paths from refactor_feature.json. + + Returns: + - Set of all feature paths + - Dict mapping component name to its feature paths + """ + all_features = set() + features_by_component = {} + + components = data.get("components", []) + if not isinstance(components, list): + return all_features, features_by_component + + for comp in components: + comp_name = comp.get("name", "unknown") + subtree = comp.get("refactored_subtree", {}) + + # Get features with component prefix (as build_skeleton does) + comp_features = get_all_feature_paths_from_subtree(subtree, prefix=comp_name) + + features_by_component[comp_name] = comp_features + all_features.update(comp_features) + + return all_features, features_by_component + + +# ============================================================================ +# Feature Path Extraction from skeleton.json +# ============================================================================ + +def get_all_feature_paths_from_skeleton(node: Dict[str, Any]) -> Set[str]: + """Extract all feature paths from skeleton tree.""" + features = set() + + if node.get("type") == "file": + for fp in node.get("feature_paths", []): + features.add(fp) + else: + for child in node.get("children", []): + features.update(get_all_feature_paths_from_skeleton(child)) + + return features + + +# ============================================================================ +# Cross Validation +# ============================================================================ + +def cross_validate_features( + input_features: Set[str], + skeleton_features: Set[str] +) -> Dict[str, Any]: + """Cross-validate features between input (refactor_feature) and output (skeleton). 
def cross_validate_features(
    input_features: Set[str],
    skeleton_features: Set[str]
) -> Dict[str, Any]:
    """Cross-validate features between refactor_feature.json and skeleton.json.

    Returns dict with:
        - in_input_not_skeleton: features in refactor_feature but not in skeleton
        - in_skeleton_not_input: features in skeleton but not in refactor_feature
        - matched_count: number of matched features
        - warnings: list of warning objects
        - is_consistent: True when there are no mismatches
    """
    in_input_not_skeleton = input_features - skeleton_features
    in_skeleton_not_input = skeleton_features - input_features
    matched = input_features & skeleton_features

    warnings = []
    for feat in sorted(in_input_not_skeleton):
        warnings.append({
            "type": "missing_in_skeleton",
            "feature": feat,
            "message": f"Feature '{feat}' exists in refactor_feature.json but not in skeleton.json"
        })
    for feat in sorted(in_skeleton_not_input):
        warnings.append({
            "type": "missing_in_input",
            "feature": feat,
            "message": f"Feature '{feat}' exists in skeleton.json but not in refactor_feature.json"
        })

    return {
        "in_input_not_skeleton": sorted(in_input_not_skeleton),
        "in_skeleton_not_input": sorted(in_skeleton_not_input),
        "matched_count": len(matched),
        "input_feature_count": len(input_features),
        "skeleton_feature_count": len(skeleton_features),
        "warnings": warnings,
        "is_consistent": len(warnings) == 0
    }


# ============================================================================
# Skeleton Structure Validation
# ============================================================================

def count_files_in_tree(node: Dict[str, Any]) -> int:
    """Count total file nodes in the skeleton tree."""
    if node.get("type") == "file":
        return 1
    return sum(count_files_in_tree(child) for child in node.get("children", []))


def count_features_in_tree(node: Dict[str, Any]) -> int:
    """Count total feature paths attached to files in the skeleton tree."""
    if node.get("type") == "file":
        return len(node.get("feature_paths", []))
    return sum(count_features_in_tree(child) for child in node.get("children", []))


def get_all_files(node: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Flatten the skeleton tree into a list of file summaries."""
    if node.get("type") == "file":
        return [{
            "path": node.get("path", ""),
            "feature_count": len(node.get("feature_paths", [])),
            "component": node.get("component", "")
        }]
    files: List[Dict[str, Any]] = []
    for child in node.get("children", []):
        files.extend(get_all_files(child))
    return files


def validate_tree_structure(node: Dict[str, Any], errors: List[str], path: str = "") -> bool:
    """Validate skeleton tree structure recursively.

    Appends human-readable problems to *errors* and returns True only when
    the node and its entire subtree are valid.
    """
    if "type" not in node:
        errors.append(f"Missing 'type' at {path or 'root'}")
        return False

    if "name" not in node:
        errors.append(f"Missing 'name' at {path or 'root'}")
        return False

    node_type = node.get("type")
    node_path = node.get("path", path)

    if node_type == "directory":
        children = node.get("children", [])
        if not isinstance(children, list):
            errors.append(f"Invalid 'children' at {node_path}")
            return False
        # Validate every child (so all errors are collected), then propagate
        # failure upward. Bug fix: child results were previously discarded,
        # so an invalid subtree still reported its parent as valid.
        child_results = [
            validate_tree_structure(child, errors, f"{node_path}/{child.get('name', '?')}")
            for child in children
        ]
        return all(child_results)

    if node_type == "file":
        features = node.get("feature_paths")
        if features is not None and not isinstance(features, list):
            errors.append(f"Invalid 'feature_paths' at {node_path}")
            return False
        return True

    errors.append(f"Unknown type '{node_type}' at {node_path}")
    return False
def inspect_state() -> Dict[str, Any]:
    """Inspect skeleton state and return a decision object.

    Compares refactor_feature.json (input) with skeleton.json (output),
    validates the skeleton tree, and cross-validates feature paths.

    Returns:
        Dict with a ``type`` of "error"/"init"/"warning"/"update", a message,
        validation details, statistics and a suggested ``next_action``.
    """
    # Check input file.
    input_exists = INPUT_FILE.exists()
    input_data = load_json(INPUT_FILE) if input_exists else None
    input_valid = input_data is not None and "components" in input_data

    # Extract features from input.
    input_features: Set[str] = set()
    features_by_component: Dict[str, List[str]] = {}
    if input_valid and input_data:
        input_features, features_by_component = get_features_from_refactor(input_data)

    # Check output file.
    output_exists = OUTPUT_FILE.exists()
    output_data = load_json(OUTPUT_FILE) if output_exists else None

    # Validate output structure.
    output_valid = False
    validation_errors: List[str] = []
    statistics: Dict[str, Any] = {}
    files: List[Dict[str, Any]] = []
    skeleton_features: Set[str] = set()
    cross_validation = None

    if output_data:
        required_fields = ["repository_name", "root"]
        missing_fields = [f for f in required_fields if f not in output_data]

        if not missing_fields:
            root = output_data.get("root", {})
            validate_tree_structure(root, validation_errors)

            if not validation_errors:
                output_valid = True
                skeleton_features = get_all_feature_paths_from_skeleton(root)
                statistics = {
                    "total_files": count_files_in_tree(root),
                    "total_features": count_features_in_tree(root),
                    "components": list(output_data.get("component_directories", {}).keys())
                }
                files = get_all_files(root)

                # Cross-validate features if both input and output are valid.
                if input_valid:
                    cross_validation = cross_validate_features(input_features, skeleton_features)
        else:
            validation_errors.append(f"Missing required fields: {missing_fields}")

    # Determine type and message.
    if not input_valid:
        type_value = "error"
        message = "Input file missing or invalid. Run /rpgkit.refactor_feature first."
    elif not output_exists or not output_valid:
        type_value = "init"
        message = "Ready to build skeleton."
    elif cross_validation and not cross_validation["is_consistent"]:
        type_value = "warning"
        warning_count = len(cross_validation["warnings"])
        message = f"Skeleton exists but has {warning_count} feature mismatches."
    else:
        type_value = "update"
        message = "Skeleton exists and is consistent. Regenerate?"

    result: Dict[str, Any] = {
        "type": type_value,
        "message": message,
        "input_file": str(INPUT_FILE),
        "output_file": str(OUTPUT_FILE),
        "input_exists": input_exists,
        "input_valid": input_valid,
        "output_exists": output_exists,
        "output_valid": output_valid,
        "validation_errors": validation_errors,
        "statistics": statistics,
        "files": files[:10],  # First 10 files for preview
        "files_total": len(files),
        "cross_validation": cross_validation,
    }

    # Add next_action for clear guidance.
    if type_value == "init":
        result["next_action"] = "python3 .rpgkit/scripts/build_skeleton.py --max-iterations 10"
    elif type_value == "warning":
        result["next_action"] = "python3 .rpgkit/scripts/build_skeleton.py --patch"
    else:
        result["next_action"] = "Skeleton is consistent. Proceed to next step."

    # Add input feature count for reference.
    if input_valid:
        result["input_statistics"] = {
            "total_features": len(input_features),
            "components": list(features_by_component.keys()),
            "features_by_component": {
                comp: len(feats) for comp, feats in features_by_component.items()
            }
        }

    return result


def main() -> int:
    """CLI entry point: report the skeleton state as a single JSON object.

    Returns:
        Process exit code: 1 when the state is "error", else 0.
    """
    parser = argparse.ArgumentParser(
        description="Check skeleton file state"
    )
    parser.add_argument(
        "--verbose", "-v",
        action="store_true",
        help="Include detailed file list and all feature mismatches"
    )

    args = parser.parse_args()

    result = inspect_state()

    # In verbose mode, include all files and full feature details.
    if args.verbose and result.get("output_valid"):
        output_data = load_json(OUTPUT_FILE)
        if output_data:
            result["files"] = get_all_files(output_data.get("root", {}))

        if result.get("input_valid"):
            input_data = load_json(INPUT_FILE)
            if input_data:
                _, features_by_component = get_features_from_refactor(input_data)
                result["input_features_detail"] = {
                    comp: sorted(feats) for comp, feats in features_by_component.items()
                }

    print(json.dumps(result, ensure_ascii=False, indent=2))

    # Consistency fix: exit nonzero on error like the sibling check scripts
    # (previously this script always exited 0).
    return 1 if result.get("type") == "error" else 0


if __name__ == "__main__":
    raise SystemExit(main())
+""" + +import json +import argparse +from pathlib import Path +from typing import Dict, Any, List, Tuple, Set + +# Import centralized paths +from common.paths import INTERFACES_FILE as INPUT_FILE, TASKS_FILE as OUTPUT_FILE + + +def validate_interfaces(interfaces_path: Path) -> Tuple[bool, List[str]]: + """Validate that interfaces.json exists and is valid.""" + errors = [] + + if not interfaces_path.exists(): + errors.append(f"Input file not found: {interfaces_path}") + return False, errors + + try: + with open(interfaces_path, 'r', encoding='utf-8') as f: + data = json.load(f) + except json.JSONDecodeError as e: + errors.append(f"Invalid JSON: {e}") + return False, errors + + # Check required structure - support both "subtrees" (new) and "components" (old) + containers = data.get("subtrees", data.get("components", {})) + if not containers: + errors.append("Missing required field: 'subtrees' or 'components'") + return False, errors + + if not isinstance(containers, dict): + errors.append("'subtrees'/'components' must be an object") + return False, errors + + return True, errors + + +def get_all_units_from_interfaces(interfaces_path: Path) -> Set[str]: + """Extract all unit identifiers from interfaces.json (file_path::unit_name).""" + with open(interfaces_path, 'r', encoding='utf-8') as f: + data = json.load(f) + + units = set() + # Support both "subtrees" (new format) and "components" (old format) + containers = data.get("subtrees", data.get("components", {})) + for comp_name, comp_data in containers.items(): + # Support both "interfaces" (new format) and "files" (old format) + file_container = comp_data.get("interfaces", comp_data.get("files", {})) + for file_path, file_data in file_container.items(): + for unit_name in file_data.get("units", []): + units.add(f"{file_path}::{unit_name}") + return units + + +def get_all_units_from_tasks(tasks_path: Path) -> Set[str]: + """Extract all unit identifiers from tasks.json (file_path::unit_name). 
+ + Supports both formats: + - planned_tasks_dict: {component: {file_path: [task, ...]}} + - batches: [{batch_id, units: [{file_path, unit_name}, ...]}] + """ + with open(tasks_path, 'r', encoding='utf-8') as f: + data = json.load(f) + + units = set() + + # Support planned_tasks_dict format + if "planned_tasks_dict" in data: + for component_name, files_dict in data["planned_tasks_dict"].items(): + for file_path, task_list in files_dict.items(): + for task in task_list: + # units_key contains the unit names + for unit_name in task.get("units_key", []): + units.add(f"{file_path}::{unit_name}") + # Support batches format (legacy) + elif "batches" in data: + for batch in data.get("batches", []): + for unit in batch.get("units", []): + file_path = unit.get("file_path", "") + unit_name = unit.get("unit_name", "") + if file_path and unit_name: + units.add(f"{file_path}::{unit_name}") + + return units + + +def cross_validate_units(interfaces_units: Set[str], tasks_units: Set[str]) -> Dict[str, Any]: + """Cross-validate units between interfaces and tasks. 
def cross_validate_units(interfaces_units: Set[str], tasks_units: Set[str]) -> Dict[str, Any]:
    """Cross-validate units between interfaces and tasks.

    Returns dict with:
        - in_interfaces_not_tasks: units in interfaces but not in tasks
        - in_tasks_not_interfaces: units in tasks but not in interfaces
        - matched_count: number of matched units
        - warnings: list of warning objects
        - is_consistent: True when there are no mismatches
    """
    in_interfaces_not_tasks = interfaces_units - tasks_units
    in_tasks_not_interfaces = tasks_units - interfaces_units
    matched = interfaces_units & tasks_units

    warnings = []
    for unit in sorted(in_interfaces_not_tasks):
        warnings.append({
            "type": "missing_in_tasks",
            "unit": unit,
            "message": f"Unit '{unit}' exists in interfaces.json but not in tasks.json"
        })
    for unit in sorted(in_tasks_not_interfaces):
        warnings.append({
            "type": "missing_in_interfaces",
            "unit": unit,
            "message": f"Unit '{unit}' exists in tasks.json but not in interfaces.json"
        })

    return {
        "in_interfaces_not_tasks": sorted(in_interfaces_not_tasks),
        "in_tasks_not_interfaces": sorted(in_tasks_not_interfaces),
        "matched_count": len(matched),
        "interfaces_unit_count": len(interfaces_units),
        "tasks_unit_count": len(tasks_units),
        "warnings": warnings,
        "is_consistent": len(warnings) == 0
    }


def validate_tasks(tasks_path: Path) -> Tuple[bool, List[str], Dict[str, Any]]:
    """Validate tasks.json structure and content.

    Supports both formats:
        - planned_tasks_dict: {component: {file_path: [task, ...]}}
        - batches: [{batch_id, units: [{file_path, unit_name}, ...]}]

    Returns:
        (is_valid, errors, stats). ``stats["total_tasks"]`` now counts actual
        task entries (bug fix: it previously counted components for the
        planned_tasks_dict format).
    """
    errors: List[str] = []
    stats: Dict[str, Any] = {
        "total_tasks": 0,
        "total_units": 0,
        "files_touched": 0,
        "components": []  # List (not set) so the dict stays JSON-serializable.
    }
    components_set: Set[str] = set()
    all_files: Set[str] = set()

    if not tasks_path.exists():
        errors.append(f"Output file not found: {tasks_path}")
        return False, errors, stats

    try:
        with open(tasks_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except json.JSONDecodeError as e:
        errors.append(f"Invalid JSON: {e}")
        return False, errors, stats

    # Support planned_tasks_dict format (primary).
    if "planned_tasks_dict" in data:
        planned_tasks = data["planned_tasks_dict"]
        if not isinstance(planned_tasks, dict):
            errors.append("'planned_tasks_dict' must be an object")
            return False, errors, stats

        total_tasks = 0
        total_units = 0

        for component_name, files_dict in planned_tasks.items():
            components_set.add(component_name)

            if not isinstance(files_dict, dict):
                errors.append(f"Component '{component_name}' must contain a files object")
                continue

            for file_path, task_list in files_dict.items():
                all_files.add(file_path)

                if not isinstance(task_list, list):
                    errors.append(f"Tasks for '{file_path}' must be a list")
                    continue

                # Bug fix: total_tasks used to be len(planned_tasks), which
                # counts components — count the actual task entries.
                total_tasks += len(task_list)

                for i, task in enumerate(task_list):
                    if not isinstance(task, dict):
                        errors.append(f"Task {i+1} in '{file_path}' must be an object")
                        continue

                    if "task_id" not in task:
                        errors.append(f"Task {i+1} in '{file_path}' missing 'task_id'")

                    if "units_key" not in task:
                        errors.append(f"Task {i+1} in '{file_path}' missing 'units_key'")
                    else:
                        units_key = task.get("units_key", [])
                        if isinstance(units_key, list):
                            total_units += len(units_key)

        stats["total_tasks"] = total_tasks
        stats["total_units"] = total_units

    # Support batches format (legacy).
    elif "batches" in data:
        batches = data.get("batches", [])
        if not isinstance(batches, list):
            errors.append("'batches' must be a list")
            return False, errors, stats

        stats["total_tasks"] = len(batches)
        stats["total_units"] = data.get("total_units", 0)

        for i, batch in enumerate(batches):
            if not isinstance(batch, dict):
                errors.append(f"Batch {i+1} must be an object")
                continue

            if "batch_id" not in batch:
                errors.append(f"Batch {i+1} missing 'batch_id'")

            if "units" not in batch:
                errors.append(f"Batch {i+1} missing 'units'")
                continue

            units = batch.get("units", [])
            if not isinstance(units, list):
                errors.append(f"Batch {i+1} 'units' must be a list")
                continue

            for j, unit in enumerate(units):
                if not isinstance(unit, dict):
                    errors.append(f"Batch {i+1}, unit {j+1} must be an object")
                    continue

                if "unit_name" not in unit:
                    errors.append(f"Batch {i+1}, unit {j+1} missing 'unit_name'")

                if "file_path" not in unit:
                    errors.append(f"Batch {i+1}, unit {j+1} missing 'file_path'")
                else:
                    all_files.add(unit["file_path"])

                if "component" in unit:
                    components_set.add(unit["component"])

            if "files" in batch:
                files = batch.get("files", [])
                if isinstance(files, list):
                    all_files.update(files)
    else:
        errors.append("Missing required field: 'planned_tasks_dict' or 'batches'")
        return False, errors, stats

    stats["files_touched"] = len(all_files)
    stats["components"] = sorted(components_set)

    return len(errors) == 0, errors, stats
result["input_exists"]: + result["type"] = "error" + result["message"] = f"Input file not found: {input_path}. Please run /rpgkit.design_interfaces first." + return result + + input_valid, input_errors = validate_interfaces(input_path) + result["input_valid"] = input_valid + + if not input_valid: + result["type"] = "error" + result["message"] = "Invalid interfaces.json" + result["validation_errors"] = input_errors + return result + + # Get interfaces units for cross-validation + interfaces_units = get_all_units_from_interfaces(input_path) + result["input_statistics"] = { + "total_units": len(interfaces_units) + } + + # Check output (tasks.json) + if not result["output_exists"]: + result["type"] = "init" + result["message"] = "Ready to plan tasks. No existing tasks.json found." + return result + + output_valid, output_errors, stats = validate_tasks(output_path) + result["output_valid"] = output_valid + result["stats"] = stats + + if not output_valid: + result["type"] = "init" + result["message"] = "Existing tasks.json is invalid. Will regenerate." + result["validation_errors"] = output_errors + return result + + # Cross-validate units + tasks_units = get_all_units_from_tasks(output_path) + cross_validation = cross_validate_units(interfaces_units, tasks_units) + result["cross_validation"] = cross_validation + + # Determine type based on cross-validation + if not cross_validation["is_consistent"]: + warning_count = len(cross_validation["warnings"]) + result["type"] = "warning" + result["message"] = f"tasks.json exists but has {warning_count} unit mismatches with interfaces." + else: + result["type"] = "update" + result["message"] = f"Valid tasks.json exists with {stats['total_tasks']} tasks and {stats['total_units']} units." 
+
+    return result
+
+
+def main():
+    """CLI entry point: parse args, run the state check, and report.
+
+    Exit status: 1 when the check ends in the "error" state (missing or
+    invalid interfaces.json), 0 otherwise (init / update / warning), so
+    shell callers and CI pipelines can detect hard failures.
+    """
+    parser = argparse.ArgumentParser(
+        description="Check tasks.json validity and state"
+    )
+    parser.add_argument(
+        "--input",
+        type=Path,
+        default=INPUT_FILE,
+        help="Input interfaces.json file"
+    )
+    parser.add_argument(
+        "--output",
+        type=Path,
+        default=OUTPUT_FILE,
+        help="Output tasks.json file to check"
+    )
+    parser.add_argument(
+        "--json",
+        action="store_true",
+        help="Output as JSON only"
+    )
+
+    args = parser.parse_args()
+
+    result = check_state(args.input, args.output)
+
+    if args.json:
+        print(json.dumps(result, indent=2))
+    else:
+        print(f"\nState: {result['type']}")
+        print(f"Message: {result['message']}")
+
+        if result['validation_errors']:
+            print("\nValidation Errors:")
+            for err in result['validation_errors']:
+                print(f"  - {err}")
+
+        if result['stats']:
+            print("\nStatistics:")
+            for key, value in result['stats'].items():
+                print(f"  {key}: {value}")
+
+    # Previously this always returned 0, so a failed check was invisible
+    # to CI / shell callers. Only the hard "error" state is a failure;
+    # "init", "update" and "warning" are legitimate outcomes.
+    return 1 if result["type"] == "error" else 0
+
+
+if __name__ == "__main__":
+    exit(main())
diff --git a/RPG-Kit/scripts/code_gen/__init__.py b/RPG-Kit/scripts/code_gen/__init__.py
new file mode 100644
index 0000000..527eb57
--- /dev/null
+++ b/RPG-Kit/scripts/code_gen/__init__.py
@@ -0,0 +1,19 @@
+#!/usr/bin/env python3
+"""Code generation utilities used by ``scripts/run_batch.py`` and friends.
+
+This package groups the libraries that drive the ``/rpgkit.code_gen``
+pipeline:
+
+* :mod:`scripts.code_gen.prompts` — prompt templates
+* :mod:`scripts.code_gen.test_runner` — pytest execution helpers
+* :mod:`scripts.code_gen.test_output_parser` — unified test-output analysis
+* :mod:`scripts.code_gen.rpg_updater` — post-batch RPG mutation
+* :mod:`scripts.code_gen.context_collector` — dep / interface context
+* :mod:`scripts.code_gen.static_checks` — lightweight pre-LLM checks
+* :mod:`scripts.code_gen.subtree_review` — LLM review of completed subtrees
+
+The package deliberately exposes **no** re-exports. 
Callers import from +the specific submodule (``from code_gen.prompts import ...``) to keep +dependency edges explicit and to avoid lying about which functions are +really part of a stable public API. +""" diff --git a/RPG-Kit/scripts/code_gen/_constants.py b/RPG-Kit/scripts/code_gen/_constants.py new file mode 100644 index 0000000..6378c15 --- /dev/null +++ b/RPG-Kit/scripts/code_gen/_constants.py @@ -0,0 +1,43 @@ +#!/usr/bin/env python3 +"""Shared constants for the codegen pipeline. + +These were duplicated across ``run_batch.py`` and the extracted +``code_gen.*`` modules (4 copies of ``DEFAULT_TEST_TIMEOUT``, +3 of ``DEFAULT_PYTEST_OVERALL_TIMEOUT``, 2 of ``DEFAULT_AGENT_TIMEOUT``). +Centralising them here makes timeout tuning a single-line change. + +Why not ``common.paths``? ``common.paths`` is shared across the +encoder, decoder, and agent layers; these timeout values are specific +to the codegen / TDD loop and don't belong in that broader namespace. +""" + +from __future__ import annotations + + +# --------------------------------------------------------------------------- +# Sub-agent dispatch timeouts +# --------------------------------------------------------------------------- + +# Maximum wall-clock time per sub-agent LLM session (seconds). +# 2700s = 45 minutes โ€” enough for a full TDD iteration loop with retries. +# Overridable per-call via the ``timeout=`` keyword on +# :func:`code_gen.sub_agent.dispatch_sub_agent` and via the +# ``--agent-timeout`` CLI flag in ``run_batch.py``. +DEFAULT_AGENT_TIMEOUT = 2700 + + +# --------------------------------------------------------------------------- +# pytest invocation timeouts +# --------------------------------------------------------------------------- + +# Per-test-function timeout passed to ``pytest --timeout=`` (seconds). +# This is the real hang-prevention mechanism โ€” any single test that +# blocks longer than this is killed. 
+DEFAULT_TEST_TIMEOUT: int = 30
+
+# Overall pytest-invocation wall-clock budget (seconds).
+# Acts as a safety net on top of ``DEFAULT_TEST_TIMEOUT``: kills a
+# frozen pytest process even if individual test-level timeouts don't
+# fire (rare, but possible on collection / fixture errors).
+# 1800s = 30 minutes is generous even for 1000+ test suites.
+DEFAULT_PYTEST_OVERALL_TIMEOUT: int = 1800
diff --git a/RPG-Kit/scripts/code_gen/batch_prompts.py b/RPG-Kit/scripts/code_gen/batch_prompts.py
new file mode 100644
index 0000000..29a4cee
--- /dev/null
+++ b/RPG-Kit/scripts/code_gen/batch_prompts.py
@@ -0,0 +1,736 @@
+#!/usr/bin/env python3
+"""Per-batch TDD prompt assembly for the codegen pipeline.
+
+This module hosts the prompt-builder helpers extracted from
+``scripts/run_batch.py`` Module 1 ("Prompt Builder"). They assemble
+the full prompt that ``run_batch``'s sub-agent receives for a single
+batch (test code + production code + pytest cmd + dependency context).
+
+Distinct from :mod:`scripts.code_gen.prompts`, which contains the
+*pure-template* strings (``init_test_gen_prompt``, ``test_fix_prompt``,
+``FAILURE_ANALYSIS_PROMPT``, …). This module assembles those templates
+plus batch-specific runtime context (venv python path, dep_graph,
+import conventions, …) into the final TDD batch prompt.
+
+Internal to the codegen package; no external API contract.
+""" + +from __future__ import annotations + +import logging +import shutil +import sys +from pathlib import Path +from typing import Any, Dict, List, Optional + +from common.execution_state import BatchExecutionState, load_code_gen_state +from common.import_normalizer import build_import_convention_snippet +from common.paths import ( + CODE_GEN_STATE_FILE as STATE_FILE, + REPO_RPG_FILE, + TASKS_FILE, + get_scripts_dir, +) +from common.task_batch import PlannedTask, load_tasks_from_tasks_json +from code_gen.prompts import ( + _format_dependency_context, + is_project_docs_batch, + is_project_file_batch, +) +from code_gen.sub_agent import truncate_test_output +from code_gen.test_runner import ( + find_related_test_files, + get_dev_python, + get_dev_venv_path, +) + +logger = logging.getLogger(__name__) + + +from code_gen._constants import DEFAULT_TEST_TIMEOUT # noqa: E402 + +# Sub-agent internal TDD-loop iteration cap (enforced inside the +# generated prompt; not used to drive any Python-side loop). +MAX_ITERATIONS = 5 + + +# ============================================================================ +# Prompt Templates +# ============================================================================ + +TDD_BATCH_PREAMBLE = """\ +# TDD Batch Implementation + +You are an autonomous coding agent completing a single implementation batch +inside a structured TDD workflow. You have **full project access** and must +self-manage the entire write โ†’ test โ†’ fix cycle. + +## โ”€โ”€ Workflow โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + +Follow these steps IN ORDER. Do not skip steps. + +### Step 1: Understand Context +- Read the target source file (skeleton may exist with interface stubs). +- Read dependency files listed in the Project Context section below. +- **Explore beyond listed dependencies.** Read any source file that your target + file imports or will interact with. 
If a completed module provides functions + your code should call, read that module to understand its actual API + (function signatures, return types, class interfaces). Do NOT guess โ€” read + the real code. +- If your target file produces output consumed by other modules (e.g., generates + HTML with CSS classes, returns data structures), read those consuming modules + to ensure compatibility. +- Read existing test files in `tests/` to understand conventions. +- Read `requirements.txt` if it exists. +- **UI/View code quality:** If you are implementing code that generates HTML, + renders pages, produces visual output, or defines styles/CSS: + - Ensure all HTML pages use the shared layout (head, nav, footer) consistently + - Use the CSS class names already defined in the stylesheet โ€” read the style + module first and use its exact class names in your HTML + - Wrap content in proper layout containers (e.g., `.container`, `
`) + - Produce complete, production-quality pages โ€” not minimal stubs + - Include proper form structure (labels, fieldsets, CSRF tokens where needed) + - All pages should look like they belong to the same application + - If the project needs static assets (CSS files, templates, images) that don't + exist yet, create them. You have permission to create any project files needed. + - **Layout verification:** After writing layout code (CSS grid/flex for web, + layout managers for GUI), verify the structure is correct: + - For CSS grid/flex: count child elements vs column/row definitions. + Example bug: `grid-template-columns: 1fr 300px` with 3 children (h1, + content, sidebar) โ€” h1 takes column 1, content gets pushed to 300px column. + Fix: add `grid-column: 1 / -1` to spanning elements, or restructure HTML. + - For GUI: verify widgets are placed in the correct parent container and + pack/grid/place calls produce the intended layout. + - **Content display:** The primary content area of every screen must show + meaningful content. Never leave the main area empty while content is + squeezed into a sidebar, toolbar, or secondary panel. +- **User-facing output quality:** Regardless of project type (web, GUI, CLI), + all user-facing output should be polished and professional: + - CLI tools: use clear formatting, aligned columns, colors/bold where helpful, + progress indicators for long operations, and helpful error messages + - GUI apps: consistent widget styling, proper layout management, sensible + defaults, and intuitive navigation + - Web apps: consistent page layout, working navigation, styled forms, and + responsive basics (viewport meta tag, flexible widths) + +### Step 2: Write Tests +{test_instructions} + +### Step 3: Write Implementation +{code_instructions} + +### Step 4: Run Tests +Run ONLY this command (no variations): +``` +{pytest_cmd} +``` +**CRITICAL**: This command runs ALL tests in the `tests/` directory, +not just the ones you wrote in this batch. 
Your new code must pass +ALL pre-existing tests as well as your new ones. +If pre-existing tests fail after your changes, your code has a bug โ€” +fix YOUR code, not the pre-existing tests (unless the test itself is +clearly wrong based on the skeleton). + +### Step 5: Analyze & Fix (if tests fail) +- Read the FULL pytest output carefully. +- Determine root cause: test bug, code bug, import error, or dependency issue. +- Fix the appropriate file(s). You MAY fix: + - Test files (wrong assertions, bad mocks, missing imports) + - Source files (logic bugs, missing methods, wrong signatures) + - Other project files (broken imports, missing `__init__.py`) + - requirements.txt (missing third-party package) +- After fixing, re-run the EXACT SAME pytest command from Step 4. + +### Step 6: Repeat Steps 4โ€“5 +- Maximum **{max_iterations} iterations** of test โ†’ fix โ†’ test. +- If tests pass, proceed to Step 7 immediately. +- If after {max_iterations} iterations tests still fail, proceed to Step 7 anyway. + +### Step 7: Save & Report +Commit with a conventional-commit message describing what you implemented: +``` +git add -A +git commit -m "feat(): " \\ + -m "" \\ + -m "Target: {file_path}" \\ + -m "Units: {units}" \\ + -m "Batch-Id: {batch_id}" +``` +The subject line MUST follow this format: `feat(): ` +The body MUST include a bullet list of what was implemented/changed. 
+Examples: +``` +git add -A +git commit -m "feat(auth/routes): implement LoginHandler with JWT authentication" \\ + -m "- Add LoginHandler class with login/logout/refresh endpoints +- Implement JWT token generation with configurable expiry +- Add password hashing with bcrypt" \\ + -m "Target: src/personal-blog-system/auth/routes.py" \\ + -m "Units: LoginHandler" \\ + -m "Batch-Id: {batch_id}" +``` + +## Exit Protocol โ€” How to Report Your Result + +The final two lines of your response MUST follow this exact shape so the +runner can verify your claim: + +``` +PYTEST_SUMMARY: +BATCH_RESULT: PASS +``` + +or on failure: + +``` +PYTEST_SUMMARY: +BATCH_RESULT: FAIL | +``` + +The `PYTEST_SUMMARY` line must be the *literal* one-line summary that +pytest printed, e.g. `5 passed in 0.42s`, `2 passed, 1 failed in 1.30s`, +`1 failed, 1 error in 0.55s`. Copy it verbatim from the run you just +performed; do NOT invent it. This lets the runner cross-check your +claim against an independent re-run. + +## โ”€โ”€ Capabilities โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + +[OK] You CAN: +- Read/write any file under `src/`, `tests/`, `static/`, `templates/`, and `examples/` + (Python, HTML, CSS, JavaScript, JSON, YAML, config files, etc.) 
+- Create new directories and files if needed (e.g., `static/css/`, `templates/`) +- Read any file in the repo for context +- Run: `{pytest_cmd}` (this exact command only) +- Run: `{pip_install_cmd} install ` to install missing packages +- Update `requirements.txt` when adding new dependencies +- Fix import errors in ANY source file (not just the target) +- Run: `git add -A && git commit -m ""` + +[FAIL] You MUST NOT: +- Modify or read files under `.rpgkit/` +- Run any `.rpgkit/scripts/*.py` commands +- Run arbitrary shell commands beyond pytest/pip/git listed above +- Install packages that are not genuinely needed by the source code +- Delete files that are not part of your task +- Run pytest without `--timeout` flag (already included in the command) + +## โ”€โ”€ Pytest Rules (CRITICAL) โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + +1. **Always use the EXACT pytest command provided** โ€” it has timeout flags + to prevent hanging tests. +2. **Do not manually run a different pytest command** โ€” the provided command + already targets the correct test files for this batch. +3. If a test times out or hangs, the test is wrong. Fix the test: + - Remove infinite loops, blocking I/O, or `time.sleep()` calls + - Mock any external resources (network, filesystem, GPU) + - Ensure all fixtures have finite setup/teardown +4. **Do not write tests that depend on timing** (real-time waits). + Use mocks or `unittest.mock.patch` for time-dependent behavior. +5. **Do not write tests that spawn subprocesses or servers.** +6. **Output control:** Use `-x` (stop at first failure) and `--tb=short` + to keep output manageable. Focus on the FIRST failure. + +## โ”€โ”€ Test Quality Rules โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + +- Use `MagicMock(spec=RealClass)` or `create_autospec()`, never bare `MagicMock()`. 
+- For numeric/math operations: use real values (`np.array(...)`, `4.0`), not mocks. +- Mock at boundaries (I/O, external deps), not internal implementation. +- Keep tests deterministic โ€” no random data without fixed seeds. +- Test count: proportional to task complexity. Small task = 3โ€“8 tests. + Do NOT over-engineer with 20+ tests for a simple class. + +## โ”€โ”€ Dependency Management โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + +When you encounter `ModuleNotFoundError` or `ImportError` for a third-party package: +1. Install it: `{pip_install_cmd} install ` +2. Verify by re-running pytest. +3. Append the package to `requirements.txt` (create the file if it doesn't exist). + +{import_convention} + +## โ”€โ”€ Project Context โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ +{dependency_context} + +## โ”€โ”€ Task Details โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + +**Batch ID:** {batch_id} +**Target file:** {file_path} +**Units to implement:** [{units}] +**Task type:** {task_type} +""" + +TDD_RESUME_PREAMBLE = """\ +# TDD Batch โ€” Resume After Previous Failure + +A previous attempt at this batch failed. Code may be partially written. +Your job is to **continue from where it left off** and make tests pass. + +## Previous Failure Info +**Attempt:** {attempt_number} +**Failure reason:** {failure_reason} +{post_verify_section} +## Previous Test Output (last pytest run) +``` +{last_test_output} +``` + +## Instructions +1. Review what has already been written (read modified files). +2. Run the pytest command to see current status. +3. If tests fail โ†’ fix the **production code** first, then re-run pytest. +4. **Do NOT silence failures by editing tests** โ€” the tests in `tests/` + describe the contract. 
Only modify a test if you can show it is + logically wrong (wrong expected value, wrong fixture, etc.) and + document the reason in your reply. +5. If tests pass โ†’ commit, then exit with the **Exit Protocol** below. + +## Exit Protocol (same as the original task) +The final two lines of your response MUST be: +``` +PYTEST_SUMMARY: +BATCH_RESULT: PASS # or FAIL | +``` +The `PYTEST_SUMMARY` must be copied verbatim from your pytest run. + +All other rules from the original task apply (capabilities, constraints, +pytest rules, etc). The full original task is included below. +""" + +TDD_PROJECT_FILE_PREAMBLE = """\ +# Project File Generation Task + +You are creating a project file as part of a finalization workflow. +The core implementation is already complete. + +## Your Capabilities +[OK] You CAN: +- Read any file in the repo to understand the codebase +- Create or update the requested project file(s) +- Run validation commands as specified below + +[FAIL] You MUST NOT: +- Modify existing source code or test files +- Modify or read files under `.rpgkit/` +- Run any `.rpgkit/scripts/*.py` commands + +## Task Details + +**Batch ID:** {batch_id} +**Task type:** {task_type} + +{code_prompt} + +## Exit Protocol +When finished, on the LAST line of your response write: +- Success: `BATCH_RESULT: PASS` +- Failure: `BATCH_RESULT: FAIL | ` +""" + +TDD_DOCS_PREAMBLE = """\ +# Documentation Generation Task + +You are creating documentation for the project. No tests are needed. + +## Your Capabilities +[OK] You CAN: +- Read any file in the repo to understand the codebase +- Create or update documentation files (README.md, docs/, etc.) 
+
+[FAIL] You MUST NOT:
+- Modify existing source code or test files
+- Modify or read files under `.rpgkit/`
+
+## Task Details
+
+**Batch ID:** {batch_id}
+
+{code_prompt}
+
+## Exit Protocol
+When finished, on the LAST line of your response write:
+`BATCH_RESULT: PASS`
+"""
+
+
+# ============================================================================
+# Builder functions
+# ============================================================================
+
+def build_batch_pytest_cmd(
+    test_files: List[str],
+    venv_python: str,
+    per_test_timeout: int = DEFAULT_TEST_TIMEOUT,
+) -> str:
+    """Build a pytest command with timeout protection.
+
+    Args:
+        test_files: Test files to run (empty → tests/).
+        venv_python: Path to venv python executable.
+        per_test_timeout: Max seconds per individual test function.
+
+    Returns:
+        Shell command string ready for the sub-agent to copy-paste.
+    """
+    files_str = " ".join(test_files) if test_files else "tests/"
+    return (
+        f"{venv_python} -m pytest {files_str} "
+        f"-x --tb=short -q "
+        f"--timeout={per_test_timeout} "
+        f"--timeout-method=thread "
+        f"-W ignore::DeprecationWarning"
+    )
+
+
+def _build_pip_install_cmd(repo_path: Path) -> str:
+    """Build the install-command prefix for the dev venv.
+
+    The prompt templates append `` install <package>`` to this prefix,
+    so it must end *before* the ``install`` verb.
+    """
+    venv_path = get_dev_venv_path(repo_path)
+    uv = shutil.which("uv")
+    if uv:
+        # ``--python`` is an option of the ``uv pip install`` subcommand,
+        # not of the ``uv pip`` group, so it cannot precede the ``install``
+        # verb the template appends (``uv pip --python X install pkg`` is
+        # rejected). Target the dev venv via the VIRTUAL_ENV environment
+        # variable instead, which ``uv pip`` honours for env discovery.
+        return f"VIRTUAL_ENV={venv_path} uv pip"
+    if sys.platform == "win32":
+        return str(venv_path / "Scripts" / "pip")
+    return str(venv_path / "bin" / "pip")
+
+
+def _build_api_summary(repo_path: Path, source_files: List[str], max_chars: int = 4000) -> str:
+    """Extract public API signatures from top-level definitions in source files.
+
+    Used to inject API context into test-writing batches (final_test_docs, wiring)
+    so the sub-agent doesn't guess function signatures.
+
+    Args:
+        repo_path: Project repo root path.
+        source_files: List of source file paths (relative to repo_path).
+ max_chars: Maximum output length before truncation. + + Returns: + Formatted string of file โ†’ class/function signatures. + """ + import ast as _ast + + summaries = [] + for filepath in sorted(source_files): + full_path = repo_path / filepath + if not full_path.exists() or full_path.suffix != '.py': + continue + try: + tree = _ast.parse(full_path.read_text(encoding='utf-8')) + except (SyntaxError, UnicodeDecodeError): + continue + + file_sigs = [] + for node in tree.body: + if isinstance(node, _ast.ClassDef): + if node.name.startswith('_'): + continue + methods = [ + n.name for n in node.body + if isinstance(n, (_ast.FunctionDef, _ast.AsyncFunctionDef)) + and not n.name.startswith('_') + ] + methods_str = ', '.join(methods) if methods else '(dataclass)' + file_sigs.append(f" class {node.name}: {methods_str}") + elif isinstance(node, (_ast.FunctionDef, _ast.AsyncFunctionDef)): + if node.name.startswith('_'): + continue + args = [a.arg for a in node.args.args if a.arg != 'self'] + ret = _ast.unparse(node.returns) if node.returns else '' + ret_str = f" -> {ret}" if ret else "" + file_sigs.append(f" def {node.name}({', '.join(args)}){ret_str}") + + if file_sigs: + summaries.append(f"# {filepath}\n" + "\n".join(file_sigs)) + + result = "\n\n".join(summaries) + if len(result) > max_chars: + result = result[:max_chars] + "\n# ... (truncated)" + return result + + +def _build_dep_graph_context_str(file_path: str, repo_path: Path) -> str: + """Build a dep_graph context string for prompt injection. + + Loads the RPG and dep_graph, extracts dependency info for the file, + and formats it as a markdown section. Returns empty string on any error + or when no dep_graph is available. 
+ """ + try: + import os + scripts_dir = Path(get_scripts_dir()) + if str(scripts_dir) not in sys.path: + sys.path.insert(0, str(scripts_dir)) + from rpg.service import RPGService + + rpg_path = REPO_RPG_FILE + if not rpg_path.exists(): + return "" + + svc = RPGService.load(str(rpg_path)) + if svc.rpg.dep_graph is None: + return "" + + # Resolve file_path to a dep_graph node ID. + # Task file_path may differ from dep_graph node IDs (e.g. + # task uses 'src/flask_blog/models/user.py' while dep_graph + # uses 'models/user.py'). Try multiple candidates. + G = svc.rpg.dep_graph.G + candidates = [file_path] + code_dir = svc.rpg._dep_graph_code_dir + if code_dir: + candidates.append(code_dir.rstrip("/") + "/" + file_path) + # Also try matching by filename suffix + fname = os.path.basename(file_path) + for nid in G.nodes: + if G.nodes[nid].get("type") == "file" and nid.endswith("/" + fname): + candidates.append(nid) + + resolved = None + for c in candidates: + if c in G.nodes: + resolved = c + break + + if resolved is None: + return "" + + ctx = svc.get_dep_context_for_batch([resolved]) + info = ctx.get(resolved, {}) + if not any(info.values()): + return "" + + parts = ["\n\n## Dependency Graph Context (from AST analysis)\n"] + + if info.get("imports"): + parts.append("### Imports available:") + for imp in info["imports"][:20]: + parts.append(f"- `{imp['module']}` ({imp['name']})") + + if info.get("callees"): + parts.append("\n### Functions/classes this file calls:") + for c in info["callees"][:15]: + parts.append(f"- `{c['name']}` ({c['type']}) โ€” `{c['node_id']}`") + + if info.get("callers"): + parts.append("\n### Called by (external callers):") + for c in info["callers"][:15]: + parts.append(f"- `{c['name']}` ({c['type']}) โ€” `{c['node_id']}`") + + if info.get("inheritance"): + parts.append("\n### Inheritance:") + for inh in info["inheritance"][:10]: + if inh["direction"] == "extends": + parts.append(f"- extends `{inh['base']}`") + else: + parts.append(f"- extended 
def build_tdd_prompt(
    batch_state: BatchExecutionState,
    task: PlannedTask,
    repo_path: Path,
    merged_tasks: Optional[List[PlannedTask]] = None,
    dependency_context: Optional[Dict[str, Any]] = None,
) -> str:
    """Assemble the full TDD prompt for one batch.

    Dispatches on ``task.task_type``:

    - ``project_docs`` -> docs-only preamble (no TDD loop)
    - ``project_requirements`` / ``main_entry`` -> project-file preamble
    - everything else (implementation / integration_test / wiring /
      final_test_docs) -> the full TDD preamble, enriched with dependency
      context, API signatures and subtree-review results where relevant.

    Args:
        batch_state: Current batch execution state (ids + sub-prompts).
        task: Primary PlannedTask for this batch.
        repo_path: Path to the project repo.
        merged_tasks: If file-merge mode, list of merged tasks.
        dependency_context: Dependency context dict from design stages.

    Returns:
        Complete prompt string ready for ``LLMClient.generate()``.
    """
    dev_python = get_dev_python(repo_path) or "python3"
    convention_snippet = build_import_convention_snippet(repo_path=repo_path)

    # Project docs: simplest path, no tests involved.
    if is_project_docs_batch(task):
        return TDD_DOCS_PREAMBLE.format(
            batch_id=batch_state.batch_id,
            code_prompt=batch_state.code_prompt,
        )

    # Project files (requirements, main_entry): no TDD loop either.
    if is_project_file_batch(task):
        return TDD_PROJECT_FILE_PREAMBLE.format(
            batch_id=batch_state.batch_id,
            task_type=task.task_type,
            code_prompt=batch_state.code_prompt,
        )

    # Implementation / integration_test: full TDD loop.
    # Marker paths (e.g. "<wiring>") have no real file, so no related tests.
    is_marker_path = task.file_path.startswith("<") and task.file_path.endswith(">")
    related_tests = [] if is_marker_path else find_related_test_files(task.file_path, repo_path)
    pytest_cmd = build_batch_pytest_cmd(related_tests, dev_python)
    pip_cmd = _build_pip_install_cmd(repo_path)

    # Testing batches may fix genuine integration bugs; everything else
    # uses the stage-supplied code prompt verbatim.
    if task.task_type in ("integration_test", "final_test_docs"):
        impl_instructions = (
            "This is primarily a testing batch. Your main deliverable is tests.\n"
            "However, if your tests reveal **genuine integration bugs** in the "
            "production code, you SHOULD fix them. Examples of legitimate fixes:\n"
            "- A route handler returns a placeholder string instead of calling the real handler\n"
            "- CSS class names in a style module don't match those used in page templates\n"
            "- A module defines a function but its consumer never imports/calls it\n"
            "- Data format mismatch at a module boundary\n\n"
            "Do NOT modify production code solely to make a poorly-written test pass.\n"
            "The test should reflect correct behavior; the code should implement it.\n"
            "Do NOT create main.py — it will be created in a later task.\n\n"
            "**Testing strategy for efficiency:**\n"
            "- After the first full pytest run, use `--last-failed` on subsequent runs "
            "to only re-run failing tests. This saves time.\n"
            "- Only run a full pytest at the very end to confirm everything passes.\n"
        )
    else:
        impl_instructions = batch_state.code_prompt

    # Dependency context from the design stages, if any.
    context_block = _format_dependency_context(dependency_context) if dependency_context else ""

    # AST-based dependency-graph context.
    graph_block = _build_dep_graph_context_str(task.file_path, repo_path)
    if graph_block:
        context_block += graph_block

    # Test-writing batches get the implemented API signatures so the
    # sub-agent does not guess function signatures.
    if task.task_type in ("final_test_docs", "wiring"):
        try:
            all_tasks = load_tasks_from_tasks_json(TASKS_FILE)
            state_for_api = load_code_gen_state(STATE_FILE)
            completed_files = list(set(
                t.file_path for t in all_tasks
                if t.task_id in state_for_api.completed_task_ids
                and not (t.file_path.startswith("<") and t.file_path.endswith(">"))
            ))
            api_summary = _build_api_summary(repo_path, completed_files)
            if api_summary:
                context_block += (
                    "\n### Implemented API Signatures\n"
                    "Use these EXACT signatures when writing tests — do NOT guess.\n"
                    f"```\n{api_summary}\n```\n"
                )
        except Exception as exc:
            logger.warning("Failed to build API summary: %s", exc)

    # Wiring batches learn which subtrees were already verified so their
    # tests do not duplicate subtree-review coverage.
    if task.task_type == "wiring":
        try:
            state_for_reviews = load_code_gen_state(STATE_FILE)
            reviews = state_for_reviews.subtree_reviews
            verified = [
                st for st, rev in reviews.items()
                if rev.get("status") in ("ALL_COMPLETE", "FIXED")
            ]
            if verified:
                impl_instructions += (
                    "\n\nThe following subtrees have been individually reviewed "
                    "and their internal + cross-subtree connections verified:\n"
                    + "\n".join(f"- {s}" for s in sorted(verified))
                    + "\n\nFocus your tests on:\n"
                    "1. Global connections NOT covered by subtree reviews "
                    "(e.g., app initialization, route registration)\n"
                    "2. End-to-end flows that span 3+ subtrees\n"
                    "Do NOT re-test connections already verified above.\n"
                    "Keep tests focused and concise — avoid redundancy.\n"
                )
        except Exception as exc:
            logger.warning("Failed to load subtree reviews for WIRING: %s", exc)

    return TDD_BATCH_PREAMBLE.format(
        test_instructions=batch_state.test_prompt,
        code_instructions=impl_instructions,
        pytest_cmd=pytest_cmd,
        max_iterations=MAX_ITERATIONS,
        batch_id=batch_state.batch_id,
        pip_install_cmd=pip_cmd,
        import_convention=convention_snippet,
        dependency_context=context_block,
        file_path=task.file_path,
        units=", ".join(task.units_key),
        task_type=task.task_type,
    )


def build_resume_prompt(
    original_prompt: str,
    attempt_number: int,
    failure_reason: str,
    last_test_output: str,
    *,
    sub_agent_claimed_pass: bool = False,
    agent_pytest_summary: Optional[str] = None,
) -> str:
    """Build the resume prompt used when auto-retrying a failed batch.

    Args:
        original_prompt: The full TDD prompt from the first attempt.
        attempt_number: Which attempt this is (2 for auto-retry).
        failure_reason: One-line reason from ``BATCH_RESULT: FAIL``, or the
            post-verify mismatch reason if the sub-agent self-reported PASS
            but verification failed.
        last_test_output: pytest output from post-verification.
        sub_agent_claimed_pass: True if the previous attempt reported
            ``BATCH_RESULT: PASS`` but post-verify rejected it; adds an
            extra warning section so the sub-agent does not repeat the
            false-positive pattern.
        agent_pytest_summary: The ``PYTEST_SUMMARY:`` line the previous
            attempt produced (verbatim), shown for comparison when
            ``sub_agent_claimed_pass`` is True.

    Returns:
        Resume prompt string (resume preamble + separator + original prompt).
    """
    # Smart truncation: keep the first 20 lines (pytest header, collected
    # count, first failure header) and the last 50 lines (FAILED/ERROR
    # detail + summary).
    last_test_output = truncate_test_output(last_test_output, head=20, tail=50)

    if sub_agent_claimed_pass:
        summary_display = (
            f"`{agent_pytest_summary}`"
            if agent_pytest_summary
            else "(missing — you did not include the PYTEST_SUMMARY line)"
        )
        false_positive_section = (
            "\n\n## ⚠ False-positive PASS detected\n"
            "Your previous attempt ended with `BATCH_RESULT: PASS` and the\n"
            f"PYTEST_SUMMARY line {summary_display}, but the runner's\n"
            "independent pytest re-run reported the failure shown below.\n"
            "Possible causes you must investigate:\n"
            "* You did not actually run pytest before declaring PASS.\n"
            "* You ran pytest with `--no-cov` / `-k` / a different path that\n"
            "  excluded the failing tests.\n"
            "* You modified or deleted tests instead of fixing production code.\n"
            "* Your local changes were not committed before the runner verified.\n"
            "**Do not report PASS again unless the PYTEST_SUMMARY line literally\n"
            "shows zero failures and zero errors.**\n"
        )
    else:
        false_positive_section = ""

    return TDD_RESUME_PREAMBLE.format(
        attempt_number=attempt_number,
        failure_reason=failure_reason,
        last_test_output=last_test_output,
        post_verify_section=false_positive_section,
    ) + "\n---\n\n" + original_prompt
def write_interface_skeletons(
    interfaces_path: Path,
    repo_path: Path
) -> Dict[str, Any]:
    """Write interface skeletons from interfaces.json to actual source files.

    For each file in interfaces.json that has a ``file_code`` entry, this
    function writes it to disk **only if** the file does not already exist
    or the existing content is shorter than the skeleton (meaning the
    skeleton is more informative).

    Import prefixes are automatically normalized based on the project's
    source layout (e.g. ``from vibeanim.`` -> ``from src.vibeanim.``).

    Args:
        interfaces_path: Path to interfaces.json
        repo_path: Root path of the target repository

    Returns:
        ``{"written": [file_paths...], "skipped": [file_paths...]}``
    """
    result: Dict[str, List[str]] = {"written": [], "skipped": []}

    if not interfaces_path.exists():
        logger.warning("interfaces.json not found at %s", interfaces_path)
        return result

    # Kept distinct from _load_json: read failures here are worth an error log.
    try:
        with open(interfaces_path, "r", encoding="utf-8") as f:
            interfaces = json.load(f)
    except Exception as e:
        logger.error("Failed to read interfaces.json: %s", e)
        return result

    subtrees = interfaces.get("subtrees", {})

    # Detect import prefix from file paths in interfaces.json.
    # If file paths start with "src/", imports should use "src." prefix.
    import_prefix = detect_project_import_prefix(
        interfaces_subtrees=subtrees,
    )

    for _subtree_name, subtree_data in subtrees.items():
        file_interfaces = subtree_data.get("interfaces", {})
        for file_path, file_info in file_interfaces.items():
            file_code = file_info.get("file_code", "")
            if not file_code or not file_code.strip():
                continue

            # Normalize import prefixes before writing
            if import_prefix:
                file_code = normalize_code(file_code, import_prefix)

            # Add from __future__ import annotations to prevent forward ref errors
            file_code = ensure_future_annotations(file_code)

            # Fix missing stdlib imports (dataclass, Callable, etc.)
            file_code = fix_missing_stdlib_imports(file_code)

            full_path = repo_path / file_path
            if full_path.exists():
                try:
                    existing = full_path.read_text(encoding="utf-8")
                except Exception:
                    existing = ""
                # Skip if the file already has more content than the skeleton
                if len(existing.strip()) > len(file_code.strip()):
                    result["skipped"].append(file_path)
                    continue

            # Write skeleton
            try:
                full_path.parent.mkdir(parents=True, exist_ok=True)
                full_path.write_text(file_code, encoding="utf-8")
                result["written"].append(file_path)
            except Exception as e:
                logger.error("Failed to write skeleton for %s: %s", file_path, e)

    logger.info(
        "Interface skeletons: wrote %d files, skipped %d files",
        len(result["written"]),
        len(result["skipped"]),
    )
    return result


def _load_json(path: Path) -> Optional[Any]:
    """Best-effort JSON loader shared by the collectors below.

    Returns the parsed document, or ``None`` when the file is missing or
    unreadable/corrupt. Collectors treat ``None`` as "no context available"
    and fall back to empty results rather than raising — missing design
    artifacts must never abort a batch.
    """
    if not path.exists():
        return None
    try:
        with open(path, "r", encoding="utf-8") as f:
            return json.load(f)
    except Exception:
        return None


# ============================================================================
# Base Classes & Data Structures
# ============================================================================

def collect_base_classes_context(
    base_classes_path: Path,
    current_subtree: str
) -> Dict[str, Any]:
    """Collect base-class code (all) and data-structure code (current subtree only).

    Args:
        base_classes_path: Path to base_classes.json
        current_subtree: Name of the current subtree/functional area

    Returns:
        {
            "base_classes": [{"file_path": ..., "code": ..., "subclasses": ...}, ...],
            "data_structures": [{"code": ..., "subtree": ..., "data_flow_types": ...}, ...]
        }
    """
    result: Dict[str, list] = {"base_classes": [], "data_structures": []}

    data = _load_json(base_classes_path)
    if data is None:
        return result

    # All base classes — no filtering
    for bc in data.get("base_classes", []):
        result["base_classes"].append({
            "file_path": bc.get("file_path", ""),
            "code": bc.get("code", ""),
            "subclasses": bc.get("subclasses", {}),
        })

    # Data structures — only those matching current_subtree
    for ds in data.get("data_structures", []):
        if ds.get("subtree", "") == current_subtree:
            result["data_structures"].append({
                "code": ds.get("code", ""),
                "subtree": ds.get("subtree", ""),
                "data_flow_types": ds.get("data_flow_types", []),
                "file_path": ds.get("file_path", ""),
            })

    return result


# ============================================================================
# Data Flow Edges
# ============================================================================

def collect_data_flow_edges(
    data_flow_path: Path,
    current_subtree: str
) -> List[Dict[str, str]]:
    """Return data-flow edges involving *current_subtree* (as source or target).

    Args:
        data_flow_path: Path to data_flow.json
        current_subtree: Name of the current subtree

    Returns:
        List of edge dicts (original JSON shape, unmodified).
    """
    data = _load_json(data_flow_path)
    if data is None:
        return []

    edges = data.get("data_flow", [])
    return [
        e for e in edges
        if e.get("source") == current_subtree
        or e.get("target") == current_subtree
    ]


def collect_all_data_flow_edges(
    data_flow_path: Path,
) -> List[Dict[str, str]]:
    """Return ALL data-flow edges (no subtree filter).

    Used by wiring tasks that need a global view of cross-module connections.

    Args:
        data_flow_path: Path to data_flow.json

    Returns:
        List of all edge dicts.
    """
    data = _load_json(data_flow_path)
    return [] if data is None else data.get("data_flow", [])


# ============================================================================
# Dependency Files (from enhanced_data_flow in interfaces.json)
# ============================================================================

def collect_dependency_files(
    interfaces_path: Path,
    file_path: str
) -> Dict[str, Any]:
    """Identify files that the current file depends on, using the
    ``enhanced_data_flow`` section of interfaces.json.

    Args:
        interfaces_path: Path to interfaces.json
        file_path: The target file path for the current batch

    Returns:
        {
            "inherits_from": [{"parent": ..., "parent_file": ...}, ...],
            "invokes": [{"callee": ..., "callee_file": ...}, ...],
            "references": [{"type": ..., "type_file": ...}, ...],
            "dependent_files": [sorted unique file paths]
        }
    """
    result: Dict[str, Any] = {
        "inherits_from": [],
        "invokes": [],
        "references": [],
        "dependent_files": [],
    }

    interfaces = _load_json(interfaces_path)
    if interfaces is None:
        return result

    edf = interfaces.get("enhanced_data_flow", {})
    dep_files: set = set()

    # Inheritance edges: source_file == file_path -> depends on parent_file
    for edge in edf.get("inheritance_edges", []):
        if edge.get("source_file") == file_path and edge.get("parent_file"):
            result["inherits_from"].append({
                "parent": edge.get("parent", ""),
                "parent_file": edge["parent_file"],
            })
            dep_files.add(edge["parent_file"])

    # Invocation edges: caller_file == file_path -> depends on callee_file
    for edge in edf.get("invocation_edges", []):
        if edge.get("caller_file") == file_path and edge.get("callee_file"):
            result["invokes"].append({
                "callee": edge.get("callee", ""),
                "callee_file": edge["callee_file"],
            })
            dep_files.add(edge["callee_file"])

    # Reference edges: source_file == file_path -> depends on type_file
    for edge in edf.get("reference_edges", []):
        if edge.get("source_file") == file_path and edge.get("type_file"):
            result["references"].append({
                "type": edge.get("referenced_type", ""),
                "type_file": edge["type_file"],
            })
            dep_files.add(edge["type_file"])

    # Remove self-references
    dep_files.discard(file_path)
    result["dependent_files"] = sorted(dep_files)

    return result
# ============================================================================
# Completed Modules
# ============================================================================

def collect_completed_context(
    completed_task_ids: List[str],
    tasks_path: Path
) -> Dict[str, List[str]]:
    """Build a mapping of already-completed files -> unit lists.

    Args:
        completed_task_ids: List of completed task IDs from CodeGenState
        tasks_path: Path to tasks.json

    Returns:
        ``{"src/core/parser.py": ["class Parser", "function tokenize"], ...}``
    """
    if not tasks_path.exists() or not completed_task_ids:
        return {}

    try:
        with open(tasks_path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except Exception:
        return {}

    completed_set = set(completed_task_ids)
    file_units: Dict[str, List[str]] = {}

    for _subtree, files_dict in data.get("planned_tasks_dict", {}).items():
        for _file_path, batches_list in files_dict.items():
            for batch_data in batches_list:
                if batch_data.get("task_id") in completed_set:
                    # Prefer the batch's own file_path; fall back to the dict key.
                    fp = batch_data.get("file_path", _file_path)
                    units = batch_data.get("units_key", [])
                    if fp not in file_units:
                        file_units[fp] = []
                    file_units[fp].extend(units)

    return file_units


# ============================================================================
# ORM Model Registry (cross-subtree relationship awareness)
# ============================================================================

def scan_orm_model_registry(
    interfaces_path: Path,
    repo_path: Optional[Path] = None,
) -> Dict[str, Any]:
    """Scan interfaces.json for ORM model classes and their relationship()
    targets to build a model registry with cross-file dependencies.

    This solves the SQLAlchemy mapper configuration problem: when any test
    instantiates a model, SQLAlchemy eagerly configures ALL mappers in the
    registry. If model A has ``relationship('B')``, class B must be
    imported (even if unused in the test) before mapper configuration runs.

    Returns:
        {
            "models": {"User": "src/.../models.py", ...},
            "relationships": [
                {"source_file": ..., "source_class": ...,
                 "target_class": ..., "target_file": ..., "field": ...},
            ],
            "model_files": ["src/.../models.py", ...]  # sorted, deduped
        }
        Returns empty dict if no ORM models are detected.
    """
    import ast as _ast

    models: Dict[str, str] = {}  # class_name -> file_path
    relationships: List[Dict] = []
    model_files_set: set = set()
    seen_rels: set = set()  # dedup key: (source_class, field, target_class)

    # --- Strategy 1: scan interfaces.json file_code blocks ---
    if interfaces_path and interfaces_path.exists():
        try:
            with open(interfaces_path, "r", encoding="utf-8") as f:
                data = json.load(f)
        except Exception:
            data = {}

        for _subtree, subtree_data in data.get("subtrees", {}).items():
            for file_path, file_info in subtree_data.get("interfaces", {}).items():
                code = file_info.get("file_code", "")
                if not code:
                    continue
                _scan_code_for_models(
                    _ast, code, file_path, models, relationships,
                    model_files_set, seen_rels,
                )

    # --- Strategy 2: if repo_path given, scan actual **/model*.py files ---
    # Catches models added during codegen (not in skeleton) and handles
    # projects where model files aren't named models.py.
    if repo_path and repo_path.is_dir():
        src_dir = repo_path / "src"
        search_dir = src_dir if src_dir.is_dir() else repo_path
        for py_file in search_dir.rglob("model*.py"):
            rel_path = str(py_file.relative_to(repo_path))
            try:
                code = py_file.read_text(encoding="utf-8", errors="replace")
            except Exception:
                continue
            _scan_code_for_models(
                _ast, code, rel_path, models, relationships,
                model_files_set, seen_rels,
            )

    if not models:
        return {}

    # Resolve target_file for relationships now that all models are known.
    for rel in relationships:
        if not rel.get("target_file"):
            rel["target_file"] = models.get(rel["target_class"])

    return {
        "models": models,
        "relationships": relationships,
        "model_files": sorted(model_files_set),
    }


def _scan_code_for_models(
    _ast, code: str, file_path: str,
    models: Dict[str, str],
    relationships: List[Dict],
    model_files_set: set,
    seen_rels: Optional[set] = None,
) -> None:
    """Parse a single file's code for ORM model classes and relationships.

    Mutates *models*, *relationships* and *model_files_set* in place.
    ``seen_rels`` deduplicates relationships discovered by multiple
    scan strategies, keyed on (source_class, field, target_class).
    """
    try:
        tree = _ast.parse(code)
    except SyntaxError:
        return

    for node in _ast.iter_child_nodes(tree):
        if not isinstance(node, _ast.ClassDef):
            continue

        # --- Detect ORM model classes ---
        # Heuristic 1: inherits from *Model / BaseModel / db.Model.
        # FIX: the previous implementation compared for the exact names
        # "Model"/"BaseModel" only, which missed bases like "UserModel"
        # that the documented "*Model" heuristic is meant to cover.
        base_names = []
        for b in node.bases:
            if isinstance(b, _ast.Name):
                base_names.append(b.id)
            elif isinstance(b, _ast.Attribute):
                base_names.append(b.attr)
        is_model = any(n.endswith("Model") for n in base_names)

        # Heuristic 2: has __tablename__ attribute (strongest ORM signal).
        # Handles both plain and annotated assignments.
        has_tablename = False
        for item in node.body:
            if isinstance(item, _ast.Assign):
                if any(
                    isinstance(t, _ast.Name) and t.id == "__tablename__"
                    for t in item.targets
                ):
                    has_tablename = True
                    break
            elif isinstance(item, _ast.AnnAssign):
                if isinstance(item.target, _ast.Name) and item.target.id == "__tablename__":
                    has_tablename = True
                    break

        # Heuristic 3: inherits from a known ORM model already in the registry
        inherits_known_model = any(n in models for n in base_names)

        if not (is_model or has_tablename or inherits_known_model):
            continue

        class_name = node.name
        models[class_name] = file_path
        model_files_set.add(file_path)

        # Scan class body for (db.)relationship() calls
        for item in _ast.walk(node):
            if not isinstance(item, _ast.Call):
                continue
            func = item.func
            # Match: db.relationship('TargetClass', ...) or relationship('...')
            is_rel = (
                (isinstance(func, _ast.Attribute) and func.attr == "relationship")
                or (isinstance(func, _ast.Name) and func.id == "relationship")
            )
            if not is_rel:
                continue
            # Extract first string argument = target class name
            if item.args and isinstance(item.args[0], _ast.Constant) and isinstance(item.args[0].value, str):
                target_class = item.args[0].value
                # Find the field name (the assignment target)
                field_name = _find_assignment_target(_ast, node, item)
                # Dedup: skip if already seen from another strategy
                rel_key = (class_name, field_name or "?", target_class)
                if seen_rels is not None:
                    if rel_key in seen_rels:
                        continue
                    seen_rels.add(rel_key)
                relationships.append({
                    "source_file": file_path,
                    "source_class": class_name,
                    "target_class": target_class,
                    "target_file": None,  # resolved later
                    "field": field_name or "?",
                })


def _find_assignment_target(_ast, class_node, call_node) -> Optional[str]:
    """Find the attribute name that a call is assigned to within a class body.

    FIX: also handles annotated assignments
    (``posts: Mapped[list] = relationship('Post')``, SQLAlchemy 2.0 style),
    which the previous Assign-only version reported as "?".
    """
    for item in class_node.body:
        if isinstance(item, _ast.Assign) and item.value is call_node:
            for t in item.targets:
                if isinstance(t, _ast.Name):
                    return t.id
        elif isinstance(item, _ast.AnnAssign) and item.value is call_node:
            if isinstance(item.target, _ast.Name):
                return item.target.id
    return None


def collect_reverse_dependencies(
    interfaces_path: Path,
    file_path: str,
) -> List[Dict[str, str]]:
    """Collect reverse dependencies: who depends on the current file.

    Returns list of edges where current file is the *target* (callee/parent/type).
    This answers: "which other files will break if I change this file?"

    Returns:
        [{"dependent_file": ..., "dependent_unit": ..., "edge_type": ..., "via": ...}, ...]
    """
    result: List[Dict[str, str]] = []
    if not interfaces_path or not interfaces_path.exists():
        return result

    try:
        with open(interfaces_path, "r", encoding="utf-8") as f:
            interfaces = json.load(f)
    except Exception:
        return result

    edf = interfaces.get("enhanced_data_flow", {})

    # Files that inherit from something in this file
    for edge in edf.get("inheritance_edges", []):
        if edge.get("parent_file") == file_path:
            result.append({
                "dependent_file": edge.get("source_file", ""),
                "dependent_unit": edge.get("child", ""),
                "edge_type": "inherits_from",
                "via": edge.get("parent", ""),
            })

    # Files that call something in this file
    for edge in edf.get("invocation_edges", []):
        if edge.get("callee_file") == file_path:
            result.append({
                "dependent_file": edge.get("caller_file", ""),
                "dependent_unit": edge.get("caller", ""),
                "edge_type": "calls",
                "via": edge.get("callee", ""),
            })

    # Files that reference types from this file
    for edge in edf.get("reference_edges", []):
        if edge.get("type_file") == file_path:
            result.append({
                "dependent_file": edge.get("source_file", ""),
                "dependent_unit": edge.get("unit", ""),
                "edge_type": "references",
                "via": edge.get("referenced_type", ""),
            })

    return result


# ============================================================================
# Main Entry Point
# ============================================================================

def build_dependency_context(
    batch: "PlannedTask",
    interfaces_path: Path,
    base_classes_path: Path,
    data_flow_path: Path,
    tasks_path: Path,
    completed_task_ids: List[str],
    feature_spec_path: Optional[Path] = None,
) -> Dict[str, Any]:
    """Collect all dependency context for a task.

    This is the single entry point used by ``run_batch.py``'s
    batch-prep flow (``_prepare_batch_context``).

    Args:
        batch: The current PlannedTask
        interfaces_path: Path to interfaces.json
        base_classes_path: Path to base_classes.json
        data_flow_path: Path to data_flow.json
        tasks_path: Path to tasks.json
        completed_task_ids: List of completed task IDs
        feature_spec_path: Path to feature_spec.json (for project background context)

    Returns:
        A dict containing all context sections, ready for prompt injection.
    """
    # Load project background/technology context from feature_spec.json
    project_background = ""
    if feature_spec_path and feature_spec_path.exists():
        try:
            project_background = get_project_background_context(feature_spec_path)
        except Exception as _exc:
            logger.warning("Failed to load project background context: %s", _exc)

    return {
        "project_background": project_background,
        "base_classes": collect_base_classes_context(
            base_classes_path, batch.subtree
        ),
        # Wiring tasks need the global edge list, not a subtree slice.
        "data_flow_edges": collect_data_flow_edges(
            data_flow_path, batch.subtree
        ) if batch.task_type != "wiring" else collect_all_data_flow_edges(
            data_flow_path
        ),
        "dependencies": collect_dependency_files(
            interfaces_path, batch.file_path
        ),
        "completed": collect_completed_context(
            completed_task_ids, tasks_path
        ),
        "current_subtree": batch.subtree,
        "current_file": batch.file_path,
        "model_registry": scan_orm_model_registry(interfaces_path),
        "reverse_deps": collect_reverse_dependencies(
            interfaces_path, batch.file_path
        ),
    }
#!/usr/bin/env python3
"""Final test stage of the codegen pipeline.

This module hosts :func:`final_test`, extracted from
``scripts/run_batch.py`` Module 6 ("Final Test").

After all per-task batches complete, the orchestrator runs a single
full-suite pytest pass against the merged code on ``main``. When
pytest passes, we also run the smoke test (import sweep + entry-point
check + stub detection); if the smoke test reports actionable findings,
a repair sub-agent is dispatched and the full pytest is re-run.

The stage's outcome is persisted to ``.rpgkit/logs/codegen_final_test.json``
(and ``codegen_smoke_test.json``) via
:mod:`scripts.code_gen.stage_io` so that the global-review stage can
consume the results without re-running pytest.

Internal to the codegen package; no external API contract.
"""

from __future__ import annotations

import logging
from pathlib import Path
from typing import Any, Dict, Optional

from common.git_utils import GitRunner
from common.paths import CODE_GEN_STATE_FILE as STATE_FILE, REPO_DIR
from code_gen.git_ops import ensure_on_main
from code_gen.stage_io import save_stage_result
from code_gen.sub_agent import dispatch_sub_agent
from code_gen.test_runner import (
    ensure_deps_installed,
    get_dev_python,
    run_pytest,
)

logger = logging.getLogger(__name__)


from code_gen._constants import (  # noqa: E402
    DEFAULT_PYTEST_OVERALL_TIMEOUT,
    DEFAULT_TEST_TIMEOUT,
)


def final_test(
    repo_path: Optional[Path] = None,
    state_path: Path = STATE_FILE,
) -> Dict[str, Any]:
    """Run the full test suite against the completed repo.

    On pytest success, additionally runs the smoke test and dispatches a
    repair sub-agent for any actionable (error-severity) findings, then
    re-verifies with a second full pytest run. Results are persisted via
    :func:`save_stage_result` for the global-review stage.

    Args:
        repo_path: Project repo path (defaults to ``REPO_DIR``).
        state_path: Path to state file.

    Returns:
        Result dict with test statistics.
    """
    repo_path = repo_path or REPO_DIR
    git = GitRunner(str(repo_path))

    logger.info("━━━ Final Test: full repo validation ━━━")

    try:
        ensure_on_main(git)
    except RuntimeError as exc:
        return {"success": False, "error": str(exc)}

    # Best-effort dependency install; a failure here should not abort the
    # run — pytest will surface any truly missing packages.
    try:
        ensure_deps_installed(repo_path)
    except Exception as exc:
        logger.warning("Dependency install issue: %s", exc)

    # Full-suite pytest with per-test timeouts.
    suite = run_pytest(
        repo_path,
        timeout=DEFAULT_PYTEST_OVERALL_TIMEOUT,
        extra_args=[
            "-v", "--tb=short",
            f"--timeout={DEFAULT_TEST_TIMEOUT}", "--timeout-method=thread",
        ],
    )

    outcome = {
        "success": suite.success,
        "type": "final_test",
        "passed": suite.passed,
        "failed": suite.failed,
        "errors": suite.errors,
        "skipped": suite.skipped,
        "duration": suite.duration,
        # Keep the (truncated) output only on failure; success needs none.
        "output": suite.output[:5000] if not suite.success else "",
        "next_action": (
            "All tests passed! The repository is ready."
            if suite.success else
            f"Final test failed ({suite.failed} failures, {suite.errors} errors). "
            f"Review the output above and fix remaining issues."
        ),
    }

    # After pytest passes, run smoke test and attempt repair if issues found.
    if suite.success:
        try:
            # Lazy import: smoke_test pulls in the dep_graph stack, so only
            # load it on the success path where we actually need it.
            from smoke_test import run_smoke_test
            from code_gen.batch_prompts import build_batch_pytest_cmd

            smoke = run_smoke_test()
            outcome["smoke_test"] = smoke.to_dict()

            # Only error-severity findings trigger a repair attempt.
            actionable = [f for f in smoke.findings if f.severity == "error"]

            if actionable:
                findings_desc = "\n".join(
                    f"- [{f.severity}] {f.message}" for f in actionable
                )
                # Build the verification command the repair agent must run.
                dev_python = get_dev_python(repo_path) or "python3"
                repair_pytest_cmd = build_batch_pytest_cmd([], dev_python)
                repair_prompt = (
                    "The smoke test detected the following issues after all "
                    "unit tests passed. Fix each issue in the production code, "
                    "then run the test suite to verify nothing is broken.\n\n"
                    f"Findings:\n{findings_desc}\n\n"
                    "Common fixes:\n"
                    "- STUB (pass only) → implement the function body\n"
                    "- PLACEHOLDER return → replace with real logic\n"
                    "- Import error → add missing import\n"
                    "- Startup crash → fix initialization code\n\n"
                    "Do NOT create new test files. Only fix production code.\n"
                    "After fixing, run this command to verify:\n"
                    f"```\n{repair_pytest_cmd}\n```\n\n"
                    "When done, commit your changes:\n"
                    "```\ngit add -A && git commit -m "
                    '"fix: repair smoke test findings"\n```\n'
                    "Then output: BATCH_RESULT: PASS"
                )
                logger.info(
                    "Smoke test found %d actionable issues, dispatching "
                    "repair agent", len(actionable)
                )
                response, error = dispatch_sub_agent(
                    repair_prompt, repo_path, timeout=1800,
                    purpose="smoke_repair",
                )
                if response:
                    # Verify repair didn't break existing tests.
                    recheck = run_pytest(
                        repo_path,
                        timeout=DEFAULT_PYTEST_OVERALL_TIMEOUT,
                        extra_args=[
                            "-v", "--tb=short",
                            f"--timeout={DEFAULT_TEST_TIMEOUT}", "--timeout-method=thread",
                        ],
                    )
                    if not recheck.success:
                        logger.warning(
                            "Repair agent broke %d tests, results may be degraded",
                            recheck.failed + recheck.errors,
                        )
                    # Re-run smoke test to verify repairs.
                    smoke_after = run_smoke_test()
                    outcome["smoke_test"] = smoke_after.to_dict()
                    outcome["smoke_repair_attempted"] = True
                    outcome["post_repair_tests_pass"] = recheck.success
                    remaining = [
                        f for f in smoke_after.findings
                        if f.severity == "error"
                    ]
                    logger.info(
                        "Post-repair: smoke=%d issues remaining (was %d), "
                        "pytest=%s",
                        len(remaining), len(actionable),
                        "PASS" if recheck.success else "FAIL",
                    )
        except ImportError:
            logger.debug("smoke_test module not available, skipping")
        except Exception as exc:
            logger.warning("Smoke test / repair failed: %s", exc)

    # Save per-stage results for global_review context.
    save_stage_result("final_test", {
        "success": suite.success,
        "passed": suite.passed,
        "failed": suite.failed,
        "errors": suite.errors,
        "output_tail": "\n".join(suite.output.splitlines()[-40:]) if not suite.success else "",
    })
    smoke_data = outcome.get("smoke_test")
    if isinstance(smoke_data, dict):
        smoke_save: Dict[str, Any] = {
            "findings": smoke_data.get("findings", []),
            "error_count": smoke_data.get("error_count", 0),
        }
        if outcome.get("smoke_repair_attempted"):
            smoke_save["repair_attempted"] = True
            remaining = [
                f for f in smoke_data.get("findings", [])
                if f.get("severity") == "error"
            ]
            smoke_save["repair_remaining"] = len(remaining)
        save_stage_result("smoke_test", smoke_save)

    return outcome
+ """ + current = git.get_current_branch() + if current == git.main_branch: + return + + logger.info("Currently on branch '%s', switching to '%s'", current, git.main_branch) + if git.has_uncommitted_changes(): + logger.warning("Committing uncommitted changes on '%s' before switching", current) + git.stage_and_commit(f"WIP: auto-save before switching to {git.main_branch}") + + if not git.switch_branch(git.main_branch): + raise RuntimeError( + f"Failed to switch to {git.main_branch}. " + f"Current branch: {current}. Manual intervention needed." + ) + + +def setup_batch_branch( + git: GitRunner, + batch_id: str, + repo_path: Path, + reuse_existing: bool = False, +) -> Tuple[bool, str, str]: + """Create (or reuse) a batch branch from latest main HEAD. + + Args: + git: GitRunner instance. + batch_id: Batch identifier. + repo_path: Repo root path. + reuse_existing: If True and branch exists, switch to it instead of + deleting and recreating. + + Returns: + (success, branch_name, initial_commit) + """ + ensure_on_main(git) + + safe_id = batch_id.replace("/", "_").replace("\\", "_")[:50] + branch_name = f"batch/{safe_id}" + + if git.branch_exists(branch_name): + if reuse_existing: + logger.info("Reusing existing branch '%s'", branch_name) + if not git.switch_branch(branch_name): + return False, branch_name, "" + initial_commit = git.get_head_commit() + return True, branch_name, initial_commit + else: + logger.info("Deleting stale branch '%s' (will recreate from main)", branch_name) + git.delete_branch(branch_name, force=True) + + initial_commit = git.get_head_commit() + success = git.create_branch(branch_name) + if not success: + logger.error("Failed to create branch '%s'", branch_name) + return success, branch_name, initial_commit + + +def merge_batch_branch( + git: GitRunner, + branch_name: str, + batch_id: str, + file_path: str = "", + units: Optional[List[str]] = None, +) -> Tuple[bool, Optional[str]]: + """Merge a batch branch into main and delete it. + + 1. 
Commit any remaining changes on the branch. + 2. Build a merge message with batch_completed marker in body. + 3. Merge into main (--no-ff) with custom message. + 4. Delete the branch. + + The merge message body contains ``batch_completed: `` so that + git-based state recovery (``git log --grep``) can detect completed batches. + + Args: + git: GitRunner instance. + branch_name: Branch to merge. + batch_id: Batch ID for recovery marker. + file_path: Target file path for readable message. + units: List of unit names for readable message. + + Returns: + ``(success, error_description)`` + + error_description values: + - None when the merge succeeded. + - ``"branch_missing"`` when ``branch_name`` does not exist. Callers + should treat this as a skip (sub-agent setup issue), NOT as a + retryable failure. + - Any other string is propagated from ``GitRunner.merge_branch`` + (e.g. ``"merge_conflict"``, ``"merge_failed"``). + """ + # Branch went missing โ†’ caller must skip, not consume a retry slot. + # Happens when the sub-agent committed straight to main or deleted + # the branch. Stage any local changes first so they aren't lost. + if not git.branch_exists(branch_name): + logger.warning( + "Cannot merge: branch '%s' does not exist (sub-agent did not " + "use the batch branch). 
Treating as skip.", + branch_name, + ) + if git.has_uncommitted_changes(): + git.stage_and_commit( + f"WIP: salvage uncommitted changes after missing branch '{branch_name}'" + ) + return False, "branch_missing" + + # Commit any leftover changes + if git.has_uncommitted_changes(): + git.stage_and_commit(f"batch: final changes for {batch_id}") + + # Build merge message: readable subject + marker in body + units_str = ", ".join(units) if units else "" + is_marker = file_path.startswith("<") and file_path.endswith(">") + if is_marker: + scope = file_path.strip("<>").lower().replace("_", "-") + elif file_path: + scope = file_path.split("/")[-1].replace(".py", "") + else: + scope = "" + + if scope and units_str: + subject = f"merge({scope}): {units_str}" + elif scope: + subject = f"merge: {scope}" + else: + subject = f"merge: {branch_name}" + + body_lines = [f"batch_completed: {batch_id}"] + if file_path: + body_lines.append(f"Target: {file_path}") + if units_str: + body_lines.append(f"Units: {units_str}") + merge_msg = subject + "\n\n" + "\n".join(body_lines) + + merge_ok, error = git.merge_branch(branch_name, message=merge_msg) + if merge_ok: + git.delete_branch(branch_name) + logger.info("Merged branch '%s' into main and deleted it", branch_name) + return True, None + else: + logger.error("Failed to merge branch '%s': %s", branch_name, error) + return False, error + + +def abandon_batch_branch(git: GitRunner, branch_name: str) -> None: + """Switch back to main, leaving the batch branch intact for inspection.""" + if git.has_uncommitted_changes(): + git.stage_and_commit("WIP: batch failed, preserving state") + + logger.info("Abandoning branch '%s', switching to main", branch_name) + git.switch_branch(git.main_branch) diff --git a/RPG-Kit/scripts/code_gen/global_review.py b/RPG-Kit/scripts/code_gen/global_review.py new file mode 100644 index 0000000..2939849 --- /dev/null +++ b/RPG-Kit/scripts/code_gen/global_review.py @@ -0,0 +1,1370 @@ +#!/usr/bin/env python3 
+"""Global feature review โ€” final QA pass of the codegen pipeline. + +This module hosts the helpers extracted from ``scripts/run_batch.py`` +Module 6b ("Global Review"): + +* The :data:`GLOBAL_REVIEW_PROMPT` template (โ‰ˆ470 lines of sub-agent prompt). +* :class:`_HeartbeatLogger` โ€” periodic-progress log for long-running calls. +* :func:`global_review` โ€” iterative review-and-repair loop. +* A dozen private helpers (``_load_feature_spec`` / ``_build_review_prompt`` / + ``_extract_review_checklist`` / ``_parse_review_result`` / etc.). + +The orchestrator (``scripts.run_batch``) calls :func:`global_review` +from its ``--global-review`` CLI mode. No external (non-``run_batch``) +caller imports from this module. +""" + +from __future__ import annotations + +import json +import logging +import os +import re +import shutil +import threading +import time +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple + +from common.paths import ( + FEATURE_SPEC_FILE, + LOGS_DIR as _LOGS_DIR, + REPO_DIR, + TOOLS_DIR, +) +from code_gen.batch_prompts import build_batch_pytest_cmd +from code_gen.stage_io import ( + save_stage_result as _save_stage_result, + load_stage_result as _load_stage_result, +) +from code_gen.sub_agent import dispatch_sub_agent +from code_gen.test_runner import ( + ensure_deps_installed, + get_dev_python, + run_pytest, +) + +logger = logging.getLogger(__name__) + + +# ============================================================================ +# Shared timeout constants +# ============================================================================ + +from code_gen._constants import ( # noqa: E402 + DEFAULT_PYTEST_OVERALL_TIMEOUT, + DEFAULT_TEST_TIMEOUT, +) + + +# ============================================================================ +# Global Review prompt template +# ============================================================================ + +GLOBAL_REVIEW_PROMPT = """\ +# Global Review: Full Feature Verification & 
Quality Check + +You are a QA engineer performing a comprehensive review of a completed Python +project. Your job is to: +1. Verify every planned feature works correctly by **actually running** the project +2. **Simulate real user interactions** (click buttons, fill forms, navigate pages) +3. Fix any bugs or missing functionality you find +4. **Improve visual quality** if the UI looks rough or unprofessional + +## Your Workflow + +### A. Read & Understand +1. Read main.py and understand how to start the project +2. Read the key source files to understand the architecture +3. Read the feature requirements listed below carefully โ€” these are **the plan**; + your job is to verify every planned feature is actually implemented and working + +### B. Start the Project +4. Set up the environment: + - Use the virtual environment at .venv_dev/ if it exists + - Set environment variables as needed (e.g., DATABASE_URL=sqlite:///test_review.db) + - Initialize the database if applicable +5. Start the project in the background +6. Verify it's running: + - Read the startup output to find the actual port (e.g., "Running on http://127.0.0.1:5000") + - Use that port for ALL subsequent commands (do NOT hardcode a port) + - Bind to 127.0.0.1 only (never 0.0.0.0) + - If the default port is occupied, use a different one and note which port you chose + - Verify with: `curl -s -o /dev/null -w '%{http_code}' http://127.0.0.1:/` + +### C. Create Test Data +7. Create realistic test data through the project's own interfaces: + - For web apps: use the app's UI or CLI commands to create users, sample + content, and any data needed for testing (check main.py for seed commands) + - For CLI tools: prepare input files or data fixtures + - For GUI apps: use the GUI tools to create content interactively + +### D. Verify Every Feature (Functional Completeness) +8. For EACH functional requirement listed below: + a. Determine how to test it (HTTP request, CLI command, GUI interaction, etc.) + b. 
**Actually execute** the test โ€” don't just read the code, run it + c. Check the response/output matches expectations + d. Record the result as PASS or FAIL with details + +### E. Visual Verification & Interactive Testing + +**This step is NOT optional for web and GUI projects.** You MUST use the +provided tools to visually verify the project. Verifying only via curl/API +is insufficient โ€” real users interact through the browser or GUI. + +#### Step E1: Discover all pages/screens + +For **web apps**: +- Read the source code to find all registered routes (e.g., grep for + `@app.route`, `@bp.route`, `url_map`, router definitions) +- Start from the homepage: `inspect` it to see all links and navigation +- Follow every link found to discover all reachable pages +- Also check routes from source code that may not be linked from nav + +For **GUI apps**: +- Launch the app and take an initial screenshot +- Identify all visible buttons, menus, tabs, and panels + +For **CLI tools**: +- Run the help command to list all subcommands and options + +#### Step E2: Inspect every page/screen + +For **web apps**, use `inspect` on EVERY distinct page: +```bash +# Start from homepage โ€” discover links, forms, navigation +python $BROWSER_TOOL inspect http://localhost:/ +# Then inspect every link found, and every route from source code +# Read the saved HTML files to understand full page content +``` + +For **GUI apps**, screenshot after each action: +```bash +python $GUI_TOOL start-display +python $GUI_TOOL launch "python main.py" --wait 3 +python $GUI_TOOL status # IMPORTANT: verify a window actually appeared +python $GUI_TOOL screenshot +``` + +**After `launch`, check the output carefully:** +- If it says "Top-level windows: N" with window titles โ†’ the GUI opened correctly +- If it says "[WARNING] No visible window detected" โ†’ the app did NOT create + a GUI window. It may only print text to console. This is a **bug in the app** + that you must fix before continuing. 
The GUI code needs to actually create + a window (e.g., `tkinter.Tk()`, `QApplication`, `pygame.display.set_mode`). + +#### Step E3: Simulate real user interactions + +Don't just view pages โ€” **interact** with them like a real user: + +For **web apps**: +- Click every link and button on each page +- Fill and submit every form +- After each interaction, read the saved HTML to verify the result +- Check: did the action succeed? Did it redirect correctly? Does the + response page show the expected content? +```bash +# Example: interact with a form (login, search, create, etc.) +python $BROWSER_TOOL run-script http://localhost:/some-page --script ' +page.click("a:has-text(\"Some Link\")") +page.wait_for_load_state("networkidle") +' +# Then read the [After] HTML file to verify the result +``` + +For **GUI apps**: +- Click every button, try every tool, fill every input +- After each action, take a screenshot and verify the result +- **Keep the app running** โ€” do NOT close and relaunch between tests. + Use one launch session for all interaction testing. +- For multi-step workflows (e.g., select tool โ†’ click canvas โ†’ set params): + use `run-script` to chain actions reliably with proper timing. + +**Simple interactions** (one action at a time): +```bash +python $GUI_TOOL click 200 150 +python $GUI_TOOL screenshot +python $GUI_TOOL type "test input" +python $GUI_TOOL key "Return" +python $GUI_TOOL screenshot +``` + +**Complex multi-step interactions** โ€” write to a file first, then run via +`--file`. 
This guarantees the script is reusable in future review iterations: +```bash +# Write the test script to the reusable scripts directory +mkdir -p .rpgkit/tmp/gui_test_scripts +cat > .rpgkit/tmp/gui_test_scripts/01_create_shape.py << 'PYEOF' +import time +# Verify: selecting a tool and using it on the canvas +gui.click(120, 45) # open dropdown menu +time.sleep(0.3) # wait for menu to appear +gui.click(120, 120) # select an option +time.sleep(0.3) +gui.click(400, 300) # click on canvas/main area +time.sleep(0.5) +gui.screenshot() # verify result +PYEOF +# Run it +python $GUI_TOOL run-script --file .rpgkit/tmp/gui_test_scripts/01_create_shape.py +``` +This way the script file persists and can be replayed in the next iteration. + +#### Step E4: Visual quality check + +Review screenshots, saved HTML files, or GUI state for EACH page/screen: + +**Screenshot sanity check (critical โ€” do this FIRST):** +- After taking a screenshot, check that it actually shows meaningful content. + A completely black, white, or solid-color screenshot means the application + did NOT render correctly โ€” do NOT mark this as "expected" or "empty scene". +- For GUI apps: if the screenshot is all black, the app either crashed, + failed to initialize its renderer, or Xvfb didn't work. This is a FAILURE, + not "an empty scene". Investigate and fix the startup issue. +- For web apps: if the screenshot is blank, the page didn't load. Check for + 500 errors, missing templates, or broken routes. +- **Never rationalize a blank/black screen as acceptable.** A working app + must show visible UI elements (menus, toolbars, canvas, content). + +**Layout verification (critical):** +- For web: read saved HTML and check CSS layout structure. For grid/flex + layouts, count child elements vs column/row definitions. Common bug: + extra elements inside a grid push content into wrong columns. 
+- For GUI: verify widgets are positioned correctly โ€” main content area + should not be empty while data is squeezed into a sidebar or toolbar. +- For all: verify the PRIMARY content (data lists, forms, visualizations) + occupies the MAIN area of the screen, not a narrow secondary panel. + Large empty areas next to content indicate a layout bug. + +**Visual quality:** +- Check for: broken layouts, missing styles, overlapping elements, + unreadable text, inconsistent spacing, unstyled defaults +- If the UI looks rough or unprofessional, improve it: + - Web: fix CSS (fonts, colors, spacing, borders, hover states) + - GUI: fix widget styling (padding, alignment, font sizes, colors) + - Ensure proper alignment and visual hierarchy across all screens + - Make error/success feedback visually distinct + +**Content rendering:** +- Verify text content displays correctly (not showing raw HTML tags, + raw markdown syntax, or escaped entities) +- Check that formatted content (markdown, rich text) renders properly +- Ensure long content doesn't overflow its container or get clipped + +After fixing visual issues, re-inspect to verify the fix. + +### F. Additional Checks +9. After interactive testing, also verify: + a. All routes/pages are reachable (no 500 errors or broken links) + b. Forms submit correctly and show success/error feedback + c. Navigation links lead to real pages + d. Error handling works (invalid input, 404, unauthorized access) + e. Data operations are consistent (create/edit/delete don't break related data) + +### G. Fix Issues +10. After completing ALL verification (not after the first failure): + - List all issues found (functional bugs, visual issues, missing features) + - Fix them in the production code + - Do NOT modify test files unless the test itself is clearly wrong + - After fixing, run the full test suite to verify no regressions: + ``` + $PYTEST_CMD + ``` + - **After fixing, re-inspect affected pages** to confirm the fix + +### H. 
Report Results + +**Build the checklist incrementally** as you work through steps Dโ€“G: +- After verifying each FR, immediately record it in the checklist +- When you discover an issue not covered by any FR (e.g., a broken link found + while navigating, a 500 error on an unlisted route, a visual glitch), add it + to "Discovered Issues" right away +- This ensures nothing is forgotten even if you hit context limits + +11. Clean up: + - Stop any background project processes you started + - Delete any test databases you created (e.g., test_review.db) + - For GUI apps: run `gui.py close` then `gui.py stop-display` + - For GUI apps: your test scripts in `.rpgkit/tmp/gui_test_scripts/` + are already saved (you wrote them to files before running via `--file`). + Do NOT delete them โ€” future review iterations will replay them. +12. Output the **Review Checklist** you've been building. Use this exact format: + +``` +## Review Checklist + +### Functional Verification +- [x] FR1: [description] โ€” [how you verified it] +- [~] FR2: [description] โ€” [was broken, what you fixed, verified fix] +- [ ] FR3: [description] โ€” [what's wrong, what you tried] +- [-] FR4: [description] โ€” [why not tested] + +### Visual Quality +- [x] /page-url โ€” clean layout, consistent nav, properly styled +- [~] /login โ€” was unstyled, added form CSS, verified +- [ ] /search โ€” text overlaps sidebar on narrow viewport + +### Discovered Issues +- [~] /admin/users returned 500 โ€” missing import, fixed +- [ ] Footer links point to # โ€” not yet fixed +- [x] Missing favicon โ€” added default icon + +### Tool Usage +- Pages inspected: [N] +- Forms tested: [N] +- Screenshots taken: [N] +- Code fixes applied: [N files changed] +``` + +**Checklist symbols:** +- `[x]` = verified, works correctly (no action needed) +- `[~]` = was broken, you fixed it AND verified the fix +- `[ ]` = broken, could not fix (explain why) +- `[-]` = not tested (context limit, dependency on failed item, etc.) 
+ +**Rules:** +- Every FR MUST appear in "Functional Verification" (one symbol each) +- "Visual Quality" lists PAGES or SCREENS you inspected (web/GUI projects). + For each, verify: (a) primary content occupies the main area (not squeezed + into a sidebar or secondary panel), (b) no large empty areas next to content, + (c) text is readable and properly styled, (d) layout matches the screen's + purpose (e.g., main listing is prominent, not hidden or misaligned) +- "Discovered Issues" captures problems found OUTSIDE the FR list โ€” anything + you noticed while navigating, testing, or inspecting that isn't covered by + a specific FR. These are just as important as FR failures. +- "Tool Usage" = counts of actual tool invocations + +13. Then on the **LAST line** of your response, output EXACTLY ONE result: +- `REVIEW_RESULT: DONE | functional=N/T, visual=V/P, fixed=M, discovered=D` + โ€” ALL FRs verified or fixed; all pages visually checked; discovered issues resolved. +- `REVIEW_RESULT: CONTINUE | functional=N/T, visual=V/P, fixed=M, failed=K, remaining=R` + โ€” Made progress but some items still failed or not tested. + The next iteration will pick up where you left off. +- `REVIEW_RESULT: BLOCKED | reason=...` + โ€” Cannot proceed at all (project won't start, critical crash, etc.) + +**When to use each:** +- `DONE`: ONLY when ALL of these are true: + 1. Every FR is `[x]` or `[~]` in Functional Verification + 2. Every page is `[x]` or `[~]` in Visual Quality (web/GUI only) + 3. All Discovered Issues are `[x]` or `[~]` (no unresolved `[ ]`) + 4. You actually used browser/GUI tools (not just curl) +- `CONTINUE`: Whenever there are `[ ]` or `[-]` items in any section. + This is NOT a failure โ€” it means the next iteration will continue. +- `BLOCKED`: Only for showstopper issues that prevent ANY verification. + +14. 
Commit your changes (if any fixes were made): +``` +git add -A && git commit -m "review: fix issues found in global review" +``` + +## Critical Rules +- Verify ALL features, not just the first one that fails +- **Actually run and interact** with the project โ€” don't just read source code +- **For web/GUI projects: you MUST use browser.py or gui.py tools.** Verifying + only via curl is NOT acceptable โ€” use `inspect` and `run-script` to simulate + real user interactions, and read saved HTML files to analyze results +- **When gui.py reports "[WARNING] No visible window detected"**, this means + the application has a BUG โ€” it did not create a real GUI window. You MUST + fix the code (e.g., add `tkinter.Tk()` to the window/display class). Do NOT + rationalize this as "abstract API", "visualization framework", or "expected + design". A DisplayWindow that only sets `self._is_open = True` without + creating a real OS window is a bug that must be fixed. +- **NEVER mark GUI visual quality as N/A or "not applicable"** if the project + specification mentions "interactive window", "GUI mode", or "visual interactive". + The project IS a GUI application โ€” verify it creates and renders a real window. +- Discover pages/routes from both navigation AND source code โ€” don't assume + you know all pages; some may not be linked from the homepage +- Create test data through the project's own interfaces (not direct DB writes) +- If the project won't start, fix the startup issue first +- Do not create new test files โ€” only fix production code +- Run pytest after every batch of fixes to catch regressions +- Kill any background processes before finishing +- Use a separate test database (e.g., test_review.db), not the default one + +## Context Limit Handling +If you are running low on context/tokens and cannot finish all FRs: +1. Complete and report as many FRs as you can +2. Commit any fixes you've already made +3. 
Fill out the Review Checklist โ€” put finished items in their sections, mark + unfinished FRs as `[-]` in Functional Verification +4. Output `REVIEW_RESULT: CONTINUE | functional=N/T, visual=V/P, fixed=M, failed=0, remaining=R` + The next iteration will receive your checklist as context and skip verified items. + +## Browser Tools Reference (for web projects) + +These tools use headless Chromium via Playwright. Use them in Step E above. + +**Primary command โ€” `inspect` (recommended):** +One call = screenshot + HTML + links + forms + page structure. Use this instead +of calling screenshot/list-links/list-forms separately. +```bash +python $BROWSER_TOOL inspect http://localhost:/ +python $BROWSER_TOOL inspect http://localhost:/login +``` +Output includes: request URL, actual URL, title, screenshot path, HTML path, +all visible links, all forms with fields, and page structure (headings/nav/buttons). +**Read the saved HTML file** to analyze full page content. + +**Other read-only commands** (for specific needs): +```bash +# Get page structure as text (headings, links, forms, buttons) +python $BROWSER_TOOL accessibility-tree http://localhost:/ + +# Get rendered HTML of a specific element +python $BROWSER_TOOL get-html http://localhost:/ --selector "nav" +``` + +**Interactive command โ€” `run-script`** (for multi-step flows): + +After run-script completes, it automatically prints: +- `[Before] URL` and `[After] URL` โ€” track page navigation +- `[After] Screenshot` โ€” path to auto-saved screenshot +- `[After] HTML` โ€” path to auto-saved HTML (**read this file** to analyze the result) + +```bash +# Example: fill a form and submit (adapt selectors to the actual page) +python $BROWSER_TOOL run-script http://localhost:/some-form --script ' +page.fill("input[name=field1]", "value1") +page.fill("input[name=field2]", "value2") +page.click("button[type=submit]") +page.wait_for_load_state("networkidle") +' +# After this runs, read the [After] HTML file to verify the page 
content +``` + +In run-script, you have access to: `page`, `browser`, `Path`, `json`, `print`. +Use `inspect` to analyze any page (links, forms, structure, HTML). +Use `run-script` for multi-step flows (login โ†’ navigate โ†’ verify). +After each action, **read the saved HTML file** to verify the page content. + +**Quoting tip:** If your script contains single quotes (e.g., `has-text('...')`), +write the script to a temp file and use `--file` instead of `--script`: +```bash +cat > /tmp/_review_script.py << 'SCRIPT' +page.click("a:has-text('Some Link')") +page.wait_for_load_state("networkidle") +SCRIPT +python $BROWSER_TOOL run-script http://localhost:/ --file /tmp/_review_script.py +``` + +## GUI Tools Reference (for desktop applications) + +These tools use Xvfb + xdotool for virtual display interaction. Use them in Step E above. + +**Display and app management:** +```bash +python $GUI_TOOL start-display +python $GUI_TOOL launch "python main.py" --wait 3 +python $GUI_TOOL status # verify window exists โ€” if "No visible window", fix the app +python $GUI_TOOL screenshot +``` +**CRITICAL:** After `launch`, read its output. If you see `[WARNING] No visible +window detected`, the app's GUI code is broken โ€” it runs but doesn't create a +window. Fix the GUI initialization code before taking screenshots. + +**IMPORTANT:** Keep the app running for all your testing. Do NOT close and +re-launch between each test โ€” use one continuous session. Only restart if +you modified the app's code and need to verify the fix. + +**Simple interactions** (one action at a time): +```bash +python $GUI_TOOL click 300 200 +python $GUI_TOOL type "Hello World" +python $GUI_TOOL key "Return" +python $GUI_TOOL key "ctrl+s" +python $GUI_TOOL scroll -3 +python $GUI_TOOL screenshot +``` + +**Multi-step interactions** โ€” always write to a file first, then run via +`--file`. 
Scripts saved under `.rpgkit/tmp/gui_test_scripts/` persist across +review iterations so the next agent can replay them: +```bash +mkdir -p .rpgkit/tmp/gui_test_scripts +cat > .rpgkit/tmp/gui_test_scripts/02_form_fill.py << 'PYEOF' +import time +# Verify: dropdown selection + form fill + submit +wid = gui.find_window("My App") +if wid: + gui.focus_window(wid) +gui.click(120, 45) # open dropdown/menu +time.sleep(0.3) +gui.click(120, 120) # select option +time.sleep(0.3) +gui.click(200, 150) # click input field +gui.type_text("my value") +gui.key("Tab") +gui.type_text("another value") +gui.key("Return") +time.sleep(0.5) +gui.screenshot() # verify result +PYEOF +python $GUI_TOOL run-script --file .rpgkit/tmp/gui_test_scripts/02_form_fill.py +``` + +**Simple one-off scripts** (no need to persist): +```bash +python $GUI_TOOL run-script --script 'gui.click(100, 200); import time; time.sleep(0.3); gui.screenshot()' +``` + +In run-script: `gui` (GuiHelper with click/type_text/key/scroll/screenshot/ +find_window/focus_window), `subprocess`, `Path`, `time`, `print`. +**Always clean up when finished:** `gui.py close` then `gui.py stop-display`. 
+ +--- + +## Project Context + +**Repository:** $REPO_PATH + +### Functional Requirements (from feature spec) +$REQUIREMENTS_TEXT + +### Source Files +$FILE_LIST + +### Reusable GUI Interaction Scripts (if any) +$GUI_SCRIPT_REUSE_CONTEXT + +### Previous Issues (unresolved from last iteration) +$PREVIOUS_ISSUES +""" + + +# ============================================================================ +# Helpers and main entry point +# ============================================================================ + + +def _load_feature_spec() -> Dict[str, Any]: + """Load feature_spec.json for review prompt.""" + if FEATURE_SPEC_FILE.exists(): + try: + with open(FEATURE_SPEC_FILE, "r", encoding="utf-8") as f: + return json.load(f) + except Exception: + pass + return {} + + +def _load_all_stage_findings_for_prompt() -> str: + """Build a compact summary of failed stages with file paths. + + Only includes stages that have errors. Provides status + one-line + summary + absolute file path so the agent can read details on demand, + keeping the prompt small. + """ + parts: List[str] = [] + + # 1. Pytest failures + ft = _load_stage_result("final_test") + ft_path = _LOGS_DIR / "codegen_final_test.json" + if ft and not ft.get("success", True): + parts.append( + f"- **pytest**: FAILED ({ft.get('failed', 0)} failures, " + f"{ft.get('errors', 0)} errors). " + f"Fix these FIRST. Details: `cat {ft_path}`" + ) + + # 2. Smoke test errors + st = _load_stage_result("smoke_test") + st_path = _LOGS_DIR / "codegen_smoke_test.json" + if st: + error_count = len([f for f in st.get("findings", []) if f.get("severity") == "error"]) + if error_count > 0: + repair_note = "" + if st.get("repair_attempted"): + repair_note = f" (repair attempted, {st.get('repair_remaining', '?')} remaining)" + parts.append( + f"- **smoke_test**: {error_count} error(s){repair_note}. " + f"Details: `cat {st_path}`" + ) + + # 3. 
Previous global review + gr = _load_stage_result("global_review") + gr_path = _LOGS_DIR / "codegen_global_review.json" + if gr: + iters = gr.get("iterations", []) + if iters and not iters[-1].get("review_passed"): + detail = iters[-1].get("detail", "")[:80] + parts.append( + f"- **global_review**: {len(iters)} iteration(s), " + f"last: {detail}. Details: `cat {gr_path}`" + ) + + if not parts: + return "" + return ( + "**Prior stage failures** (read the file for full details):\n" + + "\n".join(parts) + ) + + +def _format_requirements_for_review(feature_spec: Dict[str, Any]) -> str: + """Format functional requirements for the review prompt. + + Handles the feature_spec.json structure: + { "functional_requirements": [ + { "id": "FT-001", "name": "...", "description": "...", + "children": [ { "id": "FT-001-001", "name": "...", ... } ] } + ] } + """ + frs = feature_spec.get("functional_requirements", []) + if not frs: + return "(No functional requirements found)" + + parts = [] + for i, fr in enumerate(frs, 1): + name = fr.get("name", fr.get("title", "")) + desc = fr.get("description", "") + text = f"{name}: {desc}" if desc else name + + # Recursively collect children (supports arbitrary depth) + children = fr.get("children", fr.get("sub_features", fr.get("features", []))) + if children: + sub_texts = _collect_children(children, depth=1) + text += "\n" + "\n".join(sub_texts) + parts.append(f"FR{i}: {text}") + + return "\n\n".join(parts) + + +def _collect_children(children: list, depth: int = 1, max_depth: int = 3) -> List[str]: + """Recursively collect child feature descriptions.""" + lines = [] + indent = " " * depth + for child in children: + name = child.get("name", child.get("title", "")) + desc = child.get("description", "") + line = f"{indent}- {name}" + if desc and desc != name: + line += f": {desc}" + lines.append(line) + if depth < max_depth: + sub = child.get("children", child.get("sub_features", [])) + if sub: + lines.extend(_collect_children(sub, depth + 1, 
max_depth)) + return lines + + +def _load_gui_script_reuse_context(repo_path: Path) -> str: + """Load reusable GUI interaction scripts for review prompt context. + + Scripts are stored under ``repo/.rpgkit/tmp/gui_test_scripts`` and are + intended to capture stable, previously-validated interaction flows. + """ + scripts_dir = repo_path / ".rpgkit" / "tmp" / "gui_test_scripts" + if not scripts_dir.is_dir(): + return "(No reusable GUI scripts found yet)" + + files = sorted(p for p in scripts_dir.glob("*.py") if p.is_file()) + if not files: + return "(No reusable GUI scripts found yet)" + + # Keep prompt size bounded. + max_chars = 8000 + used = 0 + chunks: List[str] = [ + "Use these scripts first when testing matching GUI flows. " + "If a script fails due to UI changes, update it and continue." + ] + + for idx, f in enumerate(files): + rel = f.relative_to(repo_path) + try: + content = f.read_text(encoding="utf-8") + except Exception: + content = "# (failed to read script)" + + block = f"\n- {rel}\n```python\n{content}\n```\n" + if used + len(block) > max_chars: + remaining = len(files) - idx + if remaining > 0: + chunks.append(f"\n... ({remaining} more scripts omitted for size)\n") + break + + chunks.append(block) + used += len(block) + + return "\n".join(chunks) + + +def _build_review_prompt(repo_path: Path, previous_issues: str = "") -> str: + """Build the global review prompt. + + Uses $VAR substitution instead of .format() to avoid conflicts with + curly braces in example code (e.g., f-string {path} in GUI examples). 
+ """ + feature_spec = _load_feature_spec() + requirements_text = _format_requirements_for_review(feature_spec) + + # Reuse smoke_test's file finder for consistency + try: + from smoke_test import _find_source_files + source_files = _find_source_files(repo_path) + file_lines = [] + for f in source_files: + rel = f.relative_to(repo_path) + size = f.stat().st_size + file_lines.append(f" {rel} ({size} bytes)") + file_list = "\n".join(file_lines) + except ImportError: + file_list = "(file listing unavailable)" + + venv_python = get_dev_python(repo_path) or "python3" + pytest_cmd = build_batch_pytest_cmd([], venv_python) + gui_script_reuse_context = _load_gui_script_reuse_context(repo_path) + + # Load accumulated findings from all pipeline stages + findings_text = _load_all_stage_findings_for_prompt() + if findings_text: + if previous_issues: + previous_issues = findings_text + "\n\n---\n\n" + previous_issues + else: + previous_issues = findings_text + + # Absolute paths for tools so the prompt is cwd-agnostic. + browser_tool = str(TOOLS_DIR / "browser.py") + gui_tool = str(TOOLS_DIR / "gui.py") + + # Use string.Template for safe substitution ($VAR style) + from string import Template + tmpl = Template(GLOBAL_REVIEW_PROMPT) + return tmpl.safe_substitute( + REPO_PATH=str(repo_path), + REQUIREMENTS_TEXT=requirements_text, + FILE_LIST=file_list, + GUI_SCRIPT_REUSE_CONTEXT=gui_script_reuse_context, + PREVIOUS_ISSUES=previous_issues or "(First iteration โ€” no previous issues)", + PYTEST_CMD=pytest_cmd, + BROWSER_TOOL=browser_tool, + GUI_TOOL=gui_tool, + ) + + +def _extract_review_checklist(response: str) -> Dict[str, Any]: + """Extract the structured Review Checklist from a review response. + + Parses three content sections (Functional Verification, Visual Quality, + Discovered Issues) and Tool Usage metrics. Each item is classified by + its checkbox symbol: [x] verified, [~] fixed, [ ] failed, [-] not tested. 
+ + Returns dict with: + functional: dict with verified/fixed/failed/not_tested lists + visual: dict with verified/fixed/failed/not_tested lists + discovered: dict with verified/fixed/failed/not_tested lists + pages_inspected: int + forms_tested: int + screenshots_taken: int + """ + def _empty_section() -> Dict[str, list]: + return {"verified": [], "fixed": [], "failed": [], "not_tested": []} + + _symbol_to_key = {"x": "verified", "~": "fixed", " ": "failed", "-": "not_tested"} + + result: Dict[str, Any] = { + "functional": _empty_section(), + "visual": _empty_section(), + "discovered": _empty_section(), + "pages_inspected": 0, + "forms_tested": 0, + "screenshots_taken": 0, + } + + # Map heading text โ†’ which content section to fill + section = None # current content section key or "_tools" + heading_map = { + "functional verification": "functional", + "functional": "functional", + "visual quality": "visual", + "visual": "visual", + "discovered issues": "discovered", + "discovered": "discovered", + "tool usage": "_tools", + # Legacy headings (backward compat) + "verified": "functional", + "fixed": "functional", + "failed": "functional", + "not tested": "functional", + "not_tested": "functional", + } + # For legacy single-section format, track which symbol bucket to force + _legacy_force_key: Optional[str] = None + _legacy_key_map = { + "verified": "verified", "fixed": "fixed", + "failed": "failed", "not tested": "not_tested", + } + + for line in response.splitlines(): + stripped = line.strip() + + # Detect section headers: ### Functional Verification, ### Visual Quality, etc. 
+ if stripped.startswith("###"): + heading = stripped.lstrip("# ").strip().lower() + matched = heading_map.get(heading) + if matched: + section = matched + # Legacy: if heading is "Verified"/"Fixed"/etc., force that key; + # otherwise reset to None so new-format sections use symbol-based classification + _legacy_force_key = _legacy_key_map.get(heading) # None for new-format headings + continue + + # Parse checklist items: - [x], - [~], - [ ], - [-] + if section and section != "_tools": + m = re.match(r'^-\s*\[([ x~-])\]\s*(.+)', stripped) + if m: + symbol = m.group(1) + text = m.group(2).strip() + if _legacy_force_key: + # Legacy format: heading determines the key + result[section][_legacy_force_key].append(text) + else: + # New format: symbol determines the key + key = _symbol_to_key.get(symbol, "failed") + result[section][key].append(text) + continue + + # Parse Tool Usage metrics + if section == "_tools": + m = re.match(r'^-\s*[Pp]ages inspected:\s*(\d+)', stripped) + if m: + result["pages_inspected"] = int(m.group(1)) + continue + m = re.match(r'^-\s*[Ff]orms tested:\s*(\d+)', stripped) + if m: + result["forms_tested"] = int(m.group(1)) + continue + m = re.match(r'^-\s*[Ss]creenshots taken:\s*(\d+)', stripped) + if m: + result["screenshots_taken"] = int(m.group(1)) + continue + + # Fallback: also try old-style FR lines (FR1: ... โ€” PASS/FAIL) + func = result["functional"] + if not func["verified"] and not func["fixed"] and not func["failed"]: + for line in response.splitlines(): + stripped = line.strip() + if re.match(r'^FR\d+:', stripped): + if 'PASS' in stripped: + func["verified"].append(stripped) + elif 'FAIL' in stripped: + func["failed"].append(stripped) + + return result + + +def _parse_review_result(response: Optional[str]) -> Tuple[bool, str]: + """Parse the sub-agent's review result. + + Handles both new format (DONE/CONTINUE/BLOCKED) and legacy format + (PASS/PARTIAL/FAIL/CONTEXT_LIMIT). Handles markdown formatting. + Returns (passed, detail). 
+ """ + if not response: + return False, "no response" + + lines = response.strip().splitlines() + search_lines = lines[-30:] if len(lines) > 30 else lines + for line in reversed(search_lines): + # Strip whitespace and markdown bold/italic markers + line = line.strip().strip("*").strip("_").strip() + if line.startswith("REVIEW_RESULT:"): + rest = line[len("REVIEW_RESULT:"):].strip() + # New format + if rest.startswith("DONE"): + return True, rest + elif rest.startswith("CONTINUE"): + return False, rest + elif rest.startswith("BLOCKED"): + return False, rest + # Legacy format (backward compat) + elif rest == "PASS": + return True, "DONE (legacy PASS)" + elif rest.startswith("PARTIAL"): + return False, f"CONTINUE (legacy {rest})" + elif rest.startswith("FAIL"): + return False, rest + elif rest.startswith("CONTEXT_LIMIT"): + return False, f"CONTINUE (legacy {rest})" + + return False, "no REVIEW_RESULT found in response" + + +def _build_review_retry_context( + response: str, + post_pytest: Any, + post_stubs: List[str], + detail: str = "", + checklist: Optional[Dict[str, Any]] = None, +) -> str: + """Build context from previous iteration for retry prompt. + + Uses the structured checklist (functional/visual/discovered) to pass + completed/failed items to the next iteration. 
+ """ + parts: List[str] = [] + + # Check if previous iteration didn't finish or was blocked + if "BLOCKED" in detail: + parts.append( + "IMPORTANT: The previous iteration was BLOCKED.\n" + f"Reason: {detail}\n" + "Diagnose and fix the blocker, then continue verification.\n" + ) + elif "CONTINUE" in detail: + parts.append( + "IMPORTANT: The previous iteration did not finish all items.\n" + "Continue reviewing from where it left off.\n" + "DO NOT re-verify items already marked [x] or [~] below.\n" + ) + + # Use structured checklist if available + if checklist: + for section_name, label in [ + ("functional", "Functional Verification"), + ("visual", "Visual Quality"), + ("discovered", "Discovered Issues"), + ]: + sec = checklist.get(section_name, {}) + if not sec: + continue + verified = sec.get("verified", []) + fixed = sec.get("fixed", []) + failed = sec.get("failed", []) + not_tested = sec.get("not_tested", []) + if not any([verified, fixed, failed, not_tested]): + continue + + parts.append(f"\n### {label} (from previous iteration)") + if verified: + parts.append(f"Already verified ({len(verified)}) โ€” skip:") + parts.extend(f" [x] {item}" for item in verified[:30]) + if fixed: + parts.append(f"Fixed ({len(fixed)}):") + parts.extend(f" [~] {item}" for item in fixed[:20]) + if failed: + parts.append(f"Still FAILED ({len(failed)}) โ€” fix these:") + parts.extend(f" [ ] {item}" for item in failed[:20]) + if not_tested: + parts.append(f"Not yet tested ({len(not_tested)}) โ€” verify these:") + parts.extend(f" [-] {item}" for item in not_tested[:20]) + + pages = checklist.get("pages_inspected", 0) + forms = checklist.get("forms_tested", 0) + if pages or forms: + parts.append(f"\nPrevious tool usage: pages_inspected={pages}, forms_tested={forms}") + else: + # Fallback: extract FR lines from raw response + fr_lines = [ + line.strip() for line in response.splitlines() + if line.strip().startswith("FR") and ("PASS" in line or "FAIL" in line) + ] + if fr_lines: + pass_lines 
= [fr for fr in fr_lines if "PASS" in fr] + fail_lines = [fr for fr in fr_lines if "FAIL" in fr] + if pass_lines: + parts.append(f"Features already verified PASS ({len(pass_lines)}):") + parts.extend(f" {line}" for line in pass_lines[:30]) + if fail_lines: + parts.append(f"\nFeatures that FAIL ({len(fail_lines)}):") + parts.extend(f" {line}" for line in fail_lines[:20]) + else: + fail_lines = [ + line.strip() for line in response.splitlines() + if "FAIL" in line and "FR" in line + ] + if fail_lines: + parts.append("Features that still FAIL:") + parts.extend(f" {line}" for line in fail_lines[:20]) + + # Post-pytest failures + if not post_pytest.success: + parts.append( + f"\npytest regressions: {post_pytest.failed} failed, " + f"{post_pytest.errors} errors" + ) + output_tail = "\n".join(post_pytest.output.splitlines()[-30:]) + parts.append(f"pytest output (tail):\n{output_tail}") + + # Remaining stubs + if post_stubs: + parts.append(f"\nRemaining stubs ({len(post_stubs)}):") + parts.extend(f" {s}" for s in post_stubs[:10]) + + return "\n".join(parts) + + +def _cleanup_background_processes(repo_path: Path) -> None: + """Best-effort cleanup of Python processes started in repo_path.""" + import signal + import subprocess as _sp + try: + # Use lsof to find processes with files open in repo_path + # This is more targeted than pgrep -f which matches too broadly + result = _sp.run( + ["lsof", "+D", str(repo_path), "-t"], + capture_output=True, text=True, timeout=5, + ) + if result.returncode == 0: + my_pid = os.getpid() + my_ppid = os.getppid() + for pid_str in result.stdout.strip().splitlines(): + try: + pid = int(pid_str.strip()) + except ValueError: + continue + if pid in (my_pid, my_ppid): + continue # Don't kill ourselves or parent + try: + os.kill(pid, signal.SIGTERM) + logger.info("Cleaned up process %d", pid) + except OSError: + pass + except FileNotFoundError: + # lsof not available โ€” skip cleanup, prompt already asks agent to clean up + logger.debug("lsof not 
found, skipping process cleanup") + except Exception as exc: + logger.debug("Process cleanup failed (non-fatal): %s", exc) + + # Also clean up any Xvfb processes on the default display + try: + result = _sp.run( + ["pgrep", "-f", "Xvfb :99( |$)"], + capture_output=True, text=True, timeout=3, + ) + if result.returncode == 0: + for pid_str in result.stdout.strip().splitlines(): + try: + pid = int(pid_str.strip()) + os.kill(pid, signal.SIGTERM) + logger.info("Cleaned up Xvfb process %d", pid) + except (ValueError, OSError): + pass + except Exception: + pass + + +# ============================================================================ +# E2 โ€” Heartbeat helper for long-running sub-agent calls +# ============================================================================ + +class _HeartbeatLogger: + """Emit a periodic ``...still running, elapsed=Xs`` log line. + + Designed to wrap a single long-running blocking call (typically + ``dispatch_sub_agent`` inside a global_review iteration). Exits + cleanly via context manager โ€” the daemon thread stops as soon as + ``__exit__`` runs, even when the wrapped call raises (plan E2). + """ + + def __init__(self, label: str, interval_s: int = 60) -> None: + self._label = label + self._interval = max(1, int(interval_s)) + self._stop = threading.Event() + self._thread: Optional[threading.Thread] = None + + def __enter__(self) -> "_HeartbeatLogger": + start = time.time() + + def _beat() -> None: + while not self._stop.wait(self._interval): + elapsed = int(time.time() - start) + logger.info("%s ...still running, elapsed=%ds", self._label, elapsed) + + self._thread = threading.Thread( + target=_beat, name=f"heartbeat:{self._label}", daemon=True, + ) + self._thread.start() + return self + + def __exit__(self, exc_type, exc, tb) -> None: + self._stop.set() + # Don't join longer than one tick โ€” the thread is daemon and will + # exit on its own at process shutdown if join fails. 
+ if self._thread is not None: + self._thread.join(timeout=2) + + +def global_review( + repo_path: Optional[Path] = None, + max_iterations: int = 10, + timeout_per_iteration: int = 1800, +) -> Dict[str, Any]: + """Run global feature review with iterative repair. + + Dispatches a sub-agent as QA engineer to verify every feature + against the feature_spec, fix issues, and iterate until all pass. + + Args: + repo_path: Project repo path. + max_iterations: Max review+repair cycles. + timeout_per_iteration: Sub-agent timeout per iteration (seconds). + + Returns: + Result dict with review findings and statistics. + """ + repo_path = repo_path or REPO_DIR + + results: Dict[str, Any] = { + "type": "global_review", + "iterations": [], + "success": False, + "total_duration": 0.0, + } + start_time = time.time() + previous_issues = "" + + for iteration in range(1, max_iterations + 1): + logger.info("โ”โ”โ” Global Review: iteration %d/%d โ”โ”โ”", iteration, max_iterations) + + # Clean screenshots from previous iteration so size check is fresh + try: + screenshots_dir = repo_path / ".rpgkit" / "tmp" / "screenshots" + if screenshots_dir.is_dir(): + shutil.rmtree(screenshots_dir) + except Exception: + pass + + # 1. Pre-check: run pytest to know current state + try: + ensure_deps_installed(repo_path) + except Exception: + pass + pre_pytest = run_pytest( + repo_path, + timeout=DEFAULT_PYTEST_OVERALL_TIMEOUT, + extra_args=[f"--timeout={DEFAULT_TEST_TIMEOUT}", "--timeout-method=thread"], + ) + # Update stage file so _build_review_prompt sees fresh state + _save_stage_result("final_test", { + "success": pre_pytest.success, + "passed": pre_pytest.passed, + "failed": pre_pytest.failed, + "errors": pre_pytest.errors, + "output_tail": "\n".join(pre_pytest.output.splitlines()[-40:]) if not pre_pytest.success else "", + }) + if not pre_pytest.success: + logger.warning( + "pytest pre-check: %d failures, %d errors โ€” agent will fix", + pre_pytest.failed, pre_pytest.errors, + ) + + # 2. 
Build prompt + prompt = _build_review_prompt(repo_path, previous_issues=previous_issues) + + # 3. Dispatch sub-agent (with retries for transient failures). + # Wrap with a heartbeat so the operator sees the iteration is + # still alive even if the sub-agent runs for many minutes + # without producing output (plan E2). + with _HeartbeatLogger( + label=f"global_review[{iteration}/{max_iterations}]", + interval_s=60, + ): + response, error = dispatch_sub_agent( + prompt, repo_path, + timeout=timeout_per_iteration, + purpose=f"global_review_{iteration}", + max_retries=3, + ) + + if error: + results["iterations"].append({ + "iteration": iteration, + "error": error, + }) + # Don't break โ€” transient LLM failures shouldn't abort the + # entire review. Continue to the next iteration which will + # rebuild the prompt and retry. + logger.warning( + "Dispatch error on iteration %d, will retry next iteration: %s", + iteration, error[:120], + ) + if iteration == max_iterations: + logger.error("Dispatch error on final iteration โ€” giving up") + continue + + # 4. Parse sub-agent result and extract checklist + review_passed, detail = _parse_review_result(response) + checklist = _extract_review_checklist(response) if response else None + + # 5. 
Post-verify (independent โ€” don't trust sub-agent) + _cleanup_background_processes(repo_path) + + post_pytest = run_pytest( + repo_path, + timeout=DEFAULT_PYTEST_OVERALL_TIMEOUT, + extra_args=["-v", "--tb=short", f"--timeout={DEFAULT_TEST_TIMEOUT}", "--timeout-method=thread"], + ) + + # Stub check + post_stubs: List[str] = [] + try: + from code_gen.static_checks import static_completeness_check + from smoke_test import _find_source_files + source_files = _find_source_files(repo_path) + file_paths = [str(f.relative_to(repo_path)) for f in source_files] + post_stubs = [ + s for s in static_completeness_check(file_paths, repo_path) + if s.startswith("STUB:") + ] + except Exception as exc: + logger.debug("Stub check during review post-verify failed: %s", exc) + + # Framework-level tool usage validation: + # If agent claims DONE but never inspected any pages, override to CONTINUE + tools_used = False + if checklist: + pages = checklist.get("pages_inspected", 0) + screenshots = checklist.get("screenshots_taken", 0) + tools_used = pages > 0 or screenshots > 0 + if review_passed and not tools_used: + logger.warning( + "Agent reported DONE but pages_inspected=%d, screenshots=%d " + "โ€” overriding to CONTINUE (visual verification required)", + pages, screenshots, + ) + review_passed = False + detail = ( + "CONTINUE (overridden: agent claimed DONE but did not use " + "browser/GUI tools for visual verification)" + ) + + # Override DONE if post-pytest failed (agent's fixes may have broken tests) + if review_passed and not post_pytest.success: + logger.warning( + "Agent reported DONE but post-pytest FAILED (%d failures, %d errors) " + "โ€” overriding to CONTINUE", + post_pytest.failed, post_pytest.errors, + ) + review_passed = False + detail = ( + f"CONTINUE (overridden: post-pytest failed with " + f"{post_pytest.failed} failures, {post_pytest.errors} errors)" + ) + + # Framework-level screenshot content validation: + # Check if screenshots are suspiciously small (black/empty 
= ~250 bytes). + # A normal screenshot with actual content is at least a few KB. + # This check uses filesystem evidence directly, not the checklist's + # self-reported metrics (which may be missing or malformed). + if review_passed: + try: + screenshots_dir = repo_path / ".rpgkit" / "tmp" / "screenshots" + if screenshots_dir.is_dir(): + png_files = list(screenshots_dir.glob("*.png")) + if png_files: + small_count = sum( + 1 for f in png_files if f.stat().st_size < 1000 + ) + # Fail if majority of screenshots are blank + if small_count > 0 and small_count >= len(png_files) * 0.5: + logger.warning( + "%d/%d screenshots are < 1KB (likely blank/black) " + "โ€” overriding to CONTINUE", + small_count, len(png_files), + ) + review_passed = False + detail = ( + f"CONTINUE (overridden: {small_count}/{len(png_files)} " + f"screenshots are < 1KB, likely blank/black โ€” " + f"GUI may not have rendered)" + ) + except Exception as exc: + logger.debug("Screenshot size check failed: %s", exc) + + # Check for unresolved items in any checklist section + has_unresolved = False + if checklist: + for sec_name in ("functional", "visual", "discovered"): + sec = checklist.get(sec_name, {}) + if sec.get("failed"): + has_unresolved = True + break + # For functional/visual: not_tested items are blockers. + # For discovered: not_tested items are best-effort (bonus + # findings the agent couldn't verify), not blockers. 
+ if sec_name != "discovered" and sec.get("not_tested"): + has_unresolved = True + break + if review_passed and has_unresolved: + logger.warning( + "Agent reported DONE but checklist has unresolved items " + "โ€” overriding to CONTINUE" + ) + review_passed = False + detail = "CONTINUE (overridden: checklist has unresolved [ ] or [-] items)" + + # Build iteration result with checklist stats per section + cl_stats: Dict[str, Any] = {} + if checklist: + for sec_name in ("functional", "visual", "discovered"): + sec = checklist.get(sec_name, {}) + total = sum(len(sec.get(k, [])) for k in ("verified", "fixed", "failed", "not_tested")) + if total > 0: + cl_stats[sec_name] = { + "verified": len(sec.get("verified", [])), + "fixed": len(sec.get("fixed", [])), + "failed": len(sec.get("failed", [])), + "not_tested": len(sec.get("not_tested", [])), + } + cl_stats["pages_inspected"] = checklist.get("pages_inspected", 0) + cl_stats["forms_tested"] = checklist.get("forms_tested", 0) + cl_stats["screenshots_taken"] = checklist.get("screenshots_taken", 0) + + iteration_result = { + "iteration": iteration, + "review_passed": review_passed, + "post_pytest_pass": post_pytest.success, + "post_stub_count": len(post_stubs), + "detail": detail, + "tools_used": tools_used, + "checklist": cl_stats, + } + results["iterations"].append(iteration_result) + + logger.info( + "Review iteration %d: agent=%s, pytest=%s, stubs=%d, " + "tools=%s, checklist=%s, detail=%s", + iteration, + "DONE" if review_passed else "CONTINUE", + "PASS" if post_pytest.success else "FAIL", + len(post_stubs), + "yes" if tools_used else "NO", + cl_stats if cl_stats else "none", + detail[:80], + ) + + # 6. 
Decision + if review_passed and post_pytest.success and len(post_stubs) == 0: + results["success"] = True + logger.info("Global review DONE on iteration %d", iteration) + break + + # Build context for next iteration (pass checklist for structured retry) + if response: + previous_issues = _build_review_retry_context( + response, post_pytest, post_stubs, + detail=detail, checklist=checklist, + ) + else: + previous_issues = "Previous iteration produced no response." + + if iteration == max_iterations: + logger.warning( + "Global review reached max iterations (%d) without full pass", + max_iterations, + ) + + results["total_duration"] = round(time.time() - start_time, 1) + + # Persist results for cross-stage context + try: + _save_stage_result("global_review", results) + except Exception: + pass + + return results + + diff --git a/RPG-Kit/scripts/code_gen/post_verify.py b/RPG-Kit/scripts/code_gen/post_verify.py new file mode 100644 index 0000000..a960a31 --- /dev/null +++ b/RPG-Kit/scripts/code_gen/post_verify.py @@ -0,0 +1,147 @@ +#!/usr/bin/env python3 +"""Post-verification: independent pytest re-run after a sub-agent batch. + +This module hosts :func:`post_verify`, extracted from +``scripts/run_batch.py`` Module 4 ("Post-Verification"). + +The sub-agent self-reports ``BATCH_RESULT: PASS`` or ``FAIL`` after its +TDD cycle, but we do **not** trust that signal โ€” :func:`post_verify` +re-runs pytest from the orchestrator process to get an authoritative +answer. This catches two failure modes: + +* Sub-agent claims PASS but actually skipped failing tests. +* Sub-agent's environment differed from the orchestrator's (different + PYTHONPATH, stale ``__pycache__``, etc.). + +This is an internal helper used only by ``scripts.run_batch``; no +external API contract. 
+""" + +from __future__ import annotations + +import logging +import subprocess +from pathlib import Path +from typing import Tuple + +from common.git_utils import GitRunner +from common.task_batch import PlannedTask +from code_gen.prompts import is_project_docs_batch +from code_gen.test_runner import ( + ensure_deps_installed, + find_related_test_files, + run_pytest, +) + +logger = logging.getLogger(__name__) + + +from code_gen._constants import ( # noqa: E402 + DEFAULT_PYTEST_OVERALL_TIMEOUT, + DEFAULT_TEST_TIMEOUT, +) + + +def post_verify( + repo_path: Path, + task: PlannedTask, + timeout: int = 0, # 0 = auto-select based on task type +) -> Tuple[bool, str]: + """Run an independent pytest to verify the batch result. + + This is the authoritative check โ€” we do NOT trust the sub-agent's + self-reported BATCH_RESULT. + + Args: + repo_path: Project repo path. + task: The PlannedTask for this batch. + timeout: Overall pytest timeout. + + Returns: + ``(passed, test_output_summary)`` + """ + # Skip verification for docs batches + if is_project_docs_batch(task): + logger.info("Skipping post-verification for docs batch") + return True, "Documentation batch โ€” no tests." + + # Use the global safety-net timeout for all task types. + # Per-test hang prevention is handled by pytest-timeout (--timeout=DEFAULT_TEST_TIMEOUT). + if timeout == 0: + timeout = DEFAULT_PYTEST_OVERALL_TIMEOUT + + def _git_diff_test_files(prefix: str = "tests/") -> list: + """Return test files added/modified by this batch branch vs the main branch.""" + try: + main_branch = GitRunner(str(repo_path)).main_branch + diff = subprocess.run( + ["git", "diff", f"{main_branch}..HEAD", "--name-only"], + cwd=repo_path, capture_output=True, text=True, timeout=10, + ) + return [ + str(repo_path / f) for f in diff.stdout.splitlines() + if f.startswith(prefix) and (repo_path / f).exists() + ] + except Exception: + return [] + + # Find test files to scope post-verification. 
+ # Special file_path values like "" or "" indicate + # synthetic tasks; use git diff to find only what this batch added/modified. + test_files = [] + if not (task.file_path.startswith("<") and task.file_path.endswith(">")): + # Regular file batch: find tests related to the target source file. + test_files = find_related_test_files(task.file_path, repo_path) + elif task.task_type == "integration_test": + # Find integration test files added/modified in this batch via git diff. + # Falls back to deriving the filename from the unit name. + test_files = _git_diff_test_files("tests/test_integration_") + if not test_files: + # Derived fallback: "Application Core_integration_tests" โ†’ test_integration_app_core.py + for unit in task.units_key: + subtree_name = unit.replace("_integration_tests", "").strip() + fname = "test_integration_" + subtree_name.lower().replace(" ", "_") + ".py" + candidate = repo_path / "tests" / fname + if candidate.exists(): + test_files.append(str(candidate)) + elif task.task_type == "wiring": + # Wiring verifies cross-module connections; run every test file the batch + # added or modified. If git diff finds nothing (e.g., on a bare retry), + # fall back to all tests so no regression goes undetected. 
+ test_files = _git_diff_test_files("tests/test_") + + logger.info( + "Post-verification: running pytest on %s", + test_files if test_files else "all tests", + ) + + # Ensure deps are installed (sub-agent may have added new ones) + try: + ensure_deps_installed(repo_path) + except Exception as exc: + logger.warning("ensure_deps_installed failed: %s", exc) + + result = run_pytest( + repo_path, + test_files=test_files or None, + timeout=timeout, + extra_args=[f"--timeout={DEFAULT_TEST_TIMEOUT}", "--timeout-method=thread"], + ) + + # Build summary + summary_lines = [ + f"passed={result.passed} failed={result.failed} " + f"errors={result.errors} skipped={result.skipped}", + ] + if not result.success: + # Include truncated output for the resume prompt + output = result.output + if len(output) > 4000: + output = output[:4000] + "\n...(truncated)" + summary_lines.append(output) + + summary = "\n".join(summary_lines) + logger.info("Post-verification result: success=%s %s", result.success, summary_lines[0]) + if not result.success: + logger.debug("Post-verification pytest output:\n%s", result.output) + return result.success, summary diff --git a/RPG-Kit/scripts/code_gen/prompts.py b/RPG-Kit/scripts/code_gen/prompts.py new file mode 100644 index 0000000..0194f19 --- /dev/null +++ b/RPG-Kit/scripts/code_gen/prompts.py @@ -0,0 +1,1219 @@ +#!/usr/bin/env python3 +"""Prompt Templates for Code Generation. 
+ +Contains all prompt templates used in the TDD workflow: +- Test generation prompts +- Code generation prompts +- Environment setup prompts +- Failure analysis prompts +""" + +import sys as _sys +from pathlib import Path as _Path +from typing import Optional, Dict, Any, List, TYPE_CHECKING + +if TYPE_CHECKING: + from common.task_batch import PlannedTask + +# Ensure scripts dir is on path for common.paths import +_sys.path.insert(0, str(_Path(__file__).resolve().parent.parent)) +from common.paths import REPO_DIR as _REPO_DIR +import ast as _ast_mod + + +# ============================================================================ +# Dependency Context Formatter +# ============================================================================ + +def _read_source_file(file_path: str, max_bytes: int = 8192) -> Optional[str]: + """Read a source file from disk (skeleton or implementation). + + *file_path* is relative to the project repo root (e.g. ``src/pkg/foo.py``). + The function resolves it against :data:`common.paths.REPO_DIR` to find + the actual file. + + Returns the file content (truncated to *max_bytes*) or ``None`` + when the file does not exist or is empty. + """ + if not file_path: + return None + p = _Path(file_path) + # If relative, resolve against the project repo directory + if not p.is_absolute(): + p = _REPO_DIR / p + if not p.is_file(): + return None + try: + text = p.read_text(encoding="utf-8", errors="replace") + if not text.strip(): + return None + if len(text) > max_bytes: + text = text[:max_bytes] + "\n# ... (truncated)\n" + return text + except Exception: + return None + + +def _format_skeleton_context(file_path: str) -> str: + """Return a prompt section with the skeleton / interface file content. + + Used in the FIRST iteration when only skeleton code exists on disk. + If the file doesn't exist or is empty, returns an empty string. 
+ """ + content = _read_source_file(file_path) + if not content: + return "" + return ( + f"\n## Skeleton / Interface Definitions for `{file_path}`\n" + "The target file already contains interface definitions (class signatures, method\n" + "signatures, docstrings, type hints) from the design stage. **Your tests MUST\n" + "target ONLY the classes, methods, and signatures defined below.** Do NOT invent\n" + "methods or features that are not present in this skeleton.\n\n" + f"```python\n{content}\n```\n" + ) + + +def _format_current_source_context(file_path: str) -> str: + """Return a prompt section with the CURRENT source file content. + + Used in iteration 2+ after the code-generation sub-agent has written + actual implementation. The test agent needs to see the real code + to fix tests accurately. + """ + content = _read_source_file(file_path) + if not content: + return "" + return ( + f"\n## Current Implementation of `{file_path}`\n" + "The source file below contains the ACTUAL implementation code generated in the\n" + "previous step. **Your tests MUST match the real API** (class names, method names,\n" + "signatures, return types) as shown below. Fix any tests that expect methods or\n" + "behaviors not present in this implementation.\n\n" + f"```python\n{content}\n```\n" + ) + + +def _format_dependency_context(ctx: Optional[Dict[str, Any]]) -> str: + """Format the dependency context dict into a compact prompt section. + + Uses a "map not snapshot" approach: tells the sub-agent *where* to find + information (file paths, class names) rather than inlining full code. + The sub-agent has tool access and can read files on demand. + + Returns an empty string when *ctx* is ``None`` or empty. 
+ """ + if not ctx: + return "" + + parts: List[str] = [] + parts.append("\n## Project Context (from earlier design stages)\n") + + # --- Project background & technology stack -------------------------------- + project_bg = ctx.get("project_background", "") + completed = ctx.get("completed", {}) + if project_bg and project_bg.strip(): + if len(completed) == 0: + # First batch: full background + parts.append(project_bg) + parts.append( + "**Use the technology stack described above** when making implementation " + "decisions (framework choice, database layer, routing patterns, etc.). " + "Generate idiomatic code for the specified technologies.\n" + ) + else: + # Subsequent batches: one-line summary (sub-agent can read files for full context) + # Extract first line as a compact summary + first_line = project_bg.strip().split('\n')[0].strip('#').strip() + parts.append( + f"### Project: {first_line}\n" + "See completed modules below for full architecture context. " + "Use the same technology stack and patterns as existing code.\n" + ) + + # --- Base classes: compact summary with file pointers --------------------- + bc_data = ctx.get("base_classes", {}) + base_classes = bc_data.get("base_classes", []) + if base_classes: + parts.append("### Base Classes (shared across all modules)\n") + parts.append("Read these files directly for full API signatures and docstrings.\n") + for bc in base_classes: + fp = bc.get("file_path", "") + code = bc.get("code", "") + subs = bc.get("subclasses", {}) + if not code: + continue + # Extract class name and method names from code + try: + tree = _ast_mod.parse(code) + for node in _ast_mod.walk(tree): + if isinstance(node, _ast_mod.ClassDef): + methods = [n.name for n in node.body + if isinstance(n, (_ast_mod.FunctionDef, _ast_mod.AsyncFunctionDef))] + parts.append(f"- `{node.name}` in `{fp}` โ€” methods: {', '.join(methods)}") + if subs: + for parent, children in subs.items(): + if parent == node.name: + parts.append(f" Subclasses: {', 
'.join(children)}") + break + except SyntaxError: + parts.append(f"- `{fp}` (parse error โ€” read file directly)") + parts.append("") + + # --- Data structures: compact summary ------------------------------------ + data_structs = bc_data.get("data_structures", []) + if data_structs: + subtree = ctx.get("current_subtree", "") + parts.append(f"### Data Structures (subtree: {subtree})\n") + for ds in data_structs: + fp = ds.get("file_path", "") + types = ds.get("data_flow_types", []) + if types and fp: + parts.append(f"- Types: {', '.join(types)} โ€” read `{fp}`") + elif types: + parts.append(f"- Types: {', '.join(types)} โ€” defined in skeleton files") + parts.append("") + + # --- Data flow edges: compact text format -------------------------------- + df_edges = ctx.get("data_flow_edges", []) + if df_edges: + parts.append("### Data Flow (edges involving current subtree)\n") + for edge in df_edges: + src = edge.get("source", "?") + tgt = edge.get("target", "?") + data_type = edge.get("data_type", "") + dtype_str = f" ({data_type})" if data_type else "" + parts.append(f"- {src} โ†’ {tgt}{dtype_str}") + parts.append("") + + # --- Dependency files: deduplicated by file -------------------------------- + deps = ctx.get("dependencies", {}) + dep_files = deps.get("dependent_files", []) + if dep_files: + parts.append( + "### Dependencies of Current File\n" + "These files are dependencies of your current implementation.\n" + "**You MUST read these files** before writing code to understand:\n" + "- What functions/classes they export\n" + "- What parameters they expect\n" + "- What they return\n" + "Do NOT assume or invent APIs โ€” use the actual interface defined in these files.\n" + ) + # Group by file to deduplicate repeated entries + from collections import defaultdict as _defaultdict + + inh_grouped = _defaultdict(list) + for inh in deps.get("inherits_from", []): + parent = inh['parent'] + if parent not in inh_grouped[inh['parent_file']]: + 
inh_grouped[inh['parent_file']].append(parent) + for f, parents in inh_grouped.items(): + parts.append(f"- `{f}` (inherits: {', '.join(parents)})") + + inv_grouped = _defaultdict(list) + for inv in deps.get("invokes", []): + callee = inv['callee'] + if callee not in inv_grouped[inv['callee_file']]: + inv_grouped[inv['callee_file']].append(callee) + for f, callees in inv_grouped.items(): + parts.append(f"- `{f}` (invokes: {', '.join(callees)})") + + ref_grouped = _defaultdict(list) + for ref in deps.get("references", []): + typ = ref['type'] + if typ not in ref_grouped[ref['type_file']]: + ref_grouped[ref['type_file']].append(typ) + for f, types in ref_grouped.items(): + parts.append(f"- `{f}` (references: {', '.join(types)})") + parts.append("") + + # --- Completed modules: show ALL files, not truncated -------------------- + # 'completed' was already fetched above for project background shortening + if completed: + file_list = list(completed.keys()) + parts.append(f"### Already Completed Modules ({len(file_list)} files)\n") + parts.append( + "These files have been implemented โ€” import and use them freely.\n" + "**Read any of these files** if you need to understand their actual API " + "(function signatures, class interfaces, return types). " + "Do NOT guess what functions exist โ€” read the source code directly.\n" + ) + for fp in file_list: # Show ALL, not truncated + parts.append(f"- `{fp}`") + parts.append("") + + # --- ORM Model Registry: cross-file model import requirements ----------- + model_reg = ctx.get("model_registry", {}) + if model_reg and model_reg.get("models"): + models = model_reg["models"] + rels = model_reg.get("relationships", []) + model_files = model_reg.get("model_files", []) + + # Only show this section if there are cross-file relationships + # (i.e. the mapper-configuration trap is actually possible). + # Projects with models all in one file don't have this problem. 
+ cross_file_rels = [r for r in rels if r.get("target_file") and + r["target_file"] != r.get("source_file")] + + if cross_file_rels: + parts.append("### ORM Model Registry\n") + parts.append("All ORM model classes in this project:\n") + for cls_name, cls_file in sorted(models.items()): + parts.append(f"- `{cls_name}` โ†’ `{cls_file}`") + + parts.append("\n**Cross-file relationships** (string references resolved at runtime):\n") + for r in cross_file_rels: + parts.append( + f"- `{r['source_class']}.{r['field']}` โ†’ `{r['target_class']}` " + f"(in `{r['target_file']}`)" + ) + + parts.append( + "\n**CRITICAL for tests**: ORM frameworks (SQLAlchemy, etc.) resolve " + "string-based relationship targets by looking up class names in the " + "mapper registry. When your test instantiates ANY model, the ORM may " + "eagerly configure ALL mappers. If model A has a relationship pointing " + "to model B in another file, class B must be imported โ€” even if your " + "test never uses B directly.\n" + "**Import ALL model files** in your test fixture before using any " + "model or calling `db.create_all()`:\n" + "```python\n" + ) + for mf in model_files: + mod = mf.replace("/", ".").replace(".py", "") + parts.append(f"import {mod} # noqa: F401") + parts.append("```\n") + + # NOTE: ctx may also contain "reverse_deps" (who depends on this file). + # Not displayed in TDD prompt โ€” value is marginal for codegen since + # skeleton already defines the fixed API. Reserved for future use in + # design_interfaces review (P2). 
+ + text = "\n".join(parts) + return text + + +# ============================================================================ +# Initial Prompts (First Iteration) +# ============================================================================ + +def init_test_gen_prompt( + task: str, + batch_units: str, + file_path: str, + task_type: str = "implementation", + dependency_context: Optional[Dict[str, Any]] = None, + **kwargs +) -> str: + """Generate initial test generation prompt for a task batch. + + (This step is for writing/adding tests only.). + + Args: + task: Task description + batch_units: Comma-separated list of unit keys + file_path: Target file path + task_type: Type of task (implementation, integration_test, final_test_docs) + dependency_context: Dependency context from earlier design stages + """ + if task_type == "implementation": + prompt = ( + "You are working in a Test-Driven Development (TDD) workflow.\n" + "In this step your responsibility is ONLY to write or update tests.\n" + "Do NOT modify production/source code and do NOT touch environment or dependency files.\n\n" + "Task description:\n" + f"{task}\n\n" + f"Target: write tests for units [{batch_units}] in {file_path}.\n\n" + "Requirements:\n" + "- Use the repository's existing test layout and conventions.\n" + "- Follow the same testing framework, helpers, fixtures, and style already used.\n" + "- Cover at minimum: normal behavior, key edge cases, and meaningful failure cases.\n" + "- Keep tests deterministic, readable, and maintainable.\n" + "- If the expected behavior is unclear, encode the most reasonable interpretation\n" + " and add comments explaining your assumptions.\n" + "- **Only import packages available in the environment.** Use Python standard library\n" + " and internal project modules (`src.*`) freely. For third-party packages, only import\n" + " them if they are already used by existing source files. 
Never add unused imports.\n" + "- **CRITICAL: Only test classes, methods, and functions that exist in the skeleton\n" + " file below (if provided). Do NOT invent or assume additional methods, features,\n" + " or APIs beyond what is defined in the skeleton.**\n" + "**Plan first โ€” output a brief summary** (3โ€“5 sentences) before writing any code:\n" + "- What test scenarios you intend to write and why.\n" + "- Key edge cases or design trade-offs you will address.\n" + "- Any assumptions about expected behavior.\n" + "This is a small task. **DO NOT over-engineer with too many tests.**\n" + ) + # Point agent to skeleton file (read on demand, not inlined) + if file_path: + prompt += ( + f"\nThe skeleton file `{file_path}` contains interface definitions " + "(signatures, docstrings, type hints). **Read this file** before " + "writing tests to understand the exact API.\n" + ) + elif task_type == "integration_test": + prompt = ( + "You are working on Integration Testing.\n" + "Your primary responsibility is to write or update integration tests.\n" + "If you discover genuine integration bugs in production code while writing tests, " + "note them โ€” you will have a chance to fix them in the next step.\n\n" + "Task description:\n" + f"{task}\n\n" + f"Target: write integration tests for units [{batch_units}].\n\n" + "File placement:\n" + "- There is NO pre-determined target file. 
You decide where to place test files.\n" + "- First, explore the repository to understand the existing test directory layout\n" + " (e.g., tests/, test/, or co-located test files).\n" + "- Create new test files following the same naming conventions (e.g., test_.py).\n" + "- Place integration tests in the most appropriate location per project conventions.\n\n" + "Requirements:\n" + "- Focus on testing interactions between components, modules, or systems.\n" + "- Use the repository's existing test layout and conventions.\n" + "- Test data flows, API contracts, and cross-module dependencies.\n" + "- Cover realistic scenarios including success paths and failure modes.\n" + "- Ensure tests are isolated and can run independently.\n" + "- Mock external dependencies appropriately.\n" + "- Keep tests deterministic, readable, and maintainable.\n" + ) + elif task_type == "final_test_docs": + prompt = ( + "You are working on Final Testing and Documentation.\n" + "Your primary responsibility is to write comprehensive end-to-end tests AND create documentation.\n" + "If you discover genuine integration bugs in production code while writing tests, " + "note them โ€” you will have a chance to fix them in the next step.\n\n" + "Task description:\n" + f"{task}\n\n" + f"Target: create final tests and documentation for units [{batch_units}].\n\n" + "File placement:\n" + "- There is NO pre-determined target file. 
You decide where to place files.\n" + "- Explore the repository structure to find the best locations.\n" + "- Place end-to-end tests in the project's test directory (e.g., tests/e2e/ or tests/).\n" + "- Place documentation updates in the project root or docs/ directory.\n" + "- Create example scripts in an examples/ directory if one exists, or create it.\n" + "- Follow existing project conventions for file naming and organization.\n\n" + "Requirements:\n" + "- Write end-to-end tests that validate complete user workflows.\n" + "- Create or update documentation (README, API docs, usage examples).\n" + "- Ensure all critical paths and user scenarios are covered.\n" + "- Document any assumptions, limitations, or known issues.\n" + "- Provide clear examples and usage instructions.\n" + "- Validate the entire system works as intended.\n" + "- Keep tests deterministic, readable, and maintainable.\n" + ) + else: + # Fallback to implementation behavior + prompt = ( + "You are working in a Test-Driven Development (TDD) workflow.\n" + "In this step your responsibility is to write or update tests.\n" + "If you discover genuine bugs in production code while writing tests, " + "note them โ€” you will have a chance to fix them in the next step.\n\n" + "Task description:\n" + f"{task}\n\n" + f"Target: write tests for units [{batch_units}] in {file_path}.\n\n" + "Requirements:\n" + "- Use the repository's existing test layout and conventions.\n" + "- Follow the same testing framework, helpers, fixtures, and style already used.\n" + "- Cover at minimum: normal behavior, key edge cases, and meaningful failure cases.\n" + "- Keep tests deterministic, readable, and maintainable.\n" + "- If the expected behavior is unclear, encode the most reasonable interpretation\n" + " and add comments explaining your assumptions.\n" + ) + + # NOTE: dependency_context is NOT appended here โ€” it is provided once + # in the TDD_BATCH_PREAMBLE template to avoid 5x duplication. 
+ # Only init_project_file_gen_prompt() retains its own dep_context + # because TDD_PROJECT_FILE_PREAMBLE has no {dependency_context} slot. + return prompt + + +def init_code_gen_prompt( + task: str, + batch_units: str, + file_path: str, + task_type: str = "implementation", + dependency_context: Optional[Dict[str, Any]] = None, + **kwargs +) -> str: + """Generate initial code generation prompt for a task batch. + + (This step is for incremental implementation of production code.). + + Args: + task: Task description + batch_units: Comma-separated list of unit keys + file_path: Target file path + task_type: Type of task + dependency_context: Dependency context from earlier design stages + """ + if task_type == "implementation": + prompt = ( + "You are working in an incremental development workflow.\n" + "Tests may already exist or may be added later.\n" + "Your responsibility in this step is to implement or refine production code only.\n" + "Do NOT modify test files or environment/dependency configuration here.\n\n" + "Task description:\n" + f"{task}\n\n" + f"Target: implement or refine units [{batch_units}] in {file_path}.\n\n" + "Guidelines:\n" + "- Implement behavior consistent with the task description and any existing tests.\n" + "- Work incrementally: it is fine if not all tests pass yet, as long as your code moves toward correctness.\n" + "- Prefer small, focused, maintainable changes.\n" + "- Follow repository architecture, conventions, and abstractions.\n" + "- **Treat this project as an integrated whole. Prioritize code reuse and leverage existing\n" + " implementations. Before writing any new logic, check the dependency context and existing code\n" + " for utilities, helpers, or patterns that can be reused. Do NOT reinvent the wheel.**\n" + "- Do NOT edit or create test files at this stage.\n" + "- Assume the current file may be missing some imports. 
Whenever you use a function, class, type, or constant,\n" + " you MUST ensure the corresponding import is present at the top of the file.\n" + "- Before adding new imports, search the repository for existing usage of similar helpers or patterns and\n" + " prefer the same modules and import style (to stay consistent with the codebase).\n" + "- If you introduce new symbols in this file, also add or update the import statements so that the module can be\n" + " imported and executed without NameError or ImportError.\n" + "- **Only import packages available in the environment.** Use Python standard library\n" + " and internal project modules (`src.*`) freely. For third-party packages, only import\n" + " them if they are already used by existing source files. Before adding any import,\n" + " verify you actually USE the imported name in your code โ€” never add unused imports.\n" + "\n**Plan first โ€” output a brief summary** (3โ€“5 sentences) before writing any code:\n" + "- Your implementation approach and key design decisions.\n" + "- How you will use the dependency context (base classes, data flow, etc.).\n" + "- Any assumptions or trade-offs to note.\n" + ) + elif task_type == "integration_test": + prompt = ( + "You are working on Integration Bug Fixes.\n" + "Your integration tests (from the previous step) may have revealed \n" + "genuine bugs in the production code. 
Your responsibility is to fix those bugs.\n\n" + "Task description:\n" + f"{task}\n\n" + f"Target: fix integration issues found by tests for [{batch_units}].\n\n" + "Common issues to look for and fix:\n" + "- Route handlers returning placeholder strings instead of calling real handler functions\n" + "- CSS class names in style modules not matching class names used in HTML-generating modules\n" + "- Missing imports or function calls between modules that should be connected\n" + "- Data format mismatches at module boundaries\n\n" + "Guidelines:\n" + "- Fix only what is needed to make integration tests pass.\n" + "- Read the actual source files to understand current implementation before changing.\n" + "- Do NOT refactor working code. Only fix broken connections.\n" + "- Do NOT create main.py \u2014 it will be created in a later task.\n" + "- Do NOT edit test files at this stage.\n" + ) + elif task_type == "final_test_docs": + prompt = ( + "You are working on End-to-End Integration Fixes.\n" + "Your end-to-end tests (from the previous step) may have revealed integration \n" + "bugs in the production code. Your responsibility is to fix those bugs.\n\n" + "Task description:\n" + f"{task}\n\n" + f"Target: fix integration issues found by tests for [{batch_units}].\n\n" + "Common issues to look for and fix:\n" + "- Route handlers returning placeholder strings instead of calling real handler functions\n" + "- CSS class names in style modules not matching class names used in HTML-generating modules\n" + "- Missing imports or function calls between modules that should be connected\n" + "- Data format mismatches at module boundaries\n\n" + "Guidelines:\n" + "- Fix only what is needed to make end-to-end tests pass.\n" + "- Read the actual source files to understand current implementation before changing.\n" + "- Do NOT refactor working code. 
Only fix broken connections.\n" + "- Do NOT create main.py \u2014 it will be created in the next task.\n" + "- Do NOT edit test files at this stage.\n" + ) + else: + # Fallback + prompt = ( + "You are working in an incremental development workflow.\n" + "Tests may already exist or may be added later.\n" + "Your responsibility in this step is to implement or refine production code only.\n" + "Do NOT modify test files or environment/dependency configuration here.\n\n" + "Task description:\n" + f"{task}\n\n" + f"Target: implement or refine units [{batch_units}] in {file_path}.\n\n" + "Guidelines:\n" + "- Implement behavior consistent with the task description and any existing tests.\n" + "- Work incrementally: it is fine if not all tests pass yet, as long as your code moves toward correctness.\n" + "- Prefer small, focused, maintainable changes.\n" + "- Follow repository architecture, conventions, and abstractions.\n" + "- Reuse helpers/utilities where possible; introduce small helpers only when justified.\n" + "- Do NOT edit or create test files at this stage.\n" + ) + + # For implementation tasks, remind agent about the skeleton file + if task_type == "implementation" and file_path: + prompt += ( + "\n**Skeleton file:** The target file `" + file_path + "` may already contain " + "interface definitions (signatures, docstrings) written during the design stage. " + "Use them as your implementation starting point and fill in the function bodies.\n" + ) + + # NOTE: dependency_context is NOT appended here โ€” provided once in TDD_BATCH_PREAMBLE. 
+ return prompt + + +def build_test_prompt_from_batch( + batch: "PlannedTask", + dependency_context: Optional[Dict[str, Any]] = None, +) -> str: + """Build test generation prompt from a PlannedTask object.""" + return init_test_gen_prompt( + task=batch.task, + batch_units=", ".join(batch.units_key), + file_path=batch.file_path, + task_type=batch.task_type, + dependency_context=dependency_context, + ) + + +def build_code_prompt_from_batch( + batch: "PlannedTask", + dependency_context: Optional[Dict[str, Any]] = None, +) -> str: + """Build code generation prompt from a PlannedTask object.""" + return init_code_gen_prompt( + task=batch.task, + batch_units=", ".join(batch.units_key), + file_path=batch.file_path, + task_type=batch.task_type, + dependency_context=dependency_context, + ) + + +# ============================================================================ +# Merged File-Level Prompts +# ============================================================================ + +def _format_merged_phases(batches: list) -> str: + """Format multiple batch tasks into numbered phases for merged prompts.""" + phases = [] + for i, batch in enumerate(batches, 1): + units_str = ", ".join(batch.units_key) + phases.append( + f"### Phase {i}: [{units_str}]\n" + f"{batch.task}" + ) + return "\n\n".join(phases) + + +def build_merged_test_prompt( + batches: list, + dependency_context: Optional[Dict[str, Any]] = None, +) -> str: + """Build a combined test generation prompt for multiple batches from the same file. + + Used in file-level merge mode when several tasks targeting the same file + are implemented together. 
+ + Args: + batches: List of PlannedTask objects (all sharing the same file_path) + dependency_context: Dependency context from earlier design stages + """ + if len(batches) == 1: + return build_test_prompt_from_batch(batches[0], dependency_context=dependency_context) + + file_path = batches[0].file_path + all_units = [] + for b in batches: + all_units.extend(b.units_key) + all_units_str = ", ".join(all_units) + + phases_text = _format_merged_phases(batches) + + prompt = ( + "You are working in a Test-Driven Development (TDD) workflow.\n" + "In this step your responsibility is ONLY to write or update tests.\n" + "Do NOT modify production/source code and do NOT touch environment or dependency files.\n\n" + + f"**File-level batch:** You are implementing `{file_path}` โ€” " + f"covering the following units in one pass.\n" + f"**Units in this batch:** [{all_units_str}]\n\n" + + "The implementation is organized into ordered phases (by dependency).\n" + "Write tests that cover ALL phases below.\n\n" + + f"{phases_text}\n\n" + + "Requirements:\n" + "- Use the repository's existing test layout and conventions.\n" + "- Follow the same testing framework, helpers, fixtures, and style already used.\n" + "- Cover at minimum: normal behavior, key edge cases, and meaningful failure cases.\n" + "- Keep tests deterministic, readable, and maintainable.\n" + "- Organize tests logically โ€” you may group by phase or by functional area.\n" + "- If the expected behavior is unclear, encode the most reasonable interpretation\n" + " and add comments explaining your assumptions.\n" + "- **CRITICAL: Only test classes, methods, and functions that exist in the skeleton\n" + " file below (if provided). Do NOT invent or assume additional methods, features,\n" + " or APIs beyond what is defined in the skeleton.**\n" + ) + + # Point agent to skeleton file (read on demand, not inlined) + if file_path: + prompt += ( + f"\nThe skeleton file `{file_path}` contains interface definitions. 
" + "**Read this file** for exact API signatures.\n" + ) + # NOTE: dependency_context is NOT appended here โ€” provided once in TDD_BATCH_PREAMBLE. + return prompt + + +def build_merged_code_prompt( + batches: list, + dependency_context: Optional[Dict[str, Any]] = None, +) -> str: + """Build a combined code generation prompt for multiple batches from the same file. + + Used in file-level merge mode when several tasks targeting the same file + are implemented together. + + Args: + batches: List of PlannedTask objects (all sharing the same file_path) + dependency_context: Dependency context from earlier design stages + """ + if len(batches) == 1: + return build_code_prompt_from_batch(batches[0], dependency_context=dependency_context) + + file_path = batches[0].file_path + all_units = [] + for b in batches: + all_units.extend(b.units_key) + all_units_str = ", ".join(all_units) + + phases_text = _format_merged_phases(batches) + + prompt = ( + "You are working in an incremental development workflow.\n" + "Tests may already exist or may be added later.\n" + "Your responsibility in this step is to implement or refine production code only.\n" + "Do NOT modify test files or environment/dependency configuration here.\n\n" + + f"**File-level batch:** You are implementing `{file_path}` โ€” " + f"covering the following units in one pass.\n" + f"**Units in this batch:** [{all_units_str}]\n\n" + + "The phases below are ordered by dependency โ€” implement them in order.\n" + "Earlier phases provide foundations that later phases depend on.\n\n" + + f"{phases_text}\n\n" + + "Guidelines:\n" + "- Implement ALL phases listed above in the specified order.\n" + "- Implement behavior consistent with the task descriptions and any existing tests.\n" + "- Work incrementally within the file: foundational helpers first, then higher-level logic.\n" + "- Prefer small, focused, maintainable implementations.\n" + "- Follow repository architecture, conventions, and abstractions.\n" + "- Reuse 
helpers/utilities where possible; introduce small helpers only when justified.\n" + "- Do NOT edit or create test files at this stage.\n" + "- Ensure all necessary imports are present at the top of the file.\n" + "- Before adding new imports, search the repository for existing usage of similar helpers\n" + " and prefer the same modules and import style (to stay consistent with the codebase).\n" + ) + + # Remind about skeleton file + if file_path: + prompt += ( + f"\n**Skeleton file:** The target file `{file_path}` may already contain " + "interface definitions (signatures, docstrings) written during the design stage. " + "Use them as your implementation starting point and fill in the function bodies.\n" + ) + + # NOTE: dependency_context is NOT appended here โ€” provided once in TDD_BATCH_PREAMBLE. + return prompt + + +# ============================================================================ +# Project File Prompts +# ============================================================================ + +def init_project_file_gen_prompt( + task: str, + batch_units: str, + file_path: str, + dependency_context: Optional[Dict[str, Any]] = None, + **kwargs +) -> str: + """Generate prompt for project file generation. + + This is used after all core implementation is complete. + Project files include: requirements.txt, README.md, main.py, etc. + + Args: + task: Task description with detailed instructions + batch_units: Comma-separated list of unit keys + file_path: Target file marker (e.g., , ) + dependency_context: Dependency context from earlier design stages + """ + prompt = ( + "You are working on Project Finalization.\n" + "Your responsibility is to create project files that complete the repository.\n\n" + + "**Context:**\n" + "All core implementation code has been completed.\n" + "You now need to create the requested file(s) to make the repository complete and usable.\n\n" + + "**Important Guidelines:**\n" + "1. 
Analyze the ACTUAL implemented code to generate accurate content\n" + "2. Do NOT guess or assume - reference real module names, functions, and classes\n" + "3. Ensure all examples and documentation are consistent with the codebase\n" + "4. Follow standard conventions for each file type\n" + "5. Read existing files in the repository to understand the structure\n\n" + + f"**Target files:** {batch_units}\n\n" + + f"**Task description:**\n{task}\n\n" + + "**Process:**\n" + "1. First, explore the repository structure to understand what has been implemented\n" + "2. Read key source files to understand imports, functions, and classes\n" + "3. Generate the requested files based on your analysis\n" + "4. Ensure all references are accurate and all examples are runnable\n" + ) + + prompt += _format_dependency_context(dependency_context) + return prompt + + +def build_project_file_prompt_from_batch( + batch: "PlannedTask", + dependency_context: Optional[Dict[str, Any]] = None, +) -> str: + """Build project file generation prompt from a PlannedTask object.""" + return init_project_file_gen_prompt( + task=batch.task, + batch_units=", ".join(batch.units_key), + file_path=batch.file_path, + dependency_context=dependency_context, + ) + + +def is_project_file_batch(batch: "PlannedTask") -> bool: + """Check if a batch is for project file generation (requirements, docs, main entry).""" + return batch.task_type in [ + "project_requirements", # requirements.txt (needs import test) + "project_docs", # README.md (no tests) + "main_entry", # main.py (needs run test) + ] + + +def is_project_docs_batch(batch: "PlannedTask") -> bool: + """Check if a batch is for documentation files (no tests needed).""" + return batch.task_type == "project_docs" + + +def needs_project_file_test(batch: "PlannedTask") -> bool: + """Check if a project file batch needs testing.""" + return batch.task_type in [ + "project_requirements", # import validation + "main_entry", # run test + ] + + +# 
============================================================================ +# Iterative Prompts (After Failure) +# ============================================================================ + +def test_fix_prompt( + test_result: str, + task: str, + **kwargs +) -> str: + """Generate iterative test regeneration prompt based on failing tests. + + Used when failure_type == TEST_ERROR. + """ + prompt = ( + "You are now in the TEST FIX phase.\n" + "Your responsibility is to correct and improve the TEST CODE only.\n" + "Assume production code is mostly correct for now.\n\n" + "Task context:\n" + f"{task}\n\n" + "Test failures:\n" + f"{test_result}\n\n" + "Your job:\n" + "- Analyze why the tests fail.\n" + "- Fix assertions, setups, fixtures, imports, or test logic when they are incorrect.\n" + "- Ensure the tests describe intended behavior clearly and consistently.\n\n" + "Rules:\n" + "- Modify ONLY test-related files.\n" + "- Do NOT change production code or environment configuration.\n" + "- Keep tests deterministic and meaningful.\n" + ) + return prompt + + +def code_fix_prompt( + test_result: str, + task: str, + **kwargs +) -> str: + """Generate iterative code regeneration prompt based on failing tests. + + Used when failure_type == CODE_ERROR. 
+ """ + prompt = ( + "You are now in the CODE FIX phase.\n" + "Your responsibility is to fix bugs in production code.\n" + "Do NOT modify tests or environment configuration here.\n\n" + "Task context:\n" + f"{task}\n\n" + "Test output:\n" + f"{test_result}\n\n" + "Your job:\n" + "- Understand what behavior the failing test expects.\n" + "- Identify the real root cause in the code.\n" + "- Apply a minimal, targeted fix aligned with existing architecture.\n\n" + "Rules:\n" + "- Modify ONLY production code modules.\n" + "- Prefer the smallest correct change.\n" + "- Avoid unnecessary refactors or public API breaks unless unavoidable.\n" + ) + return prompt + + +def env_fix_prompt( + test_result: str, + task: str, + **kwargs +) -> str: + """Generate environment setup prompt based on environment-related failures. + + Used when failure_type == ENV_ERROR. + """ + prompt = ( + "You are now in the ENVIRONMENT FIX phase.\n" + "The test failure is caused by importing a third-party package that is not installed.\n\n" + "Context:\n" + f"{task}\n\n" + "Evidence of environment failure:\n" + f"{test_result}\n\n" + "Guidelines:\n" + "- Fix by REMOVING the unused import if the imported name is not actually used in the code,\n" + " OR by replacing the third-party functionality with Python standard library equivalents.\n" + "- Search the source file for actual usage of the imported name before deciding.\n" + "- Do NOT attempt to install packages or modify requirements.txt.\n" + "- Do NOT modify test files.\n" + "- Prefer minimal, targeted changes.\n" + "- Logical test failures may remain โ€” that is acceptable.\n" + ) + return prompt + + +# ============================================================================ +# Failure Analysis Prompt +# ============================================================================ + +FAILURE_ANALYSIS_PROMPT = """ +You are a test failure analysis expert. + +Analyze the following test failure. 
You have FULL access to the test file, +source file, and test output. Your job is to determine the root cause and +produce a concrete fix plan. + +## Test Execution Output: +{test_output} + +## Source File ({source_file}): +```python +{source_content} +``` + +## Test File ({test_file}): +```python +{test_content} +``` + +## Task Context: +{task_context} + +## Failure History (previous iterations): +{failure_history} + +--- + +### Step 1: Classify the error + +Choose ONE of these categories: +- ENV_ERROR : Environment issue (missing package, import of uninstalled third-party lib) +- TEST_ERROR : Only the test code is wrong (wrong assertion, bad fixture, wrong API usage in test) +- CODE_ERROR : Only the production code is wrong (logic bug, missing method, wrong return value) +- BOTH_ERROR : Both test AND code have issues that need fixing together + +### Step 2: Determine fix_target + +Based on the category, choose the fix path: +- "test" : Only test code needs changes (for TEST_ERROR) +- "code" : Only production code needs changes (for CODE_ERROR) +- "code_then_test" : Fix code first, then fix tests (for BOTH_ERROR) +- "env" : Fix environment/import issue in code (for ENV_ERROR) + +### Step 3: Write a concrete fix plan + +For EACH side that needs fixing, describe the SPECIFIC changes needed. +Reference exact function names, line numbers, variable names. +Do NOT write vague instructions like "fix the bug" or "update the test". 
+ +### Output Format + +Return a JSON object with these fields: + +```json +{{ + "category": "CODE_ERROR | TEST_ERROR | ENV_ERROR | BOTH_ERROR", + "fix_target": "code | test | code_then_test | env", + "root_cause": "One sentence: the precise technical reason for the failure", + "fix_plan": {{ + "code_changes": "Specific changes to make in the source file (or null if not needed)", + "test_changes": "Specific changes to make in the test file (or null if not needed)" + }}, + "reasoning": "2-3 sentences explaining why you chose this category and fix_target" +}} +``` + +### Decision Guidelines + +- If the test expects behavior X but the code does Y, ask: **which one matches the + task description / spec?** The one that matches the spec is correct; fix the other. +- If BOTH the test assertion AND the code logic are wrong relative to the spec, + choose BOTH_ERROR with fix_target "code_then_test". +- If the test uses an API that doesn't exist in the source, check: was the API + supposed to exist (per the spec)? If yes โ†’ CODE_ERROR. If no โ†’ TEST_ERROR. +- Prefer CODE_ERROR over TEST_ERROR when the spec is ambiguous โ€” tests represent + the intended behavior. +- **Mock/patch bugs**: When tests use `@patch`, `MagicMock`, or `side_effect`, + carefully trace whether the mock setup matches the ACTUAL call sequence in the + production code. Common test bugs include: + - `side_effect` list has wrong number of values (too many or too few for the + actual number of calls the patched function receives) + - Mock return values don't account for internal helper calls that also invoke + the patched function + - If the production code logic is clearly correct but the assertion fails, + check whether the mock values fed to the code actually produce the expected + result โ€” the mock setup itself may be wrong โ†’ TEST_ERROR +- **ENV_ERROR sub-types**: ENV_ERROR covers three distinct situations. 
+ Your `fix_plan.code_changes` or `fix_plan.test_changes` MUST specify the + exact fix โ€” do NOT give vague instructions like "fix the import". + - **Missing import in source**: `NameError: name 'Enum' is not defined` means + the source file uses a name without importing it. Fix: add the correct + import statement (e.g. `from enum import Enum`). fix_target = "code". + - **Wrong import path**: `ModuleNotFoundError: No module named 'vibeanim'` + in a project that uses `src.vibeanim.*` means the import path is wrong. + Fix: change `from vibeanim.x` to `from src.vibeanim.x`. If the error is + in a test file, fix_target = "test"; if in source, fix_target = "code". + - **Missing third-party package**: `ModuleNotFoundError` for a non-project + module means a package is not installed. Fix: remove the import or replace + with stdlib equivalents. fix_target = "code". + - Do NOT classify logic errors (AssertionError, TypeError, ValueError) as + ENV_ERROR โ€” those are CODE_ERROR or TEST_ERROR. +- Look at failure_history: if previous iterations alternated between TEST_ERROR + and CODE_ERROR, this strongly suggests BOTH_ERROR. +- **Persistent same-error pattern**: If failure_history shows 2+ consecutive + CODE_ERROR iterations with the same test still failing, seriously consider + whether the TEST is actually wrong (mock setup, wrong expected value, etc.). + Repeated code fixes that don't resolve the issue are a strong signal that + the root cause is in the test, not the code. + +### Examples + +Example 1 (CODE_ERROR): +{{ + "category": "CODE_ERROR", + "fix_target": "code", + "root_cause": "loop(None) sets _loop_count=None which is the same as the default, so build() cannot distinguish 'never called' from 'infinite loop'", + "fix_plan": {{ + "code_changes": "Add a `_loop_enabled: bool = False` flag to EvolutionSequenceBuilder.__init__. Set it to True in loop(). Use `loop=self._loop_enabled` in build() instead of `loop=(self._loop_count is not None)`. 
def build_failure_analysis_prompt(
    test_output: str,
    task_context: str,
    source_file: str = "",
    source_content: str = "",
    test_file: str = "",
    test_content: str = "",
    failure_history: str = "",
    max_output_length: int = 3000,
    # Legacy params (kept for backward compat, ignored)
    test_patch: str = "",
    code_patch: str = "",
) -> str:
    """Render the failure-analysis prompt with full file context.

    Args:
        test_output: Captured output of the failing test run.
        task_context: Description of the task currently being worked on.
        source_file: Path of the production source file.
        source_content: Full text of the production source file.
        test_file: Path of the test file.
        test_content: Full text of the test file.
        failure_history: Formatted summary of previous failure types.
        max_output_length: Character cap for the test-output section.
        test_patch: Ignored; accepted for backward compatibility.
        code_patch: Ignored; accepted for backward compatibility.

    Returns:
        The fully formatted ``FAILURE_ANALYSIS_PROMPT`` string.
    """

    def _clip(text: str, limit: int, marker: str) -> str:
        # Keep each section inside the model's context budget.
        if len(text) > limit:
            return text[:limit] + marker
        return text

    # File bodies get a larger budget than raw test output: the analysis
    # depends on seeing enough of both source and test files.
    max_file_chars = 8000

    return FAILURE_ANALYSIS_PROMPT.format(
        test_output=_clip(test_output, max_output_length, "\n\n... (truncated)"),
        task_context=task_context,
        source_file=source_file or "(unknown)",
        source_content=_clip(source_content, max_file_chars, "\n# ... (truncated)")
        or "(not available)",
        test_file=test_file or "(unknown)",
        test_content=_clip(test_content, max_file_chars, "\n# ... (truncated)")
        or "(not available)",
        failure_history=failure_history or "(first iteration)",
    )
def build_commit_message_prompt(
    workflow_type: str,
    file_path: str,
    units: str,
    task_desc: str,
    patch_content: str = "",
    lines_changed: int = 0,
    files_changed: int = 0
) -> str:
    """Build an LLM prompt for generating a commit message.

    Args:
        workflow_type: Workflow that produced the change (e.g. CODE_BUG_FIX).
        file_path: Path of the file that was modified.
        units: Logical units (classes/functions) touched by the change.
        task_desc: Human-readable description of the task.
        patch_content: Unified diff of the change; truncated so huge diffs
            don't blow up the prompt.
        lines_changed: Number of lines changed, included as context.
        files_changed: Number of files changed, included as context.

    Returns:
        The formatted ``COMMIT_MESSAGE_PROMPT`` string.
    """
    # Truncate patch content if too long
    if len(patch_content) > 2000:
        patch_content = patch_content[:2000] + "\n... (truncated)"

    return COMMIT_MESSAGE_PROMPT.format(
        workflow_type=workflow_type,
        lines_changed=lines_changed,
        files_changed=files_changed,
        file_path=file_path,
        units=units,
        task_desc=task_desc,
        patch_content=patch_content or "(no patch provided)"
    )


def generate_simple_commit_message(
    workflow_type: str,
    file_path: str,
    units: str,
    task: str
) -> str:
    """Generate a simple commit message without LLM.

    Used as fallback when the LLM is not available.

    Args:
        workflow_type: Workflow that produced the change; mapped to a
            conventional-commit prefix (unknown types fall back to "chore").
        file_path: Path of the modified file; only the basename is used.
        units: Logical units touched (currently unused, kept for signature
            parity with ``build_commit_message_prompt``).
        task: Task description; truncated to 50 characters.

    Returns:
        A one-line conventional-commit style message.
    """
    prefixes = {
        "test_development": "test",
        "test_fix": "fix(test)",
        "code_incremental": "feat",
        "code_bug_fix": "fix",
        "env_setup": "chore",
    }

    prefix = prefixes.get(workflow_type.lower(), "chore")

    # Extract filename from path ("a".split("/")[-1] == "a", so this is
    # safe for bare filenames too).
    filename = file_path.split("/")[-1]

    # Truncate task description
    short_task = task[:50] + "..." if len(task) > 50 else task

    # BUG FIX: this previously hardcoded "(unknown)" instead of using the
    # extracted filename, so every fallback message lost the file context.
    return f"{prefix}: {filename} - {short_task}"
def _pct(completed: int, total: int) -> float:
    """Completion percentage rounded to one decimal; 0 when no tasks."""
    return round(completed / total * 100, 1) if total > 0 else 0


def _run_progress(tasks_path: Path, state_path: Path) -> tuple[int, int, int, int]:
    """Load global state and return (total, completed, failed, remaining)."""
    state = load_code_gen_state(state_path)
    total = len(load_tasks_from_tasks_json(tasks_path))
    completed = len(state.completed_task_ids)
    failed = len(state.failed_task_ids)
    return total, completed, failed, total - completed - failed


def _error(message: str, scripts: str) -> Dict[str, Any]:
    """Shape the JSON payload for a generic driver-level error."""
    retry_hint = f"python3 {scripts}/run_batch.py --next --json"
    return {
        "success": False,
        "error": message,
        "next_action": f"Fix the issue, then run: {retry_hint}",
    }


def _all_done(global_state: CodeGenState, tasks_path: Path, scripts: str) -> Dict[str, Any]:
    """Shape the payload returned once every task has been processed."""
    total = len(load_tasks_from_tasks_json(tasks_path))
    completed = len(global_state.completed_task_ids)
    failed = len(global_state.failed_task_ids)

    final_test_cmd = f"python3 {scripts}/run_batch.py --final-test --json"
    if failed:
        msg = f"All batches processed: {completed} completed, {failed} failed out of {total}."
        next_act = (
            f"Some batches failed. You can retry them with: "
            f"python3 {scripts}/run_batch.py --retry --json, "
            f"or run final validation: {final_test_cmd}"
        )
    else:
        msg = f"All {completed} batches completed successfully!"
        next_act = f"Run final validation: {final_test_cmd}"

    return {
        "success": True,
        "type": "complete",
        "message": msg,
        "stats": {
            "total": total,
            "completed": completed,
            "failed": failed,
            "success_rate": _pct(completed, total),
        },
        "next_action": next_act,
    }


def _success_result(
    batch_id: str,
    task: PlannedTask,
    batch_state: BatchExecutionState,
    attempts: List[Dict],
    total_duration: float,
    branch_merged: bool,
    scripts: str,
    tasks_path: Path,
    state_path: Path,
) -> Dict[str, Any]:
    """Shape the payload for a batch that completed successfully."""
    total, completed, failed, remaining = _run_progress(tasks_path, state_path)

    merged_ids = batch_state.merged_task_ids or []
    is_merged_mode = len(merged_ids) > 1

    if remaining > 0:
        next_act = (
            f"Batch completed. {remaining} tasks remaining. "
            f"Run: python3 {scripts}/run_batch.py --next --json"
        )
    else:
        next_act = (
            f"All batches done! Run: python3 {scripts}/run_batch.py --final-test --json\n"
            f"Then run: python3 {scripts}/run_batch.py --global-review --json"
        )

    return {
        "success": True,
        "type": "batch_complete",
        "batch_id": batch_id,
        "file_path": task.file_path,
        "task_type": task.task_type,
        "attempts_used": len(attempts),
        "total_duration": round(total_duration, 1),
        "branch_merged": branch_merged,
        "merged_mode": is_merged_mode,
        "merged_task_count": len(merged_ids) if is_merged_mode else 1,
        "stats": {
            "total": total,
            "completed": completed,
            "failed": failed,
            "remaining": remaining,
            "success_rate": _pct(completed, total),
        },
        "next_action": next_act,
    }


def _failure_result(
    batch_id: str,
    task: PlannedTask,
    batch_state: BatchExecutionState,
    attempts: List[Dict],
    total_duration: float,
    scripts: str,
    tasks_path: Path,
    state_path: Path,
) -> Dict[str, Any]:
    """Shape the payload for a batch that exhausted its attempts."""
    total, completed, failed, remaining = _run_progress(tasks_path, state_path)
    last_attempt = attempts[-1] if attempts else {}

    return {
        "success": False,
        "type": "batch_failed",
        "batch_id": batch_id,
        "file_path": task.file_path,
        "task_type": task.task_type,
        "attempts_used": len(attempts),
        "total_duration": round(total_duration, 1),
        "failure_reason": last_attempt.get("failure_reason", "Unknown"),
        "branch_preserved": batch_state.branch_name,
        "stats": {
            "total": total,
            "completed": completed,
            "failed": failed,
            "remaining": remaining,
        },
        "next_action": (
            f"Batch failed after {len(attempts)} attempts. "
            f"Branch '{batch_state.branch_name}' preserved for inspection. "
            f"Retry: python3 {scripts}/run_batch.py --retry {batch_id} --json, "
            f"or continue: python3 {scripts}/run_batch.py --next --json"
        ),
    }
# Constants
GENERATOR_NAME = "code_gen"
BACKUP_SUFFIX = ".backup"


# ============================================================================
# Dependency Extraction (similar to interface_agent.py)
# ============================================================================

def extract_name_from_node(node: ast.AST) -> Optional[str]:
    """Extract the rightmost identifier from a Name/Attribute/Subscript node.

    Returns None for node shapes that carry no usable identifier.
    """
    if isinstance(node, ast.Name):
        return node.id
    elif isinstance(node, ast.Attribute):
        # e.g., module.ClassName -> return "ClassName"
        return node.attr
    elif isinstance(node, ast.Subscript):
        # e.g., List[int] -> return "List"
        return extract_name_from_node(node.value)
    return None


def extract_type_names(annotation: ast.AST) -> List[str]:
    """Extract all type names from a type annotation.

    Handles plain names, dotted names, subscripted generics
    (``List[int]``, ``Dict[str, Any]``), PEP 604 unions (``A | B``), and
    string forward references (``"Foo"``).
    """
    names: List[str] = []
    if isinstance(annotation, ast.Name):
        names.append(annotation.id)
    elif isinstance(annotation, ast.Attribute):
        names.append(annotation.attr)
    elif isinstance(annotation, ast.Subscript):
        # Handle generic types like List[int], Dict[str, Any]
        names.extend(extract_type_names(annotation.value))
        if isinstance(annotation.slice, ast.Tuple):
            for elt in annotation.slice.elts:
                names.extend(extract_type_names(elt))
        else:
            names.extend(extract_type_names(annotation.slice))
    elif isinstance(annotation, ast.BinOp):
        # Handle Union types with | operator
        names.extend(extract_type_names(annotation.left))
        names.extend(extract_type_names(annotation.right))
    elif isinstance(annotation, ast.Constant) and isinstance(annotation.value, str):
        # Generalization: string forward reference, e.g. x: "Foo".
        # Parse the quoted annotation and recurse so it contributes names.
        try:
            names.extend(
                extract_type_names(ast.parse(annotation.value, mode="eval").body)
            )
        except SyntaxError:
            pass  # malformed forward reference contributes nothing
    return names


def extract_function_calls(node: ast.AST) -> List[str]:
    """Extract function/method call names from anywhere inside *node*."""
    calls = []
    for child in ast.walk(node):
        if isinstance(child, ast.Call):
            func_name = extract_name_from_node(child.func)
            if func_name:
                calls.append(func_name)
    return calls


class CodeDependencyAnalyzer:
    """Analyzes Python code to extract dependencies.

    Extracts:
    - Inheritance relationships (class X(BaseClass))
    - Function/method invocations
    - Type references in annotations
    """

    def __init__(self, known_units: Optional[Set[str]] = None):
        """Args:
            known_units: Set of known unit names (classes, functions) in the
                repo. Only dependencies to these units will be recorded; an
                empty or missing set disables filtering.
        """
        self.known_units = known_units or set()
        self.inheritance_edges: List[Dict[str, Any]] = []
        self.invocation_edges: List[Dict[str, Any]] = []
        self.reference_edges: List[Dict[str, Any]] = []

    def analyze_file(self, file_path: Path, code: str) -> None:
        """Analyze a Python file for dependencies.

        Args:
            file_path: Path to the file
            code: Source code content
        """
        try:
            tree = ast.parse(code)
        except SyntaxError as e:
            logging.warning(f"SyntaxError parsing {file_path}: {e}")
            return

        file_path_str = str(file_path)

        # BUG FIX: ast.walk also yields functions nested inside classes, so
        # the old loop analyzed every method twice -- once via its class
        # (with the "Class::method" unit name) and once again as a bare
        # function, producing duplicate edges with an unprefixed caller.
        # Collect method nodes first so the walk below can skip them.
        method_node_ids: Set[int] = set()
        for node in ast.walk(tree):
            if isinstance(node, ast.ClassDef):
                for item in node.body:
                    if isinstance(item, (ast.FunctionDef, ast.AsyncFunctionDef)):
                        method_node_ids.add(id(item))

        for node in ast.walk(tree):
            # Extract inheritance (this also analyzes the class's methods)
            if isinstance(node, ast.ClassDef):
                self._analyze_class(node, file_path_str)

            # Extract function-level dependencies for non-method functions
            elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                if id(node) not in method_node_ids:
                    self._analyze_function(node, file_path_str)

    def _analyze_class(self, cls_node: ast.ClassDef, file_path: str) -> None:
        """Record inheritance edges and analyze the class's methods."""
        class_name = cls_node.name

        for base in cls_node.bases:
            parent_name = extract_name_from_node(base)
            if parent_name and self._is_known_unit(parent_name):
                self.inheritance_edges.append({
                    "child": class_name,
                    "parent": parent_name,
                    "source_file": file_path,
                    "edge_type": EdgeType.INHERITS,
                    "generator": GENERATOR_NAME
                })

        # Analyze methods within the class (with their parent class name)
        for item in cls_node.body:
            if isinstance(item, (ast.FunctionDef, ast.AsyncFunctionDef)):
                self._analyze_function(
                    item, file_path,
                    parent_class=class_name
                )

    def _analyze_function(
        self,
        func_node: ast.FunctionDef,
        file_path: str,
        parent_class: Optional[str] = None
    ) -> None:
        """Record type references and invocations for a function/method."""
        if parent_class:
            unit_name = f"{parent_class}::{func_node.name}"
        else:
            unit_name = func_node.name

        # Extract type references from parameter annotations
        for arg in func_node.args.args:
            if arg.annotation:
                for type_name in extract_type_names(arg.annotation):
                    if self._is_known_unit(type_name):
                        self.reference_edges.append({
                            "unit": unit_name,
                            "referenced_type": type_name,
                            "source_file": file_path,
                            "edge_type": EdgeType.REFERENCES,
                            "generator": GENERATOR_NAME
                        })

        # Check return type
        if func_node.returns:
            for type_name in extract_type_names(func_node.returns):
                if self._is_known_unit(type_name):
                    self.reference_edges.append({
                        "unit": unit_name,
                        "referenced_type": type_name,
                        "source_file": file_path,
                        "edge_type": EdgeType.REFERENCES,
                        "generator": GENERATOR_NAME
                    })

        # Extract function calls (self-recursion deliberately excluded)
        for call_name in extract_function_calls(func_node):
            if self._is_known_unit(call_name) and call_name != func_node.name:
                self.invocation_edges.append({
                    "caller": unit_name,
                    "callee": call_name,
                    "source_file": file_path,
                    "edge_type": EdgeType.INVOKES,
                    "generator": GENERATOR_NAME
                })

    def _is_known_unit(self, name: str) -> bool:
        """True when *name* passes the known-units filter (or no filter set)."""
        if not self.known_units:
            return True  # If no filter, accept all
        return name in self.known_units

    def get_all_edges(self) -> Dict[str, List[Dict]]:
        """Return all extracted edges grouped by category."""
        return {
            "inheritance": self.inheritance_edges,
            "invocation": self.invocation_edges,
            "reference": self.reference_edges
        }
+ + Args: + batch: The completed PlannedTask + parsed_file: Parsed representation of the generated file + repo_path: Repository root path + + Returns: + List of warning messages + """ + self.warnings = [] + + # Get expected units from batch + expected_units = set(batch.units_key) + + # Get actual units from parsed file + actual_units = {} + for unit in parsed_file.units: + if unit.unit_type in ("class", "function", "method"): + key = unit.name + if unit.parent: + key = f"{unit.parent}::{unit.name}" + actual_units[key] = unit + + # Check for expected units not found in code + for expected in expected_units: + # Normalize expected name (remove "class " or "function " prefix) + normalized = expected + if expected.startswith("class "): + normalized = expected[6:] + elif expected.startswith("function "): + normalized = expected[9:] + + found = False + for actual_key in actual_units: + if normalized in actual_key or actual_key in normalized: + found = True + break + + if not found: + self.warnings.append( + f"[WARNING] Expected unit '{expected}' not found in generated code for {batch.file_path}" + ) + + # Check node consistency in RPG + for unit_key in batch.units_key: + features = batch.unit_to_features.get(unit_key, []) + for feature_path in features: + node = self._find_feature_node(feature_path) + if node: + self._check_node_path_consistency(node, batch.file_path, unit_key) + + return self.warnings + + def _find_feature_node(self, feature_path: str) -> Optional[Node]: + """Find a feature node by name or path.""" + # Try direct lookup by name + feature_name = feature_path.split("/")[-1] if "/" in feature_path else feature_path + + for node in self.rpg.nodes.values(): + if node.name == feature_name: + return node + if node.node_type == "feature" and node.feature_path() == feature_path: + return node + return None + + def _check_node_path_consistency( + self, + node: Node, + file_path: str, + unit_key: str + ) -> None: + """Check if node's meta.path is consistent with 
# ============================================================================
# Edge Updater
# ============================================================================

class EdgeUpdater:
    """Updates edges in repo_rpg based on analyzed code dependencies.

    Handles:
    - Adding new edges discovered in code
    - Removing stale edges (generated by code_gen but no longer in code)
    - Preserving edges from other generators

    All edge operations go through RPGService (dedup, generator tagging).
    """

    def __init__(self, rpg: RPG):
        self.rpg = rpg
        self._svc = None  # lazily constructed RPGService
        self.added_count: int = 0
        self.removed_count: int = 0

    @property
    def svc(self):
        # Lazy import + construction keeps module load free of a
        # service-layer dependency.
        if self._svc is None:
            from rpg.service import RPGService
            self._svc = RPGService(self.rpg)
        return self._svc

    def update_edges(
        self,
        analyzed_deps: Dict[str, List[Dict]],
        batch_file_path: str
    ) -> Tuple[int, int]:
        """Refresh this file's code_gen edges from analyzed dependencies.

        Args:
            analyzed_deps: Dependencies from CodeDependencyAnalyzer
            batch_file_path: File path of the batch being completed

        Returns:
            Tuple of (edges_added, edges_removed)
        """
        # Drop stale code_gen edges for this file first, then re-add from
        # the fresh analysis.
        self.removed_count = self.svc.refresh_file_edges(GENERATOR_NAME, batch_file_path)
        self._add_new_edges(analyzed_deps)
        return self.added_count, self.removed_count

    def _add_new_edges(self, analyzed_deps: Dict[str, List[Dict]]) -> None:
        """Add edges for every analyzed dependency category."""
        # (category key, src field, dst field, edge type, description verb)
        edge_specs = (
            ("inheritance", "child", "parent", EdgeType.INHERITS, "inherits from"),
            ("invocation", "caller", "callee", EdgeType.INVOKES, "invokes"),
            ("reference", "unit", "referenced_type", EdgeType.REFERENCES, "references"),
        )
        for category, src_key, dst_key, edge_type, verb in edge_specs:
            for dep in analyzed_deps.get(category, []):
                self._add_edge_if_nodes_exist(
                    child_name=dep[src_key],
                    parent_name=dep[dst_key],
                    edge_type=edge_type,
                    description=(
                        f"{dep[src_key]} {verb} {dep[dst_key]} (in {dep['source_file']})"
                    ),
                )

    def _add_edge_if_nodes_exist(
        self,
        child_name: str,
        parent_name: str,
        edge_type: EdgeType,
        description: str
    ) -> None:
        """Add an edge only when both endpoints resolve to RPG nodes."""
        src_node = self.svc.find_node_by_unit_name(child_name)
        dst_node = self.svc.find_node_by_unit_name(parent_name)

        if not src_node or not dst_node:
            return

        if self.svc.add_dependency_edge(
            src_node, dst_node, edge_type, GENERATOR_NAME,
            description=description,
        ):
            self.added_count += 1


# ============================================================================
# Feature Node Updater
# ============================================================================

def _update_feature_nodes(
    rpg: RPG,
    batch: PlannedTask,
    parsed_file: ParsedFile,
    file_path_str: str,
    code: str,
) -> int:
    """Update feature-node metadata after successful code generation.

    For each unit in the batch, locate the corresponding RPG feature node
    (via ``unit_to_features``) and update:

    * ``meta.description`` -- append "[implemented]" marker if not present
    * ``meta.content`` -- replace interface skeleton with actual source

    Args:
        rpg: Loaded RPG graph
        batch: The completed PlannedTask
        parsed_file: The parsed representation of the generated file
        file_path_str: Relative file path (e.g. ``"src/parser.py"``)
        code: Full source code of the generated file (currently unused;
            kept for interface stability)

    Returns:
        Number of nodes that were updated.
    """

    def _bare(unit_key: str) -> str:
        # Normalize "class Foo" / "function bar" to the bare identifier.
        for prefix in ("class ", "function "):
            if unit_key.startswith(prefix):
                return unit_key[len(prefix):]
        return unit_key

    # Lookup: unit key ("Name" or "Parent::Name") -> actual source text
    source_by_key: Dict[str, str] = {
        (f"{cu.parent}::{cu.name}" if cu.parent else cu.name):
            (cu.source if hasattr(cu, "source") else "")
        for cu in parsed_file.units
    }

    updated = 0
    for unit_key in batch.units_key:
        feature_paths = batch.unit_to_features.get(unit_key, [])
        if not feature_paths:
            continue

        # Build the impl_path as design_interfaces does:
        # "src/file.py::class Foo" or "src/file.py::function bar"
        impl_path = f"{file_path_str}::{unit_key}"

        for feature_ref in feature_paths:
            # feature_ref can be a feature path string or a dict with "path"
            if isinstance(feature_ref, dict):
                ref = feature_ref.get("path", feature_ref.get("name", ""))
            else:
                ref = str(feature_ref)

            # Find the feature node -- meta.path first, then name
            target_node = _find_feature_node_by_path(rpg, impl_path, ref)
            if target_node is None:
                continue

            if target_node.meta is None:
                target_node.meta = NodeMetaData()

            # --- Mark as implemented in description ---
            desc = target_node.meta.description or ""
            if "[implemented]" not in desc:
                target_node.meta.description = (
                    f"{desc} [implemented]" if desc else "[implemented]"
                )

            # --- Store actual source snippet (bare key, then raw key) ---
            snippet = source_by_key.get(_bare(unit_key), "") or source_by_key.get(unit_key, "")
            if snippet:
                target_node.meta.content = snippet

            updated += 1

    if updated:
        logging.info(f"Updated {updated} feature nodes for {file_path_str}")
    return updated
Match by feature reference (name / feature_path) + if feature_ref: + feature_name = feature_ref.split("/")[-1] if "/" in feature_ref else feature_ref + for node in rpg.nodes.values(): + if node.name == feature_name: + return node + if node.name == feature_ref: + return node + try: + if node.feature_path() == feature_ref: + return node + except Exception: + pass + + return None + + +# ============================================================================ +# Main Entry Point +# ============================================================================ + +def backup_rpg_file(rpg_path: Path) -> Optional[Path]: + """Backup the repo_rpg.json file before modification. + + Args: + rpg_path: Path to repo_rpg.json + + Returns: + Path to backup file, or None if backup failed + """ + if not rpg_path.exists(): + return None + + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + backup_path = rpg_path.parent / f"repo_rpg_{timestamp}{BACKUP_SUFFIX}.json" + + try: + shutil.copy2(rpg_path, backup_path) + logging.info(f"Backed up repo_rpg.json to {backup_path}") + return backup_path + except Exception as e: + logging.warning(f"Failed to backup repo_rpg.json: {e}") + return None + + +def collect_known_units(rpg: RPG) -> Set[str]: + """Collect all known unit names from RPG for filtering. + + RPG nodes are feature-level; code-level identifiers live in + ``node.meta.path`` (canonical form: ``"src/file.py::Foo"`` for classes, + ``"src/file.py::bar"`` for functions, ``"src/file.py::Foo::m"`` for + methods). We extract the unit name from those paths so the + ``CodeDependencyAnalyzer`` can filter against real identifiers. + + The returned set also includes legacy "class Foo" / "function bar" + forms so downstream callers that still pass type-prefixed identifiers + continue to match while the rest of the codebase migrates. 
def collect_known_units(rpg: RPG) -> Set[str]:
    """Collect all known unit names from RPG for filtering.

    RPG nodes are feature-level; code-level identifiers live in
    ``node.meta.path`` (canonical form: ``"src/file.py::Foo"`` for classes,
    ``"src/file.py::bar"`` for functions, ``"src/file.py::Foo::m"`` for
    methods). We extract the unit name from those paths so the
    ``CodeDependencyAnalyzer`` can filter against real identifiers.

    The returned set also includes legacy "class Foo" / "function bar"
    forms so downstream callers that still pass type-prefixed identifiers
    continue to match while the rest of the codebase migrates.
    """
    known: Set[str] = set()
    for node in rpg.nodes.values():
        # Only nodes that carry a code path contribute identifiers.
        if not node.meta or not node.meta.path:
            continue
        path_str = node.meta.path if isinstance(node.meta.path, str) else ""
        if "::" not in path_str:
            continue
        # Canonical form: ``file::Name`` or ``file::Class::method``.
        # We add every non-empty segment after the file boundary so both
        # class names and method names become matchable.
        _file, _sep, sym_chain = path_str.partition("::")
        segments = [s for s in sym_chain.split("::") if s]
        for seg in segments:
            # Strip legacy ``class ``/``function ``/``method `` prefix
            # if it slipped through from older encoder runs.
            bare = seg
            for legacy_prefix in ("class ", "function ", "method "):
                if bare.startswith(legacy_prefix):
                    bare = bare[len(legacy_prefix):]
                    break
            known.add(bare)

        # Backward-compat: also emit ``"class Foo"`` / ``"function bar"``
        # for downstream code that has not yet migrated to bare names.
        if node.meta.type_name == NodeType.CLASS and segments:
            known.add(f"class {segments[-1]}")
        elif node.meta.type_name == NodeType.FUNCTION and segments:
            known.add(f"function {segments[-1]}")
        elif node.meta.type_name == NodeType.METHOD and segments:
            known.add(f"method {segments[-1]}")

    # Also include base-class nodes added by design_base_classes
    # (their node.name IS the class name).
    for node in rpg.nodes.values():
        if node.meta and node.meta.generator == "design_base_classes":
            known.add(node.name)
    return known


def run_rpg_update(
    batch: PlannedTask,
    repo_path: Path,
    rpg_path: Path,
    backup: bool = True
) -> Dict[str, Any]:
    """Main entry point for updating repo_rpg after code generation.

    Args:
        batch: The completed PlannedTask
        repo_path: Repository root path
        rpg_path: Path to repo_rpg.json
        backup: Whether to backup before modification

    Returns:
        Dict with update results:
        - success: bool
        - warnings: List[str]
        - edges_added: int
        - edges_removed: int
        - nodes_updated: int
        - backup_path: Optional[str]
    """
    # All failure modes below return this dict with success=False and a
    # human-readable warning appended -- callers never see an exception.
    result: Dict[str, Any] = {
        "success": False,
        "warnings": [],
        "edges_added": 0,
        "edges_removed": 0,
        "nodes_updated": 0,
        "backup_path": None
    }

    # Check if RPG file exists
    if not rpg_path.exists():
        result["warnings"].append(f"RPG file not found: {rpg_path}")
        return result

    # Load RPG
    try:
        rpg = RPG.load_json(str(rpg_path))
    except Exception as e:
        result["warnings"].append(f"Failed to load RPG: {e}")
        return result

    # Get the generated file path
    file_path = repo_path / batch.file_path

    if not file_path.exists():
        result["warnings"].append(f"Generated file not found: {file_path}")
        return result

    # Read and parse the generated code
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            code = f.read()
        parsed_file = ParsedFile(code, str(file_path))
    except Exception as e:
        result["warnings"].append(f"Failed to parse generated file: {e}")
        return result

    # Step 1: Check node consistency
    consistency_checker = NodeConsistencyChecker(rpg)
    warnings = consistency_checker.check_batch_consistency(batch, parsed_file, repo_path)
    result["warnings"].extend(warnings)

    # Step 2: Update feature node metadata (mark as implemented)
    nodes_updated = _update_feature_nodes(
        rpg=rpg,
        batch=batch,
        parsed_file=parsed_file,
        file_path_str=batch.file_path,
        code=code,
    )
    result["nodes_updated"] = nodes_updated

    # Step 3: Analyze code dependencies
    known_units = collect_known_units(rpg)
    analyzer = CodeDependencyAnalyzer(known_units)
    # Pass the repo-relative path (batch.file_path) instead of the absolute
    # filesystem path. The path is only used to populate ``source_file`` in
    # edge metadata, which feeds into edge ``description`` text injected into
    # LLM prompts. Absolute paths leak host-specific prefixes
    # (e.g. /home/.../RPG-Kit-backup/...) and mislead agents (plan A4).
    analyzer.analyze_file(Path(batch.file_path), code)
    analyzed_deps = analyzer.get_all_edges()

    # Step 4: Backup before modification (after the read-only steps so a
    # failed parse never creates a useless backup)
    if backup:
        backup_path = backup_rpg_file(rpg_path)
        if backup_path:
            result["backup_path"] = str(backup_path)

    # Step 5: Update edges
    edge_updater = EdgeUpdater(rpg)
    edges_added, edges_removed = edge_updater.update_edges(
        analyzed_deps,
        batch.file_path
    )
    result["edges_added"] = edges_added
    result["edges_removed"] = edges_removed

    # Step 6: Save updated RPG
    try:
        rpg.save_json(str(rpg_path))
        result["success"] = True
        logging.info(
            f"RPG updated: {nodes_updated} nodes, "
            f"+{edges_added} edges, -{edges_removed} edges, "
            f"{len(result['warnings'])} warnings"
        )
    except Exception as e:
        result["warnings"].append(f"Failed to save RPG: {e}")

    return result
+""" + +from __future__ import annotations + +import json +import logging +from typing import Any, Dict, Optional + +from common.paths import LOGS_DIR + +logger = logging.getLogger(__name__) + + +def stage_path(name: str): + """Return the absolute path of a stage's JSON sidecar.""" + return LOGS_DIR / f"codegen_{name}.json" + + +def save_stage_result(name: str, data: Dict[str, Any]) -> None: + """Save a stage result to ``.rpgkit/logs/codegen_.json``. + + Each pipeline stage (final_test, smoke_test, global_review) saves + its output independently. Global review loads all of them as context. + """ + LOGS_DIR.mkdir(parents=True, exist_ok=True) + dest = stage_path(name) + try: + with open(dest, "w", encoding="utf-8") as f: + json.dump(data, f, indent=2, default=str) + logger.info("Saved stage result: %s", dest) + except Exception as exc: + logger.debug("Failed to save stage result %s: %s", name, exc) + + +def load_stage_result(name: str) -> Optional[Dict[str, Any]]: + """Load a stage result, or ``None`` if not found / unreadable.""" + src = stage_path(name) + if not src.is_file(): + return None + try: + with open(src, "r", encoding="utf-8") as f: + return json.load(f) + except Exception: + return None diff --git a/RPG-Kit/scripts/code_gen/static_checks.py b/RPG-Kit/scripts/code_gen/static_checks.py new file mode 100644 index 0000000..b02a909 --- /dev/null +++ b/RPG-Kit/scripts/code_gen/static_checks.py @@ -0,0 +1,170 @@ +#!/usr/bin/env python3 +"""Static Completeness Checks for RPG-Kit Code Generation. + +Project-type-agnostic static checks run after a subtree completes. +These detect unimplemented stubs and placeholder returns without LLM cost. +""" + +import ast +import logging +from pathlib import Path +from typing import List + +logger = logging.getLogger(__name__) + + +def static_completeness_check(files: List[str], repo_path: Path) -> List[str]: + """Project-type-agnostic static completeness check. + + Run after ALL tasks in a subtree are completed. 
def _collect_abstract_classes(tree: ast.AST) -> set:
    """Names of classes in *tree* that look abstract (ABC/ABCMeta/Protocol
    bases, or ``metaclass=ABCMeta``)."""
    abstract: set = set()
    for node in ast.walk(tree):
        if not isinstance(node, ast.ClassDef):
            continue
        is_abc = False
        for base in node.bases:
            base_name = ""
            if isinstance(base, ast.Name):
                base_name = base.id
            elif isinstance(base, ast.Attribute):
                base_name = base.attr
            if base_name in ("ABC", "ABCMeta", "Protocol"):
                is_abc = True
        for kw in node.keywords:
            if kw.arg == "metaclass" and isinstance(kw.value, ast.Name):
                if kw.value.id == "ABCMeta":
                    is_abc = True
        if is_abc:
            abstract.add(node.name)
    return abstract


def _build_parent_map(tree: ast.AST) -> dict:
    """Map each AST node to its direct parent (ast has no parent links)."""
    parents: dict = {}
    for node in ast.walk(tree):
        for child in ast.iter_child_nodes(node):
            parents[child] = node
    return parents


def _is_abstract_method(func_node, parent_map: dict, abc_classes: set) -> bool:
    """True when *func_node* is a legitimate abstract/protocol method
    (decorated with ``abstractmethod`` or defined inside an ABC/Protocol)."""
    for dec in func_node.decorator_list:
        dec_name = ""
        if isinstance(dec, ast.Name):
            dec_name = dec.id
        elif isinstance(dec, ast.Attribute):
            dec_name = dec.attr
        if dec_name == "abstractmethod":
            return True
    parent = parent_map.get(func_node)
    if isinstance(parent, ast.ClassDef):
        if parent.name in abc_classes:
            return True
        for base in parent.bases:
            base_name = ""
            if isinstance(base, ast.Name):
                base_name = base.id
            elif isinstance(base, ast.Attribute):
                base_name = base.attr
            if base_name == "Protocol":
                return True
    return False


def _raises_not_implemented(raise_node: ast.Raise) -> bool:
    """True when a Raise statement raises NotImplementedError (bare name
    or call form)."""
    exc = raise_node.exc
    if exc is None:
        return False
    if isinstance(exc, ast.Name):
        return exc.id == "NotImplementedError"
    return (
        isinstance(exc, ast.Call)
        and isinstance(exc.func, ast.Name)
        and exc.func.id == "NotImplementedError"
    )


def static_completeness_check(files: List[str], repo_path: Path) -> List[str]:
    """Project-type-agnostic static completeness check.

    Run after ALL tasks in a subtree are completed. Checks for:
    1. Functions/methods whose only real body is ``pass`` (stub)
    2. Return statements returning TODO/PLACEHOLDER strings
    3. Functions that raise NotImplementedError
    4. Functions whose only real body is ``...`` (Ellipsis)

    Legitimate abstract/protocol methods are exempt from the stub checks.

    Args:
        files: List of file paths (relative to *repo_path*) to check.
        repo_path: Absolute path to the project repository root.

    Returns:
        List of human-readable issue strings (empty = all clean).
    """
    issues: List[str] = []

    for filepath in files:
        full_path = repo_path / filepath
        if not full_path.exists():
            issues.append(f"MISSING: {filepath} does not exist")
            continue

        # Only Python sources can be AST-checked.
        if full_path.suffix != ".py":
            continue

        try:
            content = full_path.read_text(encoding="utf-8")
            tree = ast.parse(content, filename=filepath)
        except (SyntaxError, UnicodeDecodeError) as exc:
            issues.append(f"PARSE_ERROR: {filepath} — {exc}")
            continue

        abc_classes = _collect_abstract_classes(tree)
        parent_map = _build_parent_map(tree)

        for node in ast.walk(tree):
            if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                # Filter out a leading docstring (Expr(Constant(str))).
                real_body = [
                    n
                    for n in node.body
                    if not (
                        isinstance(n, ast.Expr)
                        and isinstance(n.value, ast.Constant)
                        and isinstance(n.value.value, str)
                    )
                ]
                # Check 1: body is only ``pass``.
                if len(real_body) == 1 and isinstance(real_body[0], ast.Pass):
                    if not _is_abstract_method(node, parent_map, abc_classes):
                        issues.append(
                            f"STUB: {filepath}:{node.lineno} "
                            f"{node.name}() has only `pass` — not implemented"
                        )
                # Check 4: body is only ``...`` (Ellipsis).
                elif (
                    len(real_body) == 1
                    and isinstance(real_body[0], ast.Expr)
                    and isinstance(real_body[0].value, ast.Constant)
                    and real_body[0].value.value is ...
                ):
                    if not _is_abstract_method(node, parent_map, abc_classes):
                        issues.append(
                            f"STUB: {filepath}:{node.lineno} "
                            f"{node.name}() has only `...` — not implemented"
                        )
                # Check 3: body is only ``raise NotImplementedError``.
                elif len(real_body) == 1 and isinstance(real_body[0], ast.Raise):
                    if _raises_not_implemented(real_body[0]) and not _is_abstract_method(
                        node, parent_map, abc_classes
                    ):
                        issues.append(
                            f"STUB: {filepath}:{node.lineno} "
                            f"{node.name}() raises NotImplementedError — not implemented"
                        )

            # Check 2: return TODO/PLACEHOLDER string.
            if isinstance(node, ast.Return) and isinstance(
                node.value, ast.Constant
            ):
                val = node.value.value
                if isinstance(val, str) and any(
                    marker in val.upper()
                    for marker in ("TODO", "PLACEHOLDER", "NOT IMPLEMENTED")
                ):
                    issues.append(
                        f"PLACEHOLDER: {filepath}:{node.lineno} "
                        f"returns placeholder string"
                    )

    # NOTE: the original had a second, unreachable ``return issues`` here;
    # removed as dead code.
    return issues
+ +These helpers are shared across the codegen orchestrator +(``scripts.run_batch``), the post-codegen subtree reviewer +(``scripts.code_gen.subtree_review``), and the RPG-edit pipeline +(``scripts.rpg_edit.review`` / ``scripts.rpg_edit.code``). + +``scripts.run_batch`` re-exports these names so the legacy +``from run_batch import dispatch_sub_agent`` imports keep working; +new code should prefer ``from code_gen.sub_agent import โ€ฆ``. +""" + +from __future__ import annotations + +import logging +import time +from pathlib import Path +from typing import Optional, Tuple + +from common.llm_client import LLMClient + +logger = logging.getLogger(__name__) + + +from code_gen._constants import DEFAULT_AGENT_TIMEOUT # noqa: E402 + + +def dispatch_sub_agent( + prompt: str, + repo_path: Path, + timeout: int = DEFAULT_AGENT_TIMEOUT, + trajectory=None, + step_id=None, + purpose: str = "run_batch", + max_retries: int = 1, +) -> Tuple[Optional[str], Optional[str]]: + """Dispatch a sub-agent with the given prompt. + + Args: + prompt: Full prompt string. + repo_path: Project repo path. + timeout: Max time for the sub-agent session. + trajectory: Trajectory instance for recording. + step_id: Current step ID in trajectory. + purpose: Purpose string for trajectory/logging. + max_retries: Number of LLM call attempts (1 = no retry). + + Returns: + (response_text, error_message) โ€” one of them is None. 
+ """ + client = LLMClient(trajectory=trajectory, step_id=step_id) + logger.info( + "Dispatching sub-agent (purpose=%s, timeout=%ds, prompt_len=%d)", + purpose, timeout, len(prompt), + ) + logger.debug("Sub-agent prompt:\n%s", prompt) + + start_time = time.time() + try: + response = client.generate( + prompt, + purpose=purpose, + timeout=timeout, + max_retries=max_retries, + ) + elapsed = time.time() - start_time + logger.info("Sub-agent completed in %.1fs (response_len=%d)", elapsed, len(response)) + logger.debug("Sub-agent response:\n%s", response) + return response, None + except RuntimeError as exc: + elapsed = time.time() - start_time + error_msg = f"Sub-agent failed after {elapsed:.1f}s: {exc}" + logger.error(error_msg) + return None, error_msg + + +def parse_batch_result(response: Optional[str]) -> Tuple[bool, str]: + """Parse the sub-agent's exit status from its response. + + Looks for ``BATCH_RESULT: PASS`` or ``BATCH_RESULT: FAIL | `` + in the last 20 lines of the response. + + Args: + response: Sub-agent response text. + + Returns: + ``(passed, reason)`` โ€” ``passed`` is ``True`` if ``PASS`` found. + """ + if not response: + return False, "No response from sub-agent" + + # Search last 20 lines for the result marker + lines = response.strip().splitlines() + search_lines = lines[-20:] if len(lines) > 20 else lines + + for line in reversed(search_lines): + line = line.strip() + if line.startswith("BATCH_RESULT: PASS"): + return True, "Sub-agent reported PASS" + if line.startswith("BATCH_RESULT: FAIL"): + reason = line.split("|", 1)[1].strip() if "|" in line else "Unknown failure" + return False, reason + + # No explicit marker found โ€” treat as failure + return False, "Sub-agent did not output BATCH_RESULT marker" + + +def parse_pytest_summary(response: Optional[str]) -> Optional[str]: + """Extract the sub-agent's claimed pytest summary line, if present. 
+ + The runner's TDD prompt asks the sub-agent to copy the literal pytest + summary line into a ``PYTEST_SUMMARY: โ€ฆ`` line right above + ``BATCH_RESULT``. This helper returns that quoted text (everything + after the first colon, stripped) so the orchestrator can cross-check + it against the post-verify rerun. + + Returns ``None`` if the sub-agent did not provide the line. + """ + if not response: + return None + lines = response.strip().splitlines() + search_lines = lines[-20:] if len(lines) > 20 else lines + for line in reversed(search_lines): + stripped = line.strip() + if stripped.startswith("PYTEST_SUMMARY:"): + return stripped.split(":", 1)[1].strip() + return None + + +def truncate_test_output(text: str, head: int = 20, tail: int = 50) -> str: + """Trim a long pytest output for safe injection into a retry prompt. + + Keeps the first ``head`` lines (typically: pytest header, collected + test count, first failure summary) **and** the last ``tail`` lines + (where pytest places the FAILED/ERROR detail and the summary line), + inserting ``... ...`` between them. + + Returns ``text`` unchanged when it is already shorter than + ``head + tail + 1`` lines. + """ + if not text: + return text + lines = text.splitlines() + keep = head + tail + if len(lines) <= keep + 1: + return text + omitted = len(lines) - keep + body = ( + lines[:head] + + [f"... <{omitted} lines truncated> ..."] + + lines[-tail:] + ) + return "\n".join(body) diff --git a/RPG-Kit/scripts/code_gen/subtree_review.py b/RPG-Kit/scripts/code_gen/subtree_review.py new file mode 100644 index 0000000..c9ddc1c --- /dev/null +++ b/RPG-Kit/scripts/code_gen/subtree_review.py @@ -0,0 +1,558 @@ +#!/usr/bin/env python3 +"""Subtree Review โ€” Feature-level completeness verification. + +After all tasks in a subtree are completed, this module runs: +1. Static checks (no LLM cost) for stubs and placeholders +2. An LLM review agent that traces user journeys through the code +3. 
Post-verification pytest to ensure fixes don't break anything + +The review is **non-blocking**: failures are recorded but do not +prevent subsequent subtrees from proceeding. +""" + +import logging +import re +import sys +import time +from dataclasses import dataclass, field, asdict +from pathlib import Path +from typing import Any, Dict, List, Optional, Set + +sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) + +from common.task_batch import load_tasks_from_tasks_json +from common.paths import TASKS_FILE +from code_gen.static_checks import static_completeness_check + +logger = logging.getLogger(__name__) + + +# ============================================================================ +# Data structures +# ============================================================================ + +@dataclass +class _SubtreeReviewResult: + """Outcome of a single subtree review.""" + + subtree: str + status: str = "NOT_RUN" # ALL_COMPLETE | FIXED | BLOCKED | NOT_RUN + timestamp: str = "" + issues_found: int = 0 + issues_fixed: int = 0 + static_issues: List[str] = field(default_factory=list) + duration: float = 0.0 + reason: str = "" + + def to_dict(self) -> Dict[str, Any]: + return asdict(self) + + +# ============================================================================ +# Subtree info gathering +# ============================================================================ + +def _get_subtree_files(tasks_path: Path, subtree: str) -> List[str]: + """Return deduplicated list of file paths for a subtree.""" + all_tasks = load_tasks_from_tasks_json(tasks_path) + seen: Set[str] = set() + files: List[str] = [] + for t in all_tasks: + if t.subtree == subtree and t.file_path not in seen: + # Skip marker paths like + if t.file_path.startswith("<") and t.file_path.endswith(">"): + continue + seen.add(t.file_path) + files.append(t.file_path) + return files + + +def _get_subtree_description(tasks_path: Path, subtree: str) -> str: + """Build a description string for 
a subtree from its task descriptions.""" + all_tasks = load_tasks_from_tasks_json(tasks_path) + descs = [t.task for t in all_tasks if t.subtree == subtree] + return "\n".join(f"- {d}" for d in descs[:10]) # cap at 10 + + +def _needs_llm_review(subtree_files: List[str], repo_path: Path) -> bool: + """Determine if a subtree needs full LLM review. + + Skips LLM review (saves ~95s) when the subtree has: + - No static issues (no stubs/placeholders) + - No string-based cross-module references (href, form action, event emit, etc.) + + For pure-logic subtrees (services, models, utilities), this avoids + unnecessary LLM calls that would return ALL_COMPLETE anyway. + + Args: + subtree_files: List of file paths in this subtree. + repo_path: Project repo root. + + Returns: + True if LLM review is recommended, False if safe to skip. + """ + for filepath in subtree_files: + full_path = repo_path / filepath + if not full_path.exists() or full_path.suffix != '.py': + continue + try: + content = full_path.read_text(encoding='utf-8', errors='replace') + except Exception: + continue + + # Check for string-based cross-module references + # These patterns match HTML link generation, event emission, etc. + # Match both regular quotes and escaped quotes (\" in Python strings) + if re.search(r'href\s*=\s*(?:["\']|\\["\'])\s*/', content): + return True + if re.search(r'action\s*=\s*(?:["\']|\\["\'])\s*/', content): + return True + if re.search(r'\.emit\s*\(\s*["\']', content): + return True + if re.search(r'\.publish\s*\(\s*["\']', content): + return True + + return False + + +def is_subtree_just_completed( + batch_id: str, + completed_task_ids: List[str], + tasks_path: Path, +) -> Optional[str]: + """Check if completing *batch_id* causes its subtree to be fully done. + + Args: + batch_id: The batch that just completed. + completed_task_ids: All completed task IDs (including batch_id). + tasks_path: Path to tasks.json. + + Returns: + Subtree name if it just completed, else ``None``. 
+ """ + all_tasks = load_tasks_from_tasks_json(tasks_path) + + # Find the subtree for this batch + task = None + for t in all_tasks: + if t.task_id == batch_id: + task = t + break + if task is None: + return None + + subtree = task.subtree + + # Special subtrees are never reviewed + if subtree in ("FINAL_TASKS", "PROJECT_FILES", ""): + return None + + # Check if all tasks in this subtree are now completed + subtree_ids = {t.task_id for t in all_tasks if t.subtree == subtree} + completed_set = set(completed_task_ids) + if subtree_ids.issubset(completed_set): + return subtree + return None + + +# ============================================================================ +# Review prompt +# ============================================================================ + +REVIEW_PROMPT = """\ +# Feature Completeness Review + +## Your Role +You are a code reviewer verifying that a feature is complete and functional. +Think from the END USER's perspective, not the developer's. +"User" means end-user for applications, or developer-user for libraries/SDKs. + +## Feature: "{subtree_name}" +{subtree_description} + +## Files Implementing This Feature +{file_list} + +## Project Background +{project_background} + +## Static Check Results +{static_check_results} + +## Instructions + +1. **Read ALL files listed above** โ€” understand what has been built. + +2. **Think about the user journey**: + - How would a user access this feature? (URL? command? button? API call?) + - What steps would they take to complete the workflow? + - What would they see at each step? + - What happens on success? On failure? On invalid input? + +3. 
# NOTE(review): angle-bracket placeholders (<requirement>, <N>, <reason>)
# reconstructed — they appear stripped in the extracted text; confirm
# against the committed file.
REVIEW_PROMPT = """\
# Feature Completeness Review

## Your Role
You are a code reviewer verifying that a feature is complete and functional.
Think from the END USER's perspective, not the developer's.
"User" means end-user for applications, or developer-user for libraries/SDKs.

## Feature: "{subtree_name}"
{subtree_description}

## Files Implementing This Feature
{file_list}

## Project Background
{project_background}

## Static Check Results
{static_check_results}

## Instructions

1. **Read ALL files listed above** — understand what has been built.

2. **Think about the user journey**:
   - How would a user access this feature? (URL? command? button? API call?)
   - What steps would they take to complete the workflow?
   - What would they see at each step?
   - What happens on success? On failure? On invalid input?

3. **Verify completeness** — check that:
   - Every step in the user journey has working code behind it
   - Entry points exist (routes registered, commands wired, menus populated —
     whatever the project uses)
   - User-facing output is generated (pages, CLI output, widgets — whatever applies)
   - Error handling provides feedback to the user
   - The feature connects to the rest of the application

4. **You MUST trace at least one complete user journey through the code.**
   For EACH step, cite the specific file:function that handles it.
   If you cannot trace a complete journey, the feature is NOT complete.

5. **Report findings** with a structured checklist:

   ## Self-Generated Checklist
   - [ ] <requirement>: PASS / MISSING / BROKEN
   - [ ] ...

   ## User Journey Trace
   Step 1: User does X → file.py:func() → Result: ✓/✗
   Step 2: ...

6. **Fix issues**: If you found MISSING or BROKEN items, write the fix code.
   Then run the test command to verify no regressions.

## Constraints
- Do NOT refactor working code
- Do NOT change function signatures that already have passing tests
- Only ADD missing pieces or FIX broken connections
- Your changes must not break existing tests

## Already Completed (other subtrees — do not modify)
{completed_modules_from_other_subtrees}

## Skeleton Only (not yet implemented — will be done in later subtrees)
{skeleton_only_files}

## Test Command
{pytest_cmd}

## Output
Last line MUST be one of:
  REVIEW_RESULT: ALL_COMPLETE
  REVIEW_RESULT: FIXED <N> issues
  REVIEW_RESULT: BLOCKED | <reason>
"""


def _build_review_prompt(
    subtree_name: str,
    subtree_description: str,
    subtree_files: List[str],
    static_check_results: str,
    completed_task_ids: List[str],
    tasks_path: Path,
    repo_path: Path,
    project_background: str = "",
    pytest_cmd: str = "",
) -> str:
    """Construct the review prompt for an LLM sub-agent.

    Fills REVIEW_PROMPT with subtree context, then optionally appends a
    cross-subtree connection-verification section built from
    interfaces.json (best-effort; any failure there is only logged).
    """
    all_tasks = load_tasks_from_tasks_json(tasks_path)
    completed_set = set(completed_task_ids)

    # Classify files: everything completed so far, minus marker
    # pseudo-paths like "<...>".
    all_completed_files: Set[str] = set()
    for t in all_tasks:
        if t.task_id in completed_set:
            fp = t.file_path
            if not (fp.startswith("<") and fp.endswith(">")):
                all_completed_files.add(fp)

    current_files = set(subtree_files)
    other_completed = sorted(all_completed_files - current_files)

    # Find skeleton-only files (exist on disk but not completed)
    all_source_files: Set[str] = set()
    for t in all_tasks:
        fp = t.file_path
        if not (fp.startswith("<") and fp.endswith(">")):
            all_source_files.add(fp)
    skeleton_files = sorted(all_source_files - all_completed_files)

    file_list = "\n".join(f"- `{f}`" for f in subtree_files)
    other_list = "\n".join(f"- `{f}`" for f in other_completed) if other_completed else "(none)"
    skel_list = "\n".join(f"- `{f}`" for f in skeleton_files) if skeleton_files else "(none)"

    base_prompt = REVIEW_PROMPT.format(
        subtree_name=subtree_name,
        subtree_description=subtree_description,
        file_list=file_list,
        project_background=project_background or "(see existing source files)",
        static_check_results=static_check_results or "All static checks passed.",
        completed_modules_from_other_subtrees=other_list,
        skeleton_only_files=skel_list,
        pytest_cmd=pytest_cmd or "python3 -m pytest tests/ -x --tb=short -q --timeout=30",
    )

    # Append cross-subtree connection check if there are completed dependencies
    cross_subtree_section = ""
    if other_completed:
        # Build a list of cross-subtree dependencies from interfaces.json
        try:
            from common.paths import INTERFACES_FILE
            from code_gen.context_collector import collect_dependency_files
            cross_deps = []
            for f in subtree_files:
                try:
                    deps = collect_dependency_files(INTERFACES_FILE, f)
                    for dep_type in ("inherits_from", "invokes", "references"):
                        for dep in deps.get(dep_type, []):
                            # The dependency record names its target file under
                            # a type-specific key; take the first one present.
                            dep_file = dep.get("parent_file") or dep.get("callee_file") or dep.get("type_file", "")
                            if dep_file and dep_file not in current_files and dep_file in all_completed_files:
                                cross_deps.append(f"- `{f}` → `{dep_file}` ({dep_type})")
                except Exception:
                    # Per-file failures are ignored; the section is optional.
                    pass

            if cross_deps:
                cross_subtree_section = (
                    "\n\n## Cross-Subtree Connection Verification\n\n"
                    "The following files in YOUR subtree depend on files from OTHER completed subtrees.\n"
                    "For each connection, verify that:\n"
                    "1. The function/class your code imports from the other subtree actually exists\n"
                    "2. The parameters your code passes match the other module's signature\n"
                    "3. Any string identifiers (URLs, event names, etc.) match exactly\n\n"
                    "Dependencies on other subtrees:\n"
                    + "\n".join(sorted(set(cross_deps)))
                    + "\n"
                )
        except Exception as exc:
            logger.debug("Could not build cross-subtree deps: %s", exc)

    return base_prompt + cross_subtree_section


# ============================================================================
# Review execution
# ============================================================================

def _parse_review_result(response: Optional[str]) -> _SubtreeReviewResult:
    """Parse the review agent's output for the REVIEW_RESULT marker.

    Returns a result whose ``subtree`` field is left empty — the caller
    fills it in. Absence of a marker is treated as BLOCKED.
    """
    result = _SubtreeReviewResult(subtree="")
    if not response:
        result.status = "BLOCKED"
        result.reason = "No response from review agent"
        return result

    # The marker is required near the end; scan the last 20 lines bottom-up.
    lines = response.strip().splitlines()
    search_lines = lines[-20:] if len(lines) > 20 else lines

    for line in reversed(search_lines):
        line = line.strip()
        if line.startswith("REVIEW_RESULT: ALL_COMPLETE"):
            result.status = "ALL_COMPLETE"
            return result
        if line.startswith("REVIEW_RESULT: FIXED"):
            result.status = "FIXED"
            # Try to extract count ("FIXED <N> issues")
            parts = line.split()
            for p in parts:
                if p.isdigit():
                    result.issues_fixed = int(p)
                    break
            return result
        if line.startswith("REVIEW_RESULT: BLOCKED"):
            result.status = "BLOCKED"
            result.reason = line.split("|", 1)[1].strip() if "|" in line else "Unknown"
            return result

    result.status = "BLOCKED"
    result.reason = "Review agent did not output REVIEW_RESULT marker"
    return result
output REVIEW_RESULT marker" + return result + + +def run_subtree_review( + subtree_name: str, + completed_task_ids: List[str], + repo_path: Path, + tasks_path: Path = TASKS_FILE, + project_background: str = "", + agent_timeout: int = 900, +) -> _SubtreeReviewResult: + """Execute a full subtree review: static checks โ†’ LLM review โ†’ post-verify. + + This function is designed to be called from ``run_batch.py`` after a + batch merge succeeds and ``is_subtree_just_completed()`` returns the + subtree name. + + The review is **non-blocking**: exceptions are caught and recorded. + + Args: + subtree_name: Name of the subtree that just completed. + completed_task_ids: All completed task IDs so far. + repo_path: Absolute path to the project repository. + tasks_path: Path to tasks.json. + project_background: Project background text for the prompt. + agent_timeout: Timeout for the LLM review agent (seconds). + + Returns: + _SubtreeReviewResult with status and metrics. + """ + start_time = time.time() + result = _SubtreeReviewResult( + subtree=subtree_name, + timestamp=time.strftime("%Y-%m-%dT%H:%M:%S"), + ) + + # 1. Collect subtree info + subtree_files = _get_subtree_files(tasks_path, subtree_name) + subtree_description = _get_subtree_description(tasks_path, subtree_name) + + if not subtree_files: + result.status = "ALL_COMPLETE" + result.reason = "No source files in subtree" + result.duration = time.time() - start_time + return result + + # 2. Static checks + static_issues = static_completeness_check(subtree_files, repo_path) + result.static_issues = static_issues + result.issues_found = len(static_issues) + static_result_str = "\n".join(static_issues) if static_issues else "All static checks passed." + + if static_issues: + logger.warning( + "Subtree '%s' has %d static issues", subtree_name, len(static_issues) + ) + + # 2b. 
Smart skip: if no static issues AND no cross-module string references, + # skip the LLM review (saves ~95s per subtree) + if not static_issues and not _needs_llm_review(subtree_files, repo_path): + result.status = "ALL_COMPLETE" + result.reason = "No static issues and no cross-module string references" + result.duration = time.time() - start_time + logger.info( + "Subtree '%s' skipped LLM review (no cross-ref patterns)", + subtree_name, + ) + return result + + # 3. Build pytest command + from code_gen.test_runner import get_dev_python + venv_python = get_dev_python(repo_path) or "python3" + pytest_cmd = ( + f"{venv_python} -m pytest tests/ -x --tb=short -q " + f"--timeout=30 --timeout-method=signal " + f"-W ignore::DeprecationWarning" + ) + + # 4. Build review prompt + prompt = _build_review_prompt( + subtree_name=subtree_name, + subtree_description=subtree_description, + subtree_files=subtree_files, + static_check_results=static_result_str, + completed_task_ids=completed_task_ids, + tasks_path=tasks_path, + repo_path=repo_path, + project_background=project_background, + pytest_cmd=pytest_cmd, + ) + + # 5. Setup review branch + from common.git_utils import GitRunner + git = GitRunner(str(repo_path)) + + safe_name = subtree_name.lower().replace(" ", "_").replace("/", "_")[:40] + branch_name = f"review/{safe_name}" + + # Ensure on main first + current = git.get_current_branch() + if current != git.main_branch: + if git.has_uncommitted_changes(): + git.stage_and_commit("WIP: auto-save before review") + git.switch_branch(git.main_branch) + + if git.branch_exists(branch_name): + git.delete_branch(branch_name, force=True) + git.create_branch(branch_name) + + # 6. 
Dispatch review agent (reuse existing sub-agent mechanism) + try: + # Import here to avoid circular imports at module level + from run_batch import dispatch_sub_agent + + response, error = dispatch_sub_agent( + prompt, repo_path, timeout=agent_timeout, purpose="subtree_review" + ) + + if error: + result.status = "BLOCKED" + result.reason = f"Review agent error: {error}" + # Switch back to main + if git.has_uncommitted_changes(): + git.stage_and_commit("WIP: review agent failed") + git.switch_branch(git.main_branch) + result.duration = time.time() - start_time + return result + + # 7. Parse review result + parsed = _parse_review_result(response) + result.status = parsed.status + result.issues_fixed = parsed.issues_fixed + result.reason = parsed.reason + + # 8. Post-verify if review made changes + if result.status in ("FIXED", "ALL_COMPLETE"): + # Run pytest to verify no regressions + from code_gen.test_runner import run_pytest, ensure_deps_installed + try: + ensure_deps_installed(repo_path) + except Exception: + pass + verify_result = run_pytest( + repo_path, + timeout=180, + extra_args=["--timeout=30", "--timeout-method=signal"], + ) + verify_passed = verify_result.success + if verify_passed: + # Merge review branch + if git.has_uncommitted_changes(): + git.stage_and_commit( + f"review({safe_name}): fix {result.issues_fixed} issues" + ) + merge_ok, merge_err = git.merge_branch( + branch_name, + message=f"merge: review {subtree_name}\n\nsubtree_review: {subtree_name}", + ) + if merge_ok: + git.delete_branch(branch_name) + logger.info("Review branch '%s' merged", branch_name) + else: + result.status = "BLOCKED" + result.reason = f"Merge failed: {merge_err}" + logger.warning("Review merge failed: %s", merge_err) + # Ensure we're on main after failed merge + try: + current = git.get_current_branch() + if current != git.main_branch: + git.switch_branch(git.main_branch) + except Exception: + pass + else: + result.status = "BLOCKED" + result.reason = "Fix introduced test 
regression" + if git.has_uncommitted_changes(): + git.stage_and_commit("WIP: review fix caused regression") + git.switch_branch(git.main_branch) + else: + # BLOCKED โ€” switch back to main + if git.has_uncommitted_changes(): + git.stage_and_commit("WIP: review blocked") + git.switch_branch(git.main_branch) + + except Exception as exc: + logger.warning("Subtree review for '%s' failed: %s", subtree_name, exc) + result.status = "BLOCKED" + result.reason = str(exc) + try: + if git.has_uncommitted_changes(): + git.stage_and_commit("WIP: review exception") + git.switch_branch(git.main_branch) + except Exception: + pass + + result.duration = time.time() - start_time + return result diff --git a/RPG-Kit/scripts/code_gen/task_loader.py b/RPG-Kit/scripts/code_gen/task_loader.py new file mode 100644 index 0000000..ffa63e6 --- /dev/null +++ b/RPG-Kit/scripts/code_gen/task_loader.py @@ -0,0 +1,273 @@ +#!/usr/bin/env python3 +"""Task selection helpers for the codegen batch orchestrator. + +This module hosts the two task-picker helpers that were originally +defined in the now-deleted top-level ``prepare_batch.py``: + +* :func:`get_next_pending_task_id` โ€” pick the next single task to run, + with git-based auto-recovery and integration-test deferral. +* :func:`get_next_merged_tasks` โ€” pick a same-file group of pending + implementation tasks for "file-merge" mode batches. + +Both are consumed by ``scripts.run_batch``'s Module 5 orchestrator. +They share three private helpers โ€” ``_git_grep_pattern``, +``_git_has_gen_code_commit``, ``_has_failed_impl_dependencies`` โ€” kept +local to this module since they have no callers elsewhere. 
+""" + +from __future__ import annotations + +import logging +from pathlib import Path +from typing import List, Optional + +from common.execution_state import ( + CodeGenState, + STATE_FILE, + save_code_gen_state, +) +from common.git_utils import GitRunner +from common.paths import REPO_DIR +from common.task_batch import PlannedTask, load_tasks_from_tasks_json + +logger = logging.getLogger(__name__) + + +# --------------------------------------------------------------------------- +# Git-based recovery helpers (private) +# --------------------------------------------------------------------------- + +def _git_grep_pattern(repo_path: Path, pattern: str) -> bool: + """``True`` iff ``git log --grep=`` finds at least one commit.""" + try: + git = GitRunner(str(repo_path)) + result = git.run_git( + ["log", "--all", "--oneline", "--grep", pattern, "--max-count=1"] + ) + return result.success and bool(result.stdout.strip()) + except Exception: + return False + + +def _git_has_gen_code_commit( + repo_path: Path, + batch_id: str, + completed_ids: Optional[set] = None, +) -> bool: + """Check if a batch was **successfully completed** based on git history. + + Uses a two-tier strategy: + 1. Check for the new ``batch_completed: `` marker (reliable). + 2. Fall back to the old ``gen_code iter 1 โ€” batch `` pattern, + but ONLY if *batch_id* is already in *completed_ids* โ€” this avoids + false positives for tasks that had gen_code commits but failed tests. + """ + # 1. New completion marker + if _git_grep_pattern(repo_path, f"batch_completed: {batch_id}"): + return True + # 2. Legacy fallback โ€” only trust for known-completed tasks + if completed_ids and batch_id in completed_ids: + return _git_grep_pattern(repo_path, f"gen_code iter 1 โ€” batch {batch_id}") + return False + + +def _has_failed_impl_dependencies( + integration_task: PlannedTask, + failed_ids: set, + all_tasks: list, +) -> bool: + """Check if an integration test has failed implementation dependencies. 
+ + Heuristic: if any implementation task in the same subtree is failed, + the integration test likely depends on it and will fail too. + """ + subtree = integration_task.subtree + for t in all_tasks: + if (t.task_type == "implementation" + and t.subtree == subtree + and t.task_id in failed_ids): + return True + return False + + +# --------------------------------------------------------------------------- +# Public task pickers +# --------------------------------------------------------------------------- + +def get_next_pending_task_id( + tasks_path: Path, + state: CodeGenState, + repo_path: Optional[Path] = None, + auto_recover: bool = True, + state_path: Path = STATE_FILE, +) -> Optional[str]: + """Get the task_id of the next pending task to process. + + If ``auto_recover`` is True and a pending task already has gen_code + commits in git history, it is auto-completed (added to + ``completed_task_ids`` and persisted) to avoid redundant TDD cycles + after state resets. + + Integration tests are deferred until all implementation tasks are + complete, since they often depend on modules from multiple subtrees. + """ + completed = set(state.completed_task_ids) + failed = set(state.failed_task_ids) + repo_path = repo_path or REPO_DIR + state_modified = False + + all_tasks = load_tasks_from_tasks_json(tasks_path) + + # Track pending implementation task IDs. Updated during auto-recovery + # so the integration-test deferral check stays accurate. 
+ _pending_impl_ids = { + t.task_id for t in all_tasks + if t.task_id not in completed and t.task_id not in failed + and t.task_type == "implementation" + } + + for t in all_tasks: + if t.task_id in completed or t.task_id in failed: + continue + # Git-based auto-recovery: skip tasks whose code was already generated + if auto_recover and t.task_id not in failed and _git_has_gen_code_commit(repo_path, t.task_id, completed): + logger.info( + "Git-based recovery: auto-completing %s " + "(gen_code commits found in git history)", + t.task_id, + ) + state.completed_task_ids.append(t.task_id) + completed.add(t.task_id) + _pending_impl_ids.discard(t.task_id) + state_modified = True + continue + # Defer integration tests until all implementation tasks are done. + if t.task_type == "integration_test" and _pending_impl_ids: + continue + # Skip integration tests whose implementation dependencies have failed + if t.task_type == "integration_test" and not _pending_impl_ids: + if _has_failed_impl_dependencies(t, failed, all_tasks): + logger.info( + "Skipping integration test %s: " + "dependent implementation tasks are in failed state", + t.task_id, + ) + state.failed_task_ids.append(t.task_id) + failed.add(t.task_id) + state_modified = True + continue + # Found a genuinely pending task + if state_modified: + state.completed_tasks = len(state.completed_task_ids) + save_code_gen_state(state, state_path) + return t.task_id + + # All tasks processed โ€” persist any auto-recoveries + if state_modified: + state.completed_tasks = len(state.completed_task_ids) + save_code_gen_state(state, state_path) + return None + + +def get_next_merged_tasks( + tasks_path: Path, + state: CodeGenState, + max_units: int = 0, + repo_path: Optional[Path] = None, + state_path: Path = STATE_FILE, +) -> Optional[List[PlannedTask]]: + """Get the next group of pending tasks for one merged batch (file-merge mode). + + Rules: + - Only merge ``task_type == "implementation"`` tasks from the same ``file_path``. 
+ - Special types (integration_test, final_test_docs, main_entry, project_*) + are never merged; they are returned as a single-element list. + - If ``max_units > 0``, cap the merged group so total units ``<= max_units``. + - If ``max_units == 0`` (default), merge all tasks for the same file. + - Tasks with gen_code commits in git history are auto-completed (skipped). + - Integration tests are deferred until all implementation tasks are complete. + + Returns: + List of PlannedTask objects to implement together, or None if nothing pending. + """ + completed = set(state.completed_task_ids) + failed = set(state.failed_task_ids) + all_tasks = load_tasks_from_tasks_json(tasks_path) + repo_path = repo_path or REPO_DIR + state_modified = False + + # Track pending implementation task IDs. Updated during auto-recovery + # so the integration-test deferral check stays accurate. + _pending_impl_ids = { + t.task_id for t in all_tasks + if t.task_id not in completed and t.task_id not in failed + and t.task_type == "implementation" + } + + # 1. 
Find the first pending task (with auto-recovery) + first_pending: Optional[PlannedTask] = None + for t in all_tasks: + if t.task_id in completed or t.task_id in failed: + continue + # Auto-recover tasks with existing gen_code commits + if _git_has_gen_code_commit(repo_path, t.task_id, completed): + logger.info("Git-based recovery (merge mode): auto-completing %s", t.task_id) + state.completed_task_ids.append(t.task_id) + completed.add(t.task_id) + _pending_impl_ids.discard(t.task_id) + state_modified = True + continue + # Defer integration tests until all implementation tasks are done + if t.task_type == "integration_test" and _pending_impl_ids: + continue + # Skip integration tests whose impl dependencies failed + if t.task_type == "integration_test" and not _pending_impl_ids: + if _has_failed_impl_dependencies(t, failed, all_tasks): + logger.info( + "Skipping integration test %s (merge mode): " + "dependent implementation tasks are in failed state", + t.task_id, + ) + state.failed_task_ids.append(t.task_id) + failed.add(t.task_id) + state_modified = True + continue + first_pending = t + break + + # Persist any auto-recoveries + if state_modified: + state.completed_tasks = len(state.completed_task_ids) + save_code_gen_state(state, state_path) + + if not first_pending: + return None + + # 2. Non-implementation types are never merged + if first_pending.task_type != "implementation": + return [first_pending] + + # 3. Collect all pending implementation tasks for the same file_path + target_file = first_pending.file_path + file_tasks = [ + t for t in all_tasks + if t.file_path == target_file + and t.task_type == "implementation" + and t.task_id not in completed + and t.task_id not in failed + ] + + # 4. 
If max_units is set, greedily collect tasks up to the limit + if max_units > 0: + selected: List[PlannedTask] = [] + unit_count = 0 + for t in file_tasks: + if unit_count + len(t.units_key) <= max_units: + selected.append(t) + unit_count += len(t.units_key) + else: + break + # Always return at least one task (even if it alone exceeds max_units) + return selected if selected else [file_tasks[0]] + + return file_tasks if file_tasks else [first_pending] diff --git a/RPG-Kit/scripts/code_gen/test_output_parser.py b/RPG-Kit/scripts/code_gen/test_output_parser.py new file mode 100644 index 0000000..c35a719 --- /dev/null +++ b/RPG-Kit/scripts/code_gen/test_output_parser.py @@ -0,0 +1,394 @@ +#!/usr/bin/env python3 +"""Test Output Parser โ€” Unified pytest output analysis. + +This module provides a single-pass parser for pytest output that extracts +all information needed by the TDD workflow: + +- Statistics (passed/failed/errors/skipped/duration) +- Failure classification (ENV_ERROR / TEST_ERROR / CODE_ERROR / UNKNOWN_ERROR) +- ENV_ERROR sub-classification (missing_import / wrong_import_path / missing_package) +- Structured error extraction (all NameErrors, all ModuleNotFoundErrors) +- Failing test file paths (from actual output, not naming heuristics) +- Compact failure line summary (for prompt injection) + +Usage:: + + from code_gen.test_output_parser import analyze_test_output + + analysis = analyze_test_output(pytest_raw_output) + # analysis.failure_type โ†’ "ENV_ERROR" + # analysis.missing_names โ†’ ["Enum", "dataclass", "Callable"] + # analysis.has_tests_run โ†’ True/False +""" + +from __future__ import annotations + +import re +from collections import Counter +from dataclasses import dataclass, field, asdict +from typing import Dict, List, Any, Optional +from common.import_normalizer import detect_project_import_prefix +from common.paths import REPO_DIR as _REPO_DIR + + +# ============================================================================ +# Data class +# 
============================================================================ + +@dataclass +class TestOutputAnalysis: + """Complete analysis of a pytest run output. + + Produced once by ``analyze_test_output()``, then consumed by + ``run_batch.py`` (post-verify) and the orchestrator's analyse-failure + paths without re-parsing. + """ + + # --- Statistics ---------------------------------------------------------- + passed: int = 0 + failed: int = 0 + errors: int = 0 + skipped: int = 0 + duration: float = 0.0 + + # --- Top-level classification -------------------------------------------- + failure_type: str = "" # ENV_ERROR | TEST_ERROR | CODE_ERROR | UNKNOWN_ERROR + + # --- ENV_ERROR sub-classification ---------------------------------------- + env_sub_type: str = "" # missing_import | wrong_import_path | missing_package + env_fix_target: str = "" # code | test + env_instruction: str = "" # human-readable fix guidance for sub-agent + env_details: str = "" # short detail string + + # --- Structured error info ----------------------------------------------- + missing_names: List[str] = field(default_factory=list) + missing_modules: List[str] = field(default_factory=list) + failing_test_files: List[str] = field(default_factory=list) + failure_lines: str = "" # compact excerpt of failure-relevant lines + + # --- Meta ---------------------------------------------------------------- + has_tests_run: bool = False # True if at least one test was executed + raw_output: str = "" # original pytest output (for LLM fallback) + + # -- Serialization -------------------------------------------------------- + + def to_dict(self) -> Dict[str, Any]: + """Serialize to dict for JSON persistence in BatchExecutionState. + + ``raw_output`` is excluded to keep the serialized size small; + ``last_test_output`` in BatchExecutionState already stores it. 
+ """ + d = asdict(self) + d.pop("raw_output", None) + return d + + @classmethod + def from_dict(cls, data: Dict[str, Any], raw_output: str = "") -> "TestOutputAnalysis": + valid = {f.name for f in __import__("dataclasses").fields(cls)} + filtered = {k: v for k, v in data.items() if k in valid} + obj = cls(**filtered) + obj.raw_output = raw_output + return obj + + +# ============================================================================ +# Unified parse entry point +# ============================================================================ + +def analyze_test_output(raw_output: str) -> TestOutputAnalysis: + """Parse pytest output in a single pass. + + This is the **only** place in the codebase that parses raw pytest text. + The returned ``TestOutputAnalysis`` is then shared by ``run_batch.py``'s + post-verify path (for pass/fail decision) and the orchestrator's + analyse-failure handler (for failure routing). + """ + result = TestOutputAnalysis(raw_output=raw_output) + + # 1. Statistics + _parse_stats(raw_output, result) + result.has_tests_run = (result.passed + result.failed + result.errors) > 0 + + # 2. Extract all structured errors (one-pass findall) + result.missing_names = _extract_all_name_errors(raw_output) + result.missing_modules = _extract_all_module_errors(raw_output) + + # 3. Failing test file paths + result.failing_test_files = _extract_failing_files(raw_output) + + # 4. Classify + _classify(raw_output, result) + + # 5. 
Compact failure lines + result.failure_lines = _extract_relevant_lines(raw_output) + + return result + + +# ============================================================================ +# Internal helpers +# ============================================================================ + +_SUMMARY_RE = re.compile( + r"(\d+)\s+passed|(\d+)\s+failed|(\d+)\s+error|(\d+)\s+skipped|" + r"in\s+([\d.]+)s" +) + + +def _parse_stats(output: str, result: TestOutputAnalysis) -> None: + for m in _SUMMARY_RE.finditer(output): + if m.group(1): + result.passed = int(m.group(1)) + if m.group(2): + result.failed = int(m.group(2)) + if m.group(3): + result.errors = int(m.group(3)) + if m.group(4): + result.skipped = int(m.group(4)) + if m.group(5): + result.duration = float(m.group(5)) + + +# -- Error extraction -------------------------------------------------------- + +def _extract_all_name_errors(output: str) -> List[str]: + """Extract ALL NameError names (deduplicated, order-preserved).""" + names = re.findall(r"NameError: name '(\w+)' is not defined", output) + return list(dict.fromkeys(names)) + + +def _extract_all_module_errors(output: str) -> List[str]: + """Extract ALL ModuleNotFoundError / ImportError module names.""" + mods = re.findall( + r"(?:ModuleNotFoundError|ImportError):.*?No module named '([^']+)'", + output, + ) + return list(dict.fromkeys(mods)) + + +def _extract_failing_files(output: str) -> List[str]: + """Extract test file paths from FAILED/ERROR lines, sorted by frequency.""" + raw = re.findall(r"(?:FAILED|ERROR)\s+(tests/\S+\.py)", output) + # Strip ::TestClass::test_method, keep only the file path + files = [f.split("::")[0] for f in raw] + if not files: + return [] + # Most-frequently-failing file first + return [f for f, _ in Counter(files).most_common()] + + +# -- Classification ---------------------------------------------------------- + +# Keywords checked in priority order (first match wins). 
+# The three keyword tuples below are matched as plain substrings against the
+# lowercased pytest output (see _classify), so they must stay lowercase.
+_ENV_KEYWORDS = (
+    "modulenotfounderror",
+    "importerror",
+    "no module named",
+    "nameerror",
+    "package not found",
+    "pip install",
+    "missing dependency",
+    "command not found",
+)
+
+_TEST_ERROR_KEYWORDS = (
+    "fixture",
+    "conftest",
+    "test setup failed",
+    "test collection failed",
+    "@pytest",
+    "parametrize",
+    "test file",
+)
+
+_CODE_ERROR_KEYWORDS = (
+    "assertionerror",
+    "assert",
+    "expected",
+    "actual",
+    "!=",
+    "not equal",
+    "typeerror",
+    "valueerror",
+    "attributeerror",
+    "keyerror",
+)
+
+
+def _classify(output: str, result: TestOutputAnalysis) -> None:
+    """Set ``failure_type`` and, for ENV_ERROR, the sub-classification."""
+    lower = output.lower()
+
+    # Priority: ENV -> TEST -> CODE -> UNKNOWN
+    if any(kw in lower for kw in _ENV_KEYWORDS):
+        result.failure_type = "ENV_ERROR"
+        _classify_env(output, result)
+        return
+
+    if any(kw in lower for kw in _TEST_ERROR_KEYWORDS):
+        result.failure_type = "TEST_ERROR"
+        return
+
+    if any(kw in lower for kw in _CODE_ERROR_KEYWORDS):
+        result.failure_type = "CODE_ERROR"
+        return
+
+    result.failure_type = "UNKNOWN_ERROR"
+
+
+def _classify_env(output: str, result: TestOutputAnalysis) -> None:
+    """Sub-classify an ENV_ERROR and populate ``env_*`` fields.
+
+    This consolidates the logic of the old ``_classify_env_error()`` from
+    the earlier failure-routing code in ``run_batch.py``, enhanced to
+    extract ALL missing names at once.
+    """
+    # --- 1. NameError: missing imports in source file ---
+    if result.missing_names:
+        names = result.missing_names
+        names_str = ", ".join(f"`{n}`" for n in names)
+        result.env_sub_type = "missing_import"
+        result.env_fix_target = "code"
+        result.env_instruction = (
+            f"The source file uses {names_str} but they are not imported. "
+            f"Add the correct import statements for ALL of these names "
+            f"at the top of the file (after `from __future__` imports). "
+            f"Common mappings: Enumโ†’enum, dataclassโ†’dataclasses, "
+            f"Callable/Optional/Listโ†’typing. "
+            f"Do NOT remove any existing code. Do NOT modify test files."
+        )
+        result.env_details = f"Undefined names: {', '.join(names)}"
+        return
+
+    # --- 2. ModuleNotFoundError / ImportError ---
+    # Only the first missing module is routed per run; any further misses
+    # surface on the next iteration once this one is fixed.
+    if result.missing_modules:
+        missing_mod = result.missing_modules[0]
+        top_level = missing_mod.split(".")[0]
+
+        # Project-internal wrong path?
+        # Dynamically detect project package names from repo layout.
+        _detected_prefix = detect_project_import_prefix(repo_path=_REPO_DIR)
+        project_indicators: set = set()
+        if _detected_prefix:
+            _parts = _detected_prefix.split('.', 1)
+            if len(_parts) == 2:
+                project_indicators.add(_parts[1])
+        if top_level in project_indicators:
+            prefix_str = _detected_prefix or f"src.{top_level}"
+            # NOTE(review): relies on pytest's exact "importing test module"
+            # wording to distinguish a test-side import failure from a
+            # source-side one -- confirm against the pytest versions in use.
+            is_test_import = "importing test module" in output.lower()
+            if is_test_import:
+                result.env_sub_type = "wrong_import_path"
+                result.env_fix_target = "test"
+                result.env_instruction = (
+                    f"The test file uses the wrong import path `{missing_mod}`. "
+                    f"This project uses `{prefix_str}.*` (with `src.` prefix). "
+                    f"Change ALL occurrences of `from {missing_mod}` to "
+                    f"`from src.{missing_mod}` in the test file. "
+                    f"Do NOT modify production/source code."
+                )
+                result.env_details = f"Wrong path: {missing_mod} -> src.{missing_mod}"
+            else:
+                result.env_sub_type = "wrong_import_path"
+                result.env_fix_target = "code"
+                result.env_instruction = (
+                    f"The source file uses the wrong import path `{missing_mod}`. "
+                    f"This project uses `{prefix_str}.*` (with `src.` prefix). "
+                    f"Fix the import path. Do NOT modify test files."
+                )
+                result.env_details = f"Wrong path: {missing_mod}"
+            return
+
+        # Third-party package
+        result.env_sub_type = "missing_package"
+        result.env_fix_target = "code"
+        result.env_instruction = (
+            f"Third-party package `{missing_mod}` is not installed. "
+            f"The build system will attempt auto-installation. "
+            f"If the package is genuinely needed, keep the import. "
+            f"Only remove the import if it is truly NOT used in the code. "
+            f"Do NOT modify test files."
+        )
+        result.env_details = f"Missing package: {missing_mod}"
+        return
+
+    # --- 3. Fallback ---
+    # Reached when an _ENV_KEYWORDS substring matched but no structured
+    # NameError/ModuleNotFoundError could be extracted from the output.
+    result.env_sub_type = "missing_package"
+    result.env_fix_target = "code"
+    result.env_instruction = (
+        "Environment or import issue detected. "
+        "Check the error output and fix the import in the appropriate file."
+    )
+    result.env_details = ""
+
+
+# -- Failure line extraction --------------------------------------------------
+
+_FAILURE_LINE_KEYWORDS = (
+    "FAILED",
+    "ERROR",
+    "AssertionError",
+    "TypeError",
+    "ValueError",
+    "NameError",
+    "AttributeError",
+    "KeyError",
+    "ModuleNotFoundError",
+    "ImportError",
+    "E ",  # pytest indented assertion detail lines
+)
+
+
+def _extract_relevant_lines(output: str, max_chars: int = 1500) -> str:
+    """Extract only failure-relevant lines from pytest output.
+
+    Returns a compact excerpt suitable for prompt injection (~1.5 KB max).
+    """
+    # NOTE(review): keywords are substring tests on the raw (case-sensitive)
+    # output, so "E " also keeps any line that merely contains those two
+    # characters, not only pytest's "E   ..." assertion-detail lines.
+    lines = [
+        line
+        for line in output.split("\n")
+        if any(kw in line for kw in _FAILURE_LINE_KEYWORDS)
+    ]
+    excerpt = "\n".join(lines)
+    if len(excerpt) > max_chars:
+        excerpt = excerpt[:max_chars] + "\n... (truncated)"
+    return excerpt
+
+
+# ============================================================================
+# Keyword filter helpers (used by ``run_batch.py`` to derive ``-k`` patterns)
+# ============================================================================
+
+def build_keyword_filter(units_key: List[str]) -> Optional[str]:
+    """Build a pytest ``-k`` filter expression from unit class names.
+
+    Returns ``None`` when *units_key* is empty.
+
+    Strips common prefixes like "class " and "def " from unit names. 
+ + Example:: + + build_keyword_filter(["class DirtyRegion", "class DirtyRegionTracker"]) + # โ†’ "DirtyRegion or DirtyRegionTracker" + """ + if not units_key: + return None + unique = list(dict.fromkeys(units_key)) + # Strip common prefixes like "class ", "def ", "function " from unit names + cleaned = [] + for unit in unique: + name = unit + for prefix in ["class ", "def ", "async def ", "function "]: + if name.startswith(prefix): + name = name[len(prefix):] + break + cleaned.append(name) + return " or ".join(cleaned) + + +def validate_test_ran(analysis: TestOutputAnalysis) -> bool: + """Return True if at least one test was actually executed. + + Use after ``is_test_successful()`` to guard against the -k filter + matching zero tests (pytest returns exit 0 with ``-v`` in that case). + """ + return analysis.has_tests_run diff --git a/RPG-Kit/scripts/code_gen/test_runner.py b/RPG-Kit/scripts/code_gen/test_runner.py new file mode 100644 index 0000000..4f28020 --- /dev/null +++ b/RPG-Kit/scripts/code_gen/test_runner.py @@ -0,0 +1,886 @@ +#!/usr/bin/env python3 +"""Test Runner Utilities for RPG-Kit Code Generation. + +Provides utilities for: +- Finding test files related to source changes +- Building pytest commands +- Executing tests and parsing results +- Determining test success/failure +""" + +import os +import re +import signal +import subprocess +import sys +import ast +import shutil +import importlib.util +import logging +from pathlib import Path +from typing import List, Tuple, Set, Optional, Dict, Any +from dataclasses import dataclass +from .test_output_parser import TestOutputAnalysis, _parse_stats, _SUMMARY_RE +from .test_output_parser import analyze_test_output +from common.llm_client import LLMClient +import json as _json +from common.import_normalizer import normalize_files + + +def _set_pdeathsig() -> None: + """Preexec hook: ask the kernel to send SIGTERM to this child when its parent dies (including SIGKILL). 
Called after fork() but before exec() so it runs in the child's address space. Silently ignored on non-Linux.""" + try: + import ctypes, signal as _s + ctypes.CDLL("libc.so.6").prctl(1, _s.SIGTERM) # PR_SET_PDEATHSIG = 1 + except Exception: + pass + + +# ============================================================================ +# Test File Detection Patterns +# ============================================================================ + +DEFAULT_TEST_PATTERNS: Tuple[str, ...] = ( + r"(^|/)(tests|test|testing)/.*\.py$", + r"(^|/)test_.*\.py$", + r"(^|/).*_test\.py$", +) + +DEFAULT_PYTHON_PATTERN = r".*\.py$" + + +# ============================================================================ +# Test Result Data Classes +# ============================================================================ + +@dataclass +class TestResult: + """Result of test execution.""" + success: bool + return_code: int + output: str + test_files: List[str] + passed: int = 0 + failed: int = 0 + errors: int = 0 + skipped: int = 0 + duration: float = 0.0 + + def to_dict(self) -> Dict[str, Any]: + return { + "success": self.success, + "return_code": self.return_code, + "output": self.output, + "test_files": self.test_files, + "passed": self.passed, + "failed": self.failed, + "errors": self.errors, + "skipped": self.skipped, + "duration": self.duration, + } + + +# ============================================================================ +# Test File Detection +# ============================================================================ + +def is_test_file(filepath: str, patterns: Tuple[str, ...] = DEFAULT_TEST_PATTERNS) -> bool: + """Check if a file path matches test file patterns.""" + compiled = [re.compile(p) for p in patterns] + return any(p.search(filepath) for p in compiled) + + +def find_test_files_in_directory( + directory: Path, + patterns: Tuple[str, ...] 
= DEFAULT_TEST_PATTERNS +) -> List[str]: + """Find all test files in a directory.""" + test_files = [] + + for root, _, files in os.walk(directory): + for file in files: + if file.endswith('.py'): + filepath = os.path.join(root, file) + rel_path = os.path.relpath(filepath, directory) + if is_test_file(rel_path, patterns): + test_files.append(rel_path) + + return sorted(test_files) + + +def find_related_test_files( + source_file: str, + repo_root: Path +) -> List[str]: + """Find test files related to a source file using path-signature matching. + + Builds a canonical signature from the source path by stripping common + prefixes (``src/``, ``lib/``) and the project-package directory, then + joining remaining directory parts + stem with ``_``. + + Example:: + + src/flask_blog/auth/views.py โ†’ signature "auth_views" + tests/test_auth_views.py โ†’ match โœ“ + + If no signature match is found, falls back to simple stem matching + (the legacy behavior). + + Args: + source_file: Path to the source file (relative to repo root) + repo_root: Repository root path + + Returns: + List of related test file paths (relative to repo root) + """ + source_path = Path(source_file) + if source_path.suffix != '.py': + return [] + + # --- Build canonical signature from source path --- + # Strip known prefixes: "src", "lib" + skip_prefixes = {'src', 'lib'} + parts = list(source_path.parts) + + # Find where meaningful path starts (after src/lib + package root) + start_idx = 0 + if parts and parts[0] in skip_prefixes: + start_idx = 1 + # Also skip the project-package root (e.g., "flask_blog") + # because test file names typically don't include it + if len(parts) > 1: + start_idx = 2 + + # Collect directory parts (excluding the filename) + stem + relevant = [] + for part in parts[start_idx:-1]: + if not part.startswith('.'): + relevant.append(part) + relevant.append(source_path.stem) + src_signature = '_'.join(relevant) + + # --- Search test directories for matching files --- + related_tests 
= [] + test_dirs = ['tests', 'test', 'testing'] + + for test_dir in test_dirs: + test_path = repo_root / test_dir + if not test_path.exists(): + continue + for test_file in test_path.rglob("test_*.py"): + test_sig = test_file.stem.replace('test_', '', 1) + if test_sig == src_signature: + related_tests.append(str(test_file.relative_to(repo_root))) + + # Fallback: if signature matching found nothing, try simple stem match + if not related_tests: + module_name = source_path.stem + for test_dir in test_dirs: + test_path = repo_root / test_dir + if not test_path.exists(): + continue + test_file = test_path / f"test_{module_name}.py" + if test_file.exists(): + related_tests.append(str(test_file.relative_to(repo_root))) + test_file = test_path / f"{module_name}_test.py" + if test_file.exists(): + related_tests.append(str(test_file.relative_to(repo_root))) + + return related_tests + + +def extract_files_from_diff(diff_content: str) -> Tuple[List[str], List[str]]: + """Extract file paths from a git diff. + + Returns: + Tuple of (source_files, test_files) + """ + source_files = [] + test_files = [] + + # Pattern to match file paths in diff + file_pattern = re.compile(r'^diff --git a/(.+) b/(.+)$', re.MULTILINE) + + for match in file_pattern.finditer(diff_content): + filepath = match.group(2) + + if not filepath.endswith('.py'): + continue + + if filepath == '/dev/null': + continue + + if is_test_file(filepath): + test_files.append(filepath) + else: + source_files.append(filepath) + + return source_files, test_files + + +def build_pytest_command( + test_files: List[str], + repo_root: Optional[Path] = None, + verbose: bool = True, + extra_args: Optional[List[str]] = None, + python_exe: Optional[str] = None +) -> List[str]: + """Build a pytest command for running specific test files. 
+ + Args: + test_files: List of test file paths + repo_root: Repository root (for relative paths) + verbose: Include verbose flag + extra_args: Additional pytest arguments + python_exe: Python executable to use (default: "python3") + + Returns: + Command as list of strings + """ + py = python_exe or "python3" + cmd = [py, "-m", "pytest"] + + if verbose: + cmd.append("-v") + + # Add extra args + if extra_args: + cmd.extend(extra_args) + + # Add test files + for test_file in test_files: + if repo_root: + full_path = repo_root / test_file + if full_path.exists(): + cmd.append(str(test_file)) + else: + cmd.append(test_file) + + return cmd + + +def build_comprehensive_test_command( + diff_content: str, + repo_root: Path, + extra_args: Optional[List[str]] = None +) -> Tuple[List[str], Dict[str, Any]]: + """Build a pytest command that covers all relevant tests for a diff. + + This includes: + 1. Test files directly modified in the diff + 2. Test files related to modified source files + + Args: + diff_content: Git diff content + repo_root: Repository root path + extra_args: Additional pytest arguments + + Returns: + Tuple of (command, analysis_info) + """ + source_files, diff_test_files = extract_files_from_diff(diff_content) + + # Find related test files for modified source files + related_test_files = [] + for source_file in source_files: + related = find_related_test_files(source_file, repo_root) + related_test_files.extend(related) + + # Combine all test files + all_test_files = list(set(diff_test_files + related_test_files)) + + # If no specific test files, run all tests + if not all_test_files: + cmd = ["python3", "-m", "pytest"] + if extra_args: + cmd.extend(extra_args) + else: + cmd = build_pytest_command(all_test_files, repo_root, extra_args=extra_args) + + analysis_info = { + "patch_source_files": source_files, + "patch_test_files": diff_test_files, + "related_test_files": related_test_files, + "all_test_files": all_test_files, + } + + return cmd, analysis_info + 

# ============================================================================
# Test Execution
# ============================================================================

def run_pytest(
    repo_root: Path,
    test_files: Optional[List[str]] = None,
    timeout: int = 300,
    extra_args: Optional[List[str]] = None,
    env: Optional[Dict[str, str]] = None
) -> TestResult:
    """Run pytest and return results.

    If a dev venv exists at DEV_VENV_DIR, its python is used automatically.

    Args:
        repo_root: Repository root path
        test_files: Specific test files to run (None = all)
        timeout: Timeout in seconds
        extra_args: Additional pytest arguments
        env: Environment variables

    Returns:
        TestResult with execution details
    """
    # Use dev venv python if available
    python_exe = get_dev_python(repo_root)

    # Build command
    if test_files:
        cmd = build_pytest_command(test_files, repo_root, extra_args=extra_args, python_exe=python_exe)
    else:
        # No explicit files: run the full suite verbosely.
        cmd = [python_exe or "python3", "-m", "pytest", "-v"]
        if extra_args:
            cmd.extend(extra_args)

    # Setup environment.  PYTHONPATH is pointed at the repo root so tests can
    # import the project without an editable install.
    run_env = os.environ.copy()
    run_env["PYTHONPATH"] = str(repo_root)
    if env:
        run_env.update(env)

    try:
        proc = subprocess.Popen(
            cmd,
            cwd=repo_root,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            env=run_env,
            start_new_session=True,  # own process group → killpg kills pytest + children
            preexec_fn=_set_pdeathsig,  # PR_SET_PDEATHSIG: killed even when parent SIGKILL'd
        )
        try:
            stdout_data, stderr_data = proc.communicate(timeout=timeout)
        except BaseException:
            # Kill the entire pytest process group (covers forked workers, etc.)
            # BaseException on purpose: TimeoutExpired AND KeyboardInterrupt /
            # SystemExit must all tear the group down before re-raising.
            try:
                os.killpg(os.getpgid(proc.pid), signal.SIGTERM)
            except Exception:
                # Group may already be gone; fall back to killing the child only.
                proc.kill()
            proc.wait()
            raise

        output = stdout_data
        if stderr_data:
            output += "\n\nSTDERR:\n" + stderr_data

        # Parse results.  pytest exit code 0 == all collected tests passed.
        success = proc.returncode == 0
        stats = parse_pytest_output(output)

        return TestResult(
            success=success,
            return_code=proc.returncode,
            output=output,
            test_files=test_files or [],
            passed=stats.get("passed", 0),
            failed=stats.get("failed", 0),
            errors=stats.get("errors", 0),
            skipped=stats.get("skipped", 0),
            duration=stats.get("duration", 0.0),
        )

    except subprocess.TimeoutExpired:
        # Re-raised by the inner handler after the process group was killed.
        return TestResult(
            success=False,
            return_code=-1,
            output=f"Test execution timed out after {timeout} seconds",
            test_files=test_files or [],
        )
    except Exception as e:
        return TestResult(
            success=False,
            return_code=-1,
            output=f"Test execution failed: {str(e)}",
            test_files=test_files or [],
        )


def parse_pytest_output(output: str) -> Dict[str, Any]:
    """Parse pytest output to extract statistics.

    Returns dict with: passed, failed, errors, skipped, duration

    .. note:: The canonical implementation now lives in
        ``code_gen.test_output_parser._parse_stats``. This wrapper is kept
        for backward compatibility.
    """
    result = TestOutputAnalysis()
    _parse_stats(output, result)
    return {
        "passed": result.passed,
        "failed": result.failed,
        "errors": result.errors,
        "skipped": result.skipped,
        "duration": result.duration,
    }


def is_test_successful(return_code: int, test_output: str) -> bool:
    """Determine if tests passed based on return code and output.

    Args:
        return_code: pytest return code
        test_output: pytest output text

    Returns:
        True if tests passed
    """
    # Return code 0 means success
    if return_code == 0:
        return True

    # Return code 5 means no tests collected (not a failure)
    if return_code == 5:
        # Check if this is expected ("no tests ran" summary line)
        if "no tests ran" in test_output.lower():
            return True

    return False


# ============================================================================
# Failure Type Detection (Simple Heuristics)
# ============================================================================

def detect_failure_type_simple(test_output: str) -> str:
    """Detect failure type using simple heuristics (no LLM).

    Returns: "TEST_ERROR", "CODE_ERROR", "ENV_ERROR", or "UNKNOWN_ERROR"

    .. note:: The canonical implementation now lives in
        ``code_gen.test_output_parser._classify``. This wrapper is kept
        for backward compatibility and returns upper-case values.
    """
    analysis = analyze_test_output(test_output)
    return analysis.failure_type or "UNKNOWN_ERROR"


# ============================================================================
# Dev Virtual Environment Management
# ============================================================================

# Bare directory name kept locally for backward compatibility with callers
# that build relative paths. ``common.paths.DEV_VENV_NAME`` is the
# canonical source of truth; the import re-exports it under the original
# name so existing ``from code_gen.test_runner import DEV_VENV_DIR``
# imports keep working (only call sites today are
# ``code_gen.test_runner``-internal anyway).
from common.paths import DEV_VENV_NAME as DEV_VENV_DIR

_logger = logging.getLogger(__name__)

# Stdlib modules — used to filter out standard library imports during scanning.
_STDLIB_TOP_LEVEL = frozenset({
    'abc', 'aifc', 'argparse', 'array', 'ast', 'asynchat', 'asyncio',
    'asyncore', 'atexit', 'audioop', 'base64', 'bdb', 'binascii',
    'binhex', 'bisect', 'builtins', 'bz2', 'calendar', 'cgi', 'cgitb',
    'chunk', 'cmath', 'cmd', 'code', 'codecs', 'codeop', 'collections',
    'colorsys', 'compileall', 'concurrent', 'configparser', 'contextlib',
    'contextvars', 'copy', 'copyreg', 'cProfile', 'crypt', 'csv',
    'ctypes', 'curses', 'dataclasses', 'datetime', 'dbm', 'decimal',
    'difflib', 'dis', 'distutils', 'doctest', 'email', 'encodings',
    'enum', 'errno', 'faulthandler', 'fcntl', 'filecmp', 'fileinput',
    'fnmatch', 'fractions', 'ftplib', 'functools', 'gc', 'getopt',
    'getpass', 'gettext', 'glob', 'graphlib', 'grp', 'gzip', 'hashlib',
    'heapq', 'hmac', 'html', 'http', 'idlelib', 'imaplib', 'imghdr',
    'imp', 'importlib', 'inspect', 'io', 'ipaddress', 'itertools',
    'json', 'keyword', 'lib2to3', 'linecache', 'locale', 'logging',
    'lzma', 'mailbox', 'mailcap', 'marshal', 'math', 'mimetypes',
    'mmap', 'modulefinder', 'multiprocessing', 'netrc', 'nis',
    'nntplib', 'numbers', 'operator', 'optparse', 'os', 'ossaudiodev',
    'pathlib', 'pdb', 'pickle', 'pickletools', 'pipes', 'pkgutil',
    'platform', 'plistlib', 'poplib', 'posix', 'posixpath', 'pprint',
    'profile', 'pstats', 'pty', 'pwd', 'py_compile', 'pyclbr',
    'pydoc', 'queue', 'quopri', 'random', 're', 'readline', 'reprlib',
    'resource', 'rlcompleter', 'runpy', 'sched', 'secrets', 'select',
    'selectors', 'shelve', 'shlex', 'shutil', 'signal', 'site',
    'smtpd', 'smtplib', 'sndhdr', 'socket', 'socketserver', 'spwd',
    'sqlite3', 'ssl', 'stat', 'statistics', 'string', 'stringprep',
    'struct', 'subprocess', 'sunau', 'symtable', 'sys', 'sysconfig',
    'syslog', 'tabnanny', 'tarfile', 'telnetlib', 'tempfile', 'termios',
    'test', 'textwrap', 'threading', 'time', 'timeit', 'tkinter',
    'token', 'tokenize', 'trace', 'traceback', 'tracemalloc', 'tty',
    'turtle', 'turtledemo', 'types', 'typing', 'unicodedata', 'unittest',
    'urllib', 'uu', 'uuid', 'venv', 'warnings', 'wave', 'weakref',
    'webbrowser', 'winreg', 'winsound', 'wsgiref', 'xdrlib', 'xml',
    'xmlrpc', 'zipapp', 'zipfile', 'zipimport', 'zlib', 'zoneinfo',
    '_thread', '__future__', 'typing_extensions',
})

# Common import-name → PyPI-package-name mappings
_IMPORT_TO_PACKAGE: Dict[str, str] = {
    'cv2': 'opencv-python',
    'PIL': 'Pillow',
    'sklearn': 'scikit-learn',
    'yaml': 'PyYAML',
    'bs4': 'beautifulsoup4',
    'dateutil': 'python-dateutil',
    'dotenv': 'python-dotenv',
    'jwt': 'PyJWT',
    'serial': 'pyserial',
    'usb': 'pyusb',
    'git': 'GitPython',
    'skimage': 'scikit-image',
    'attr': 'attrs',
    'wx': 'wxPython',
}


def get_dev_venv_path(repo_root: Path) -> Path:
    """Return the path to the dev venv directory."""
    return repo_root / DEV_VENV_DIR


def get_dev_python(repo_root: Path) -> Optional[str]:
    """Return the dev venv python executable path, or None if venv doesn't exist."""
    venv_path = get_dev_venv_path(repo_root)
    if sys.platform == "win32":
        py = venv_path / "Scripts" / "python.exe"
    else:
        py = venv_path / "bin" / "python"
    if py.exists():
        return str(py)
    return None


def ensure_dev_venv(repo_root: Path) -> Tuple[bool, Path]:
    """Lazily create the dev venv if it doesn't exist.

    Installs pytest into it on creation.  Prefers ``uv venv`` when ``uv``
    is on PATH, otherwise falls back to ``python -m venv``.

    Returns:
        Tuple of (created_new, venv_path)
    """
    venv_path = get_dev_venv_path(repo_root)
    py = get_dev_python(repo_root)
    if py is not None:
        return False, venv_path

    _logger.info("Creating dev venv at %s", venv_path)
    uv = shutil.which("uv")
    try:
        if uv:
            subprocess.run(
                ["uv", "venv", str(venv_path)],
                cwd=repo_root, capture_output=True, text=True, timeout=60,
                check=True,
            )
        else:
            subprocess.run(
                [sys.executable, "-m", "venv", str(venv_path)],
                cwd=repo_root, capture_output=True, text=True, timeout=120,
                check=True,
            )
    except (subprocess.CalledProcessError, subprocess.TimeoutExpired) as exc:
        _logger.error("Failed to create dev venv: %s", exc)
        return False, venv_path

    # Install pytest into the new venv
    install_packages_into_venv(["pytest", "pytest-timeout"], repo_root)
    return True, venv_path


def _build_pip_cmd(
    pip_names: List[str],
    repo_root: Path,
) -> List[str]:
    """Build a complete pip/uv install command for the given packages."""
    venv_path = get_dev_venv_path(repo_root)
    uv = shutil.which("uv")
    if uv:
        # uv installs into the interpreter named via --python.
        py_exe = get_dev_python(repo_root) or str(venv_path / "bin" / "python")
        return ["uv", "pip", "install"] + pip_names + ["--python", py_exe]
    else:
        if sys.platform == "win32":
            pip_exe = str(venv_path / "Scripts" / "pip")
        else:
            pip_exe = str(venv_path / "bin" / "pip")
        return [pip_exe, "install"] + pip_names


def _pip_install_single(pkg: str, repo_root: Path) -> bool:
    """Try to pip-install a single package. Returns True on success."""
    try:
        cmd = _build_pip_cmd([pkg], repo_root)
        result = subprocess.run(
            cmd, cwd=repo_root,
            capture_output=True, text=True, timeout=120,
        )
        return result.returncode == 0
    except Exception:
        # Best-effort helper: callers only care about success/failure.
        return False


def resolve_pip_names(
    import_names: List[str],
    repo_root: Path,
) -> Dict[str, str]:
    """Resolve Python import names to pip-installable package names.

    Strategy (layered):
    1. Static mapping table (_IMPORT_TO_PACKAGE) for known mismatches.
    2. For unmapped names, batch-ask LLM to resolve import→package.
    3. Fallback: use import name as-is (works for ~80% of packages).

    Returns:
        Dict mapping import_name → pip_package_name.
    """
    resolved: Dict[str, str] = {}
    needs_llm: List[str] = []

    for name in import_names:
        if name in _IMPORT_TO_PACKAGE:
            resolved[name] = _IMPORT_TO_PACKAGE[name]
        else:
            needs_llm.append(name)

    if not needs_llm:
        return resolved

    # Try LLM resolution for unknown mappings
    llm_resolved = _resolve_packages_via_llm(needs_llm)
    for name in needs_llm:
        pip_name = llm_resolved.get(name)
        if pip_name and pip_name != "UNKNOWN":
            resolved[name] = pip_name
        else:
            # Fallback: use import name directly
            resolved[name] = name

    return resolved


def _resolve_packages_via_llm(import_names: List[str]) -> Dict[str, str]:
    """Ask LLM to resolve import names to PyPI package names.

    Uses a single, cheap LLM call (~200 tokens). Falls back to empty dict
    on any error so callers can use the import-name-as-is fallback.

    Returns:
        Dict of {import_name: pip_package_name} for successfully resolved names.
    """
    if not import_names:
        return {}

    try:
        prompt = (
            "Map these Python import names to their PyPI package names.\n"
            "Return ONLY a JSON object: {\"import_name\": \"pip_package_name\"}\n"
            "Rules:\n"
            "- If the import name equals the pip package name, repeat it "
            "(e.g. \"numpy\": \"numpy\").\n"
            "- If you don't know, use \"UNKNOWN\".\n"
            "- Do NOT include any explanation, only the JSON object.\n\n"
            f"Import names: {_json.dumps(import_names)}"
        )

        client = LLMClient()
        response = client.generate(prompt, purpose="resolve_pip_names", timeout=60)
        parsed = client.parse_json_block(response)

        if parsed and isinstance(parsed, dict):
            # Cache successful resolutions for this session
            for k, v in parsed.items():
                if v and v != "UNKNOWN" and k not in _IMPORT_TO_PACKAGE:
                    _IMPORT_TO_PACKAGE[k] = v
            return parsed
    except Exception as exc:
        _logger.warning("LLM package resolution failed: %s", exc)

    return {}


def install_packages_into_venv(
    packages: List[str],
    repo_root: Path,
) -> Tuple[bool, List[str]]:
    """Install packages into the dev venv.

    Resolves import names to pip package names (via mapping table + LLM),
    tries bulk install first, then falls back to per-package install for
    any failures.

    Args:
        packages: List of import names to install
        repo_root: Repository root

    Returns:
        Tuple of (any_succeeded, list of packages actually installed)
    """
    if not packages:
        return True, []

    # Resolve import names → pip package names
    name_map = resolve_pip_names(packages, repo_root)
    pip_names = [name_map.get(p, p) for p in packages]
    # Deduplicate while preserving order
    seen: Set[str] = set()
    unique_pip_names: List[str] = []
    for n in pip_names:
        if n not in seen:
            seen.add(n)
            unique_pip_names.append(n)
    pip_names = unique_pip_names

    # Try bulk install first
    try:
        cmd = _build_pip_cmd(pip_names, repo_root)
        result = subprocess.run(
            cmd, cwd=repo_root,
            capture_output=True, text=True, timeout=300,
        )
        if result.returncode == 0:
            return True, pip_names
    except Exception as exc:
        # FIX: TimeoutExpired is already an Exception subclass — the old
        # ``except (subprocess.TimeoutExpired, Exception)`` was redundant.
        _logger.warning("Bulk install error: %s", exc)

    # Bulk failed — install individually, collect successes
    _logger.info("Bulk install failed, retrying packages individually...")
    installed: List[str] = []
    for pkg in pip_names:
        if _pip_install_single(pkg, repo_root):
            installed.append(pkg)
        else:
            _logger.warning("Failed to install package: %s", pkg)

    return (len(installed) > 0, installed)


# ---------------------------------------------------------------------------
# Import prefix normalization
# ---------------------------------------------------------------------------

def fix_import_prefixes(repo_root: Path) -> List[str]:
    """Fix inconsistent import prefixes in source files.

    Delegates to :func:`common.import_normalizer.normalize_files`.
    Kept here for backward compatibility with existing callers.

    Returns:
        List of file paths (relative to *repo_root*) that were modified.
    """
    return normalize_files(repo_root)


def scan_missing_imports(repo_root: Path) -> List[str]:
    """Scan all Python files under src/ and tests/ for imports that cannot be resolved in the environment that will run tests.

    When a dev venv exists, the check runs inside the venv python so the
    result matches what pytest will actually see.

    Returns:
        List of top-level module names that are missing.
    """
    src_dir = repo_root / "src"
    tests_dir = repo_root / "tests"

    # Collect project top-level package names (to skip internal imports).
    # Include ALL subdirectories under src/ (not just those with __init__.py)
    # to handle namespace packages correctly.
    project_modules: Set[str] = set()
    for d in [src_dir, tests_dir]:
        if d.is_dir():
            project_modules.add(d.name)
            for child in d.iterdir():
                if child.is_dir() and not child.name.startswith('.'):
                    project_modules.add(child.name)

    # Collect all external imports from source files
    external_imports: Set[str] = set()
    scan_dirs = [d for d in [src_dir, tests_dir] if d.is_dir()]

    for scan_dir in scan_dirs:
        for py_file in scan_dir.rglob("*.py"):
            if any(part.startswith('.') or part == '__pycache__' for part in py_file.parts):
                continue
            try:
                source = py_file.read_text(encoding='utf-8')
                tree = ast.parse(source)
            except (SyntaxError, UnicodeDecodeError):
                continue
            for node in ast.walk(tree):
                # BUG FIX: "import a, b" previously recorded only the LAST
                # alias (the loop overwrote mod_name). Collect every alias.
                candidates: List[str] = []
                if isinstance(node, ast.Import):
                    candidates = [alias.name.split('.')[0] for alias in node.names]
                elif isinstance(node, ast.ImportFrom):
                    # level > 0 is a relative import → always project-internal.
                    if node.module and node.level == 0:
                        candidates = [node.module.split('.')[0]]
                for mod_name in candidates:
                    if mod_name in _STDLIB_TOP_LEVEL or mod_name in project_modules:
                        continue
                    external_imports.add(mod_name)

    if not external_imports:
        return []

    # Determine which python to check against — dev venv if it exists,
    # otherwise the system python that will run tests.
    py_exe = get_dev_python(repo_root) or sys.executable

    # Check importability in the target python via a single subprocess call
    # to avoid per-module overhead.
    check_script = (
        "import importlib.util, json, sys\n"
        "modules = json.loads(sys.argv[1])\n"
        "missing = [m for m in modules if importlib.util.find_spec(m) is None]\n"
        "print(json.dumps(missing))\n"
    )
    try:
        # The script prints a JSON list of unresolvable top-level modules,
        # evaluated inside the TARGET interpreter (dev venv when present).
        result = subprocess.run(
            [py_exe, "-c", check_script, _json.dumps(sorted(external_imports))],
            capture_output=True, text=True, timeout=30,
            env={**os.environ, "PYTHONPATH": str(repo_root)},
        )
        if result.returncode == 0 and result.stdout.strip():
            return _json.loads(result.stdout.strip())
    except Exception as exc:
        _logger.warning("Subprocess import check failed, falling back: %s", exc)

    # Fallback: check in current process (may be inaccurate if dev venv exists)
    missing: Set[str] = set()
    for mod_name in external_imports:
        if importlib.util.find_spec(mod_name) is None:
            missing.add(mod_name)
    return sorted(missing)


def ensure_deps_installed(repo_root: Path) -> Tuple[bool, List[str]]:
    """Ensure dev venv exists and all detectable third-party deps are installed.

    This is the single entry point for proactive dependency management.
    Call before running pytest for the first time in a batch.

    Steps:
    1. Create dev venv if it doesn't exist (+ install pytest).
    2. AST-scan src/ and tests/ for third-party imports.
    3. Check which imports are missing in the venv.
    4. Resolve import names → pip package names (mapping table + LLM).
    5. Install missing packages (bulk, with per-package retry on failure).
+ + Returns: + (any_installed, list_of_installed_pip_names) + """ + ensure_dev_venv(repo_root) + missing = scan_missing_imports(repo_root) + if not missing: + return False, [] + _logger.info("Detected missing imports: %s", missing) + ok, installed = install_packages_into_venv(missing, repo_root) + if installed: + _logger.info("Auto-installed packages: %s", installed) + return ok, installed diff --git a/RPG-Kit/scripts/common/__init__.py b/RPG-Kit/scripts/common/__init__.py new file mode 100644 index 0000000..7d0be31 --- /dev/null +++ b/RPG-Kit/scripts/common/__init__.py @@ -0,0 +1,232 @@ +#!/usr/bin/env python3 +"""Common Module. + +This module contains common utilities shared across all scripts. +""" + +from .trajectory import ( + Trajectory, + StepStatus, + LLMInteraction, + load_or_create_trajectory, +) + +from .llm_client import ( + LLMClient, + LLMCallRecord, +) + +from .task_batch import ( + PlannedTask, + load_tasks_from_tasks_json, + get_task_by_id, + get_next_pending_task, +) + +from .execution_state import ( + BatchExecutionState, + CodeGenState, + WorkflowPhase, + FailureType, + WorkflowType, + load_code_gen_state, + save_code_gen_state, + update_batch_state, + complete_batch, + reset_rpg_backup_tracking, +) + +from .git_utils import ( + GitRunner, + GitResult, + create_task_branch, + complete_task_branch, +) + +from .paths import ( + RPGKIT_DIR, + + SKELETON_FILE, + DATA_FLOW_FILE, + INTERFACES_FILE, + BASE_CLASSES_FILE, + RPG_FILE, + REPO_RPG_FILE, + DEP_GRAPH_FILE, + REPO_INFO_FILE, + TASKS_FILE, + CODE_GEN_STATE_FILE, + TRAJECTORY_DIR, + SKELETON_SUMMARY_FILE, + ensure_rpgkit_dir, + get_trajectory_file, +) + +from .utils import ( + print_unicode_table, + get_skeleton_tree_string, + extract_functional_areas_from_skeleton, + format_functional_graph_overview, + extract_component_directories, + validate_python_syntax, + format_data_flow_edges, + format_base_classes, + format_data_structures, + format_base_classes_and_data_structures, + 
get_repo_info_from_files, + get_project_background_context, + get_all_leaf_paths, + get_leaf_name, + get_leaf_description, + get_all_leaf_descriptions, + extract_class_names, + # --- M4 Utils: newly ported functions --- + normalize_path, + is_test_file, + merge_intervals, + filter_excluded_files, + parse_solution_output, + parse_code_blocks, + get_skeleton, + transfer_parsed_tree, + format_parsed_tree, + iterative_by_folder, + get_node_range_robust, + extract_source_by_lines, + # Utils (M6: token counting) + calculate_tokens, + truncate_by_token, +) + +from .tools import ( + Tool, + ToolCall, + ToolCallArguments, + ToolError, + ToolExecResult, + ToolExecutionError, + ToolExecutor, + ToolHandler, + ToolNotFoundError, + ToolParameter, + ToolResult, + ToolValidationError, +) + +from .llm_types import ( + LLMMessage, + LLMResponse, + LLMUsage, + Message, + UserMessage, + SystemMessage, + AssistantMessage, + ToolMessage, + Memory, +) + +__all__ = [ + # Trajectory + "Trajectory", + "StepStatus", + "LLMInteraction", + "load_or_create_trajectory", + # LLM Client + "LLMClient", + "LLMCallRecord", + # Task Batch + "PlannedTask", + "load_tasks_from_tasks_json", + "get_task_by_id", + "get_next_pending_task", + # Execution State + "BatchExecutionState", + "CodeGenState", + "WorkflowPhase", + "FailureType", + "WorkflowType", + "load_code_gen_state", + "save_code_gen_state", + "update_batch_state", + "complete_batch", + "reset_rpg_backup_tracking", + # Git Utils + "GitRunner", + "GitResult", + "create_task_branch", + "complete_task_branch", + # Paths + "RPGKIT_DIR", + + "SKELETON_FILE", + "DATA_FLOW_FILE", + "INTERFACES_FILE", + "BASE_CLASSES_FILE", + "RPG_FILE", + "REPO_RPG_FILE", + "DEP_GRAPH_FILE", + "REPO_INFO_FILE", + "TASKS_FILE", + "CODE_GEN_STATE_FILE", + "TRAJECTORY_DIR", + "SKELETON_SUMMARY_FILE", + "ensure_rpgkit_dir", + "get_trajectory_file", + # Utils + "print_unicode_table", + "get_skeleton_tree_string", + "extract_functional_areas_from_skeleton", + 
"format_functional_graph_overview", + "extract_component_directories", + "validate_python_syntax", + "format_data_flow_edges", + "format_base_classes", + "format_data_structures", + "format_base_classes_and_data_structures", + "get_repo_info_from_files", + "get_project_background_context", + "get_all_leaf_paths", + "get_leaf_name", + "get_leaf_description", + "get_all_leaf_descriptions", + "extract_class_names", + # Utils (M4: ported from RPG-ZeroRepo) + "normalize_path", + "is_test_file", + "merge_intervals", + "filter_excluded_files", + "parse_solution_output", + "parse_code_blocks", + "get_skeleton", + "transfer_parsed_tree", + "format_parsed_tree", + "iterative_by_folder", + "get_node_range_robust", + "extract_source_by_lines", + # Utils (M6: token counting) + "calculate_tokens", + "truncate_by_token", + # Tools + "Tool", + "ToolCall", + "ToolCallArguments", + "ToolError", + "ToolExecResult", + "ToolExecutionError", + "ToolExecutor", + "ToolHandler", + "ToolNotFoundError", + "ToolParameter", + "ToolResult", + "ToolValidationError", + # LLM Types (M5: ported from RPG-ZeroRepo) + "LLMMessage", + "LLMResponse", + "LLMUsage", + "Message", + "UserMessage", + "SystemMessage", + "AssistantMessage", + "ToolMessage", + "Memory", +] diff --git a/RPG-Kit/scripts/common/execution_state.py b/RPG-Kit/scripts/common/execution_state.py new file mode 100644 index 0000000..1e3e5b3 --- /dev/null +++ b/RPG-Kit/scripts/common/execution_state.py @@ -0,0 +1,758 @@ +#!/usr/bin/env python3 +"""Execution State Management for RPG-Kit Code Generation. + +Manages the state of code generation execution, including: +- Current batch being processed +- Iteration tracking within a batch +- Failure history and analysis +- Git commit tracking + +This module handles state persistence between command invocations, +which is essential since RPG-Kit uses multiple CLI sessions. 
+""" + +import json +import logging +from datetime import datetime +from enum import Enum +from pathlib import Path +from typing import ClassVar, Dict, List, Any, Optional +from dataclasses import dataclass, field, asdict + +from .paths import CODE_GEN_STATE_FILE as STATE_FILE +from .trajectory import Trajectory +from .paths import WORKSPACE_ROOT + + +# ============================================================================ +# Enums +# ============================================================================ + +class WorkflowPhase(str, Enum): + """Current phase in the TDD workflow. + + The code generation process follows a fixed phase progression: + INIT โ†’ TEST_GEN โ†’ CODE_GEN โ†’ TESTING โ†’ ANALYZING โ†’ COMPLETE/FAILED. + """ + INIT = "init" # Initial state, not started + TEST_GEN = "test_gen" # Generating tests + CODE_GEN = "code_gen" # Generating implementation + TESTING = "testing" # Running tests + ANALYZING = "analyzing" # Analyzing failure + COMPLETE = "complete" # Batch completed successfully + FAILED = "failed" # Batch failed after max iterations + + +class FailureType(str, Enum): + """Classification of failures during code generation. + + Used by the failure analysis step to decide whether to + regenerate tests, fix implementation code, or adjust the environment. + """ + TEST_ERROR = "test_error" # Test itself is wrong + CODE_ERROR = "code_error" # Code implementation is wrong + ENV_ERROR = "env_error" # Environment/setup issues + UNKNOWN_ERROR = "unknown_error" # Unknown/unclassified error + + +class WorkflowType(str, Enum): + """Workflow classification for commit messages and progress tracking. + + Each TDD iteration is labeled with a workflow type so that + git history and trajectory logs are easy to filter. 
+ """ + TEST_DEVELOPMENT = "test_development" + TEST_FIX = "test_fix" + CODE_INCREMENTAL = "code_incremental" + CODE_BUG_FIX = "code_bug_fix" + ENV_SETUP = "env_setup" + # Legacy support (aliases) + TEST_GENERATION = "test_development" + CODE_GENERATION = "code_incremental" + + +# ============================================================================ +# Iteration Record +# ============================================================================ + +@dataclass +class IterationRecord: + """Record of a single iteration attempt.""" + iteration: int + timestamp: str + phase: str + test_generated: bool = False + code_generated: bool = False + test_passed: bool = False + failure_type: Optional[str] = None + failure_analysis: Optional[str] = None + test_output: Optional[str] = None + commits: List[str] = field(default_factory=list) + + def to_dict(self) -> Dict[str, Any]: + return asdict(self) + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "IterationRecord": + return cls(**data) + + +# ============================================================================ +# Batch Execution State +# ============================================================================ + +@dataclass +class BatchExecutionState: + """State of execution for a single batch. + + This is persisted between command invocations to maintain + continuity across the TDD iteration loop. 
+ """ + # Batch identification + batch_id: str + file_path: str + subtree: str + + # Iteration control + iteration: int = 0 + max_iterations: int = 5 + phase: str = WorkflowPhase.INIT.value + + # Current iteration state + test_prompt: str = "" # Current test generation prompt + code_prompt: str = "" # Current code generation prompt + test_generated: bool = False # Test code generated this iteration + code_generated: bool = False # Implementation code generated this iteration + + # Failure tracking (for next iteration) + last_test_output: str = "" + last_failure_type: Optional[str] = None + last_failure_analysis: str = "" + failure_history: List[str] = field(default_factory=list) + + # Git tracking + branch_name: str = "" + initial_commit: str = "" # Commit when batch started + current_commit: str = "" # Latest commit + commits_this_batch: List[str] = field(default_factory=list) + + # Iteration history + iterations: List[Dict] = field(default_factory=list) + + # Timestamps + started_at: str = "" + completed_at: str = "" + + # Agent call tracking + last_agent_result: Optional[Dict[str, Any]] = None + agent_call_count: int = 0 + + # Merged task tracking (file-level merge mode) + # Contains all original task IDs when multiple tasks are merged into one batch. + # Empty list means single-task mode. + merged_task_ids: List[str] = field(default_factory=list) + + # Phase skip flags + skip_tests: bool = False # Skip test running (e.g., documentation batches) + skip_code_gen: bool = False # Skip code generation (e.g., integration test batches) + + # Pending test fix (for code_then_test flow) + # When the failure-analysis pass determines both code and test need fixing, + # it sets phase=code_gen first and stores the test fix plan here. + # After code-gen + post-verify, if pending_test_fix is True, the main + # loop should go directly to test-fix (skip analyse-failure). 
+ pending_test_fix: bool = False + pending_test_fix_plan: str = "" + + # Structured test output analysis (from code_gen.test_output_parser). + # Stored as dict so the failure-analysis handler can consume it without re-parsing. + last_test_analysis: Optional[Dict] = None + + def to_dict(self) -> Dict[str, Any]: + return asdict(self) + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "BatchExecutionState": + # Handle enum conversion, filter out unknown fields for backward compatibility + valid_fields = {f.name for f in __import__('dataclasses').fields(cls)} + filtered_data = {k: v for k, v in data.items() if k in valid_fields} + return cls(**filtered_data) + + def start_iteration(self) -> None: + """Start a new iteration.""" + self.iteration += 1 + self.test_generated = False + self.code_generated = False + + # Record iteration start + record = IterationRecord( + iteration=self.iteration, + timestamp=datetime.now().isoformat(), + phase=self.phase + ) + self.iterations.append(record.to_dict()) + + def update_iteration(self, **kwargs) -> None: + """Update current iteration record.""" + if self.iterations: + self.iterations[-1].update(kwargs) + + def record_failure(self, failure_type: str, analysis: str, test_output: str) -> None: + """Record a failure for the current iteration.""" + self.last_failure_type = failure_type + self.last_failure_analysis = analysis + self.last_test_output = test_output + self.failure_history.append(failure_type) + + self.update_iteration( + failure_type=failure_type, + failure_analysis=analysis, + test_output=test_output[:2000] # Truncate for storage + ) + + def record_commit(self, commit_hash: str) -> None: + """Record a commit made during this batch.""" + self.current_commit = commit_hash + self.commits_this_batch.append(commit_hash) + + if self.iterations: + commits = self.iterations[-1].get("commits", []) + commits.append(commit_hash) + self.iterations[-1]["commits"] = commits + + def mark_complete(self, success: bool) -> 
None: + """Mark the batch as complete.""" + self.phase = WorkflowPhase.COMPLETE.value if success else WorkflowPhase.FAILED.value + self.completed_at = datetime.now().isoformat() + + if self.iterations: + self.iterations[-1]["test_passed"] = success + + def can_continue(self) -> bool: + """Check if more iterations are allowed.""" + return self.iteration < self.max_iterations and self.phase not in [ + WorkflowPhase.COMPLETE.value, + WorkflowPhase.FAILED.value + ] + + def get_workflow_type( + self, + for_test: bool = True, + task_description: str = "" + ) -> str: + """Determine workflow type based on current state. + + Args: + for_test: Whether this is for test generation (True) or code generation (False) + task_description: Task description to check for fix-related keywords + """ + # Check for fix-related keywords in task description (like ZeroRepo) + fix_keywords = ['fix', 'repair', 'correct', 'debug', 'resolve', 'bug', 'issue', 'error', 'problem'] + is_fix_task = any(keyword in task_description.lower() for keyword in fix_keywords) if task_description else False + + if for_test: + # Test workflow + if is_fix_task: + return WorkflowType.TEST_FIX.value + if self.iteration == 1 and not self.failure_history: + return WorkflowType.TEST_DEVELOPMENT.value + elif FailureType.TEST_ERROR.value in (self.failure_history[-3:] if self.failure_history else []): + return WorkflowType.TEST_FIX.value + return WorkflowType.TEST_DEVELOPMENT.value + else: + # Code workflow + if is_fix_task: + return WorkflowType.CODE_BUG_FIX.value + if self.iteration == 1 and not self.failure_history: + return WorkflowType.CODE_INCREMENTAL.value + elif any(f in (self.failure_history[-3:] if self.failure_history else []) + for f in [FailureType.CODE_ERROR.value, FailureType.UNKNOWN_ERROR.value]): + return WorkflowType.CODE_BUG_FIX.value + return WorkflowType.CODE_INCREMENTAL.value + + +# ============================================================================ +# Global Execution State +# 
# ============================================================================
# Global Execution State
# ============================================================================

@dataclass
class CodeGenState:
    """Global state for the entire code generation process.

    Tracks overall progress across all tasks.
    """
    # Overall progress
    total_tasks: int = 0
    completed_tasks: int = 0
    failed_tasks: int = 0

    # Task tracking (individual PlannedTask IDs)
    completed_task_ids: List[str] = field(default_factory=list)
    failed_task_ids: List[str] = field(default_factory=list)
    skipped_task_ids: List[str] = field(default_factory=list)

    # Current batch (if any)
    current_batch_id: Optional[str] = None
    current_batch_state: Optional[Dict] = None

    # Initialization tracking
    initialized: bool = False
    initialized_at: Optional[str] = None
    initial_commit: Optional[str] = None

    # Timestamps (ISO 8601)
    started_at: str = ""
    last_updated: str = ""

    # RPG backup tracking (to avoid multiple backups per code_gen run)
    rpg_backup_path: Optional[str] = None

    # Trajectory file path (relative to repo root)
    trajectory_file: Optional[str] = None

    # Whether interface skeletons have been written to source files
    interfaces_written: bool = False

    # Track how many times each batch_id has been prepared, to prevent
    # infinite loops. Maps batch_id -> prepare count. A batch prepared more
    # than _MAX_BATCH_PREPARES times is automatically rejected.
    batch_prepare_counts: Dict[str, int] = field(default_factory=dict)

    # Subtree review results (subtree_name -> review result dict).
    # Populated by subtree_review.run_subtree_review() after each subtree
    # completes.
    subtree_reviews: Dict[str, Dict] = field(default_factory=dict)

    # Class-level constant -- ClassVar is excluded from asdict() serialization.
    _MAX_BATCH_PREPARES: ClassVar[int] = 5

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a plain dict (via dataclasses.asdict)."""
        return asdict(self)

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "CodeGenState":
        """Rebuild from a dict, dropping unknown keys for backward compatibility.

        Uses a proper dataclasses.fields import instead of the previous
        ``__import__('dataclasses')`` hack.
        """
        from dataclasses import fields as dc_fields
        valid_fields = {f.name for f in dc_fields(cls)}
        return cls(**{k: v for k, v in data.items() if k in valid_fields})

    def set_current_batch(self, batch_state: "BatchExecutionState") -> None:
        """Set the current batch being processed and refresh the timestamp."""
        self.current_batch_id = batch_state.batch_id
        self.current_batch_state = batch_state.to_dict()
        self.last_updated = datetime.now().isoformat()

    def complete_current_batch(self, success: bool) -> None:
        """Mark the current batch as complete and clear it.

        If the current batch has merged_task_ids (file-level merge mode),
        all constituent task IDs are marked as completed/failed.
        """
        if self.current_batch_id:
            # Collect all task IDs to mark (merged mode or single).
            batch_state = self.current_batch_state or {}
            merged_ids = batch_state.get("merged_task_ids", [])

            # Build deduplicated list: current_batch_id + any merged IDs.
            all_ids = [self.current_batch_id]
            for mid in merged_ids:
                if mid != self.current_batch_id and mid not in all_ids:
                    all_ids.append(mid)

            target = self.completed_task_ids if success else self.failed_task_ids
            for tid in all_ids:
                if tid not in target:
                    target.append(tid)

            # Refresh only the counter that changed (matches ID-list length).
            if success:
                self.completed_tasks = len(self.completed_task_ids)
            else:
                self.failed_tasks = len(self.failed_task_ids)

        self.current_batch_id = None
        self.current_batch_state = None
        self.last_updated = datetime.now().isoformat()

    def get_current_batch_state(self) -> Optional["BatchExecutionState"]:
        """Get the current batch state as an object, or None when no batch is set."""
        if self.current_batch_state:
            return BatchExecutionState.from_dict(self.current_batch_state)
        return None
# ============================================================================
# State Persistence
# ============================================================================


def _count_total_tasks_from_tasks_json(state_path: Path = STATE_FILE) -> int:
    """Count total planned tasks by reading tasks.json.

    Returns 0 if tasks.json doesn't exist or cannot be parsed. Used to backfill
    ``CodeGenState.total_tasks`` since nothing else writes that field after
    ``plan_tasks`` runs (see plan A2).

    The tasks.json path is derived from ``state_path`` (assumed to live in
    the same ``.rpgkit/data/`` directory) so callers passing a custom
    state_path see the matching tasks.json instead of the workspace default.
    """
    try:
        from .task_batch import load_tasks_from_tasks_json
        tasks_path = state_path.parent / "tasks.json"
        if not tasks_path.exists():
            return 0
        return len(load_tasks_from_tasks_json(tasks_path))
    except Exception as exc:
        logging.debug("total_tasks backfill skipped: %s", exc)
        return 0


def _maybe_backfill_total_tasks(
    state: CodeGenState,
    state_path: Path = STATE_FILE,
) -> CodeGenState:
    """Ensure ``state.total_tasks`` reflects the current tasks.json size.

    The field defaults to 0 because ``CodeGenState`` is constructed before
    ``plan_tasks`` produces tasks.json. Backfilling on each load keeps the
    persisted state in sync with the actual task count without requiring
    every call site to remember to update it (see plan A2).
    """
    if state.total_tasks > 0:
        return state
    counted = _count_total_tasks_from_tasks_json(state_path)
    if counted > 0:
        state.total_tasks = counted
    return state


def _fresh_state(state_path: Path) -> CodeGenState:
    """Build a brand-new CodeGenState, backfilling total_tasks from tasks.json.

    Shared by the three fallback paths in :func:`load_code_gen_state`.
    """
    return _maybe_backfill_total_tasks(
        CodeGenState(started_at=datetime.now().isoformat()),
        state_path,
    )


def load_code_gen_state(state_path: Path = STATE_FILE) -> CodeGenState:
    """Load the global code generation state from JSONL file (last valid line).

    The state file uses JSONL (JSON Lines) format where each line is a complete
    JSON snapshot of the state. Reading the last valid line gives the latest
    state. If the last line is corrupted (e.g. due to a write failure), the
    second-to-last line is used as a fallback, and so on.
    """
    if not state_path.exists():
        return _fresh_state(state_path)

    try:
        with open(state_path, 'r', encoding='utf-8') as f:
            lines = f.readlines()

        # Try lines from last to first, skipping empty/whitespace lines.
        for line in reversed(lines):
            line = line.strip()
            if not line:
                continue
            try:
                data = json.loads(line)
                return _maybe_backfill_total_tasks(
                    CodeGenState.from_dict(data), state_path,
                )
            except json.JSONDecodeError:
                logging.warning("Skipping corrupted state line, trying previous line")
                continue

        # All lines corrupted or file empty.
        logging.warning("All lines in state file are corrupted or empty, starting fresh")
        return _fresh_state(state_path)
    except Exception as e:
        logging.warning(f"Failed to load code gen state: {e}")
        return _fresh_state(state_path)


def _log_lost_task_ids(state: CodeGenState, state_path: Path) -> None:
    """Diagnostic: log when the new snapshot drops completed/failed task IDs.

    We intentionally do NOT auto-restore the "lost" IDs because:
    1. A --skip operation legitimately does not add to completed_task_ids.
    2. Auto-restoring IDs from a corrupted/externally-modified state file
       can create phantom "all tasks completed" states and deadlocks.
    The message is logged so operators can investigate if needed.
    """
    try:
        existing = load_code_gen_state(state_path)
        lost_completed = set(existing.completed_task_ids) - set(state.completed_task_ids)
        lost_failed = set(existing.failed_task_ids) - set(state.failed_task_ids)

        if lost_completed:
            logging.info(
                f"State save: {len(lost_completed)} completed task IDs in persisted "
                f"state are not in new state (this is expected after --skip or state reset)."
            )
        if lost_failed:
            logging.info(
                f"State save: {len(lost_failed)} failed task IDs in persisted "
                f"state are not in new state (this is expected after retry or state reset)."
            )
    except Exception:
        pass  # Diagnostics only -- never block the save.


def _truncate_batch_for_persist(state_dict: Dict[str, Any]) -> None:
    """Shrink bulky batch fields in the serialized snapshot (in place).

    NOTE: test_prompt and code_prompt are NOT truncated because the
    orchestrator's sub-agent prompts load them back from the persisted state.
    """
    batch_dict = state_dict.get('current_batch_state')
    if not (batch_dict and isinstance(batch_dict, dict)):
        return

    _MAX_OUTPUT_PERSIST = 8000  # chars of test output kept
    # Truncate last_test_output -- keep enough for the failure-analysis
    # handler to work with (it reads from persisted state). Pytest error
    # summaries appear at the end, so keep the tail when truncating.
    output = batch_dict.get('last_test_output')
    if isinstance(output, str) and len(output) > _MAX_OUTPUT_PERSIST:
        head_size = 1000
        tail_size = _MAX_OUTPUT_PERSIST - head_size
        batch_dict['last_test_output'] = (
            output[:head_size]
            + '\n...(middle truncated)...\n'
            + output[-tail_size:]
        )

    # Truncate iteration history -- keep only the last 2 entries.
    iterations = batch_dict.get('iterations', [])
    if isinstance(iterations, list) and len(iterations) > 2:
        batch_dict['iterations'] = iterations[-2:]


def _compact_state_file(state_path: Path) -> None:
    """Auto-compact: archive old JSONL lines once the file grows too large.

    The previous threshold was 50 MB which never triggered in practice,
    leaving the state file at ~880 KB after a single 100-batch run because
    every save dumps the full state. Lowered to 200 KB and we keep the last
    20 snapshots so debugging can still walk back a few steps without
    bloating the file (plan E3). Failures are non-fatal.
    """
    _COMPACT_THRESHOLD = 200 * 1024  # 200 KB
    _KEEP_LAST_N = 20  # snapshots retained after compact
    try:
        file_size = state_path.stat().st_size
        if file_size <= _COMPACT_THRESHOLD:
            return
        with open(state_path, 'r', encoding='utf-8') as f:
            all_lines = f.readlines()
        if len(all_lines) <= _KEEP_LAST_N:
            return

        archive_path = state_path.with_suffix('.jsonl.archive')
        with open(archive_path, 'a', encoding='utf-8') as af:
            af.writelines(all_lines[:-_KEEP_LAST_N])

        # Write the compacted file to a temp location first, then atomically
        # rename to avoid data loss on crash.
        tmp_path = state_path.with_suffix('.jsonl.tmp')
        with open(tmp_path, 'w', encoding='utf-8') as f:
            f.writelines(all_lines[-_KEEP_LAST_N:])
        tmp_path.replace(state_path)  # atomic on POSIX

        kept_size = sum(len(l) for l in all_lines[-_KEEP_LAST_N:])
        logging.info(
            f"State file compacted: archived {len(all_lines)-_KEEP_LAST_N} lines, "
            f"kept last {_KEEP_LAST_N} "
            f"({file_size/1024:.0f}KB -> {kept_size/1024:.0f}KB)"
        )
    except Exception as e:
        logging.warning(f"State file compaction failed (non-fatal): {e}")


def save_code_gen_state(state: CodeGenState, state_path: Path = STATE_FILE) -> None:
    """Append the global code generation state as a new line to the JSONL file.

    Each save appends a single JSON line. This is crash-safe: if the append
    fails mid-write, the previous lines remain intact and can be recovered.

    A diagnostic check logs if the new state has fewer completed/failed IDs
    than the persisted state, but does NOT auto-restore them (to avoid
    deadlocks caused by phantom restored IDs).
    """
    state_path.parent.mkdir(parents=True, exist_ok=True)
    state.last_updated = datetime.now().isoformat()

    # Defensive check: warn if the snapshot drops previously persisted IDs.
    if state_path.exists():
        _log_lost_task_ids(state, state_path)

    # Serialize -- truncate redundant large text fields in the persisted copy
    # to keep the state file small. The in-memory state object is NOT modified.
    state_dict = state.to_dict()
    _truncate_batch_for_persist(state_dict)

    line = json.dumps(state_dict, ensure_ascii=False)
    with open(state_path, 'a', encoding='utf-8') as f:
        f.write(line + '\n')

    _compact_state_file(state_path)


def get_or_create_batch_state(
    batch_id: str,
    file_path: str = "",
    subtree: str = "",
    state_path: Path = STATE_FILE
) -> BatchExecutionState:
    """Get existing batch state or create a new one.

    If a batch is already in progress, returns its state.
    Otherwise creates a new BatchExecutionState.

    Raises:
        ValueError: If the batch is already recorded as completed.
    """
    global_state = load_code_gen_state(state_path)

    # Check if this batch is already in progress.
    if global_state.current_batch_id == batch_id and global_state.current_batch_state:
        return BatchExecutionState.from_dict(global_state.current_batch_state)

    # Check if already completed.
    if batch_id in global_state.completed_task_ids:
        raise ValueError(f"Batch {batch_id} is already completed")

    # Create new batch state.
    return BatchExecutionState(
        batch_id=batch_id,
        file_path=file_path,
        subtree=subtree,
        started_at=datetime.now().isoformat()
    )


def update_batch_state(
    batch_state: BatchExecutionState,
    state_path: Path = STATE_FILE
) -> None:
    """Persist the batch state as the current batch in the global state file."""
    global_state = load_code_gen_state(state_path)
    global_state.set_current_batch(batch_state)
    save_code_gen_state(global_state, state_path)


def complete_batch(
    batch_id: str,
    success: bool,
    state_path: Path = STATE_FILE,
    rpg_backup_path: Optional[str] = None
) -> None:
    """Mark a batch as complete (success or failure).

    Args:
        batch_id: The batch to complete.
        success: Whether the batch succeeded.
        state_path: Path to state file.
        rpg_backup_path: If provided, update rpg_backup_path atomically
            with the completion (avoids stale intermediate saves).
    """
    global_state = load_code_gen_state(state_path)

    if global_state.current_batch_id == batch_id:
        if rpg_backup_path:
            global_state.rpg_backup_path = rpg_backup_path
        global_state.complete_current_batch(success)
        save_code_gen_state(global_state, state_path)
def skip_current_batch(batch_id: str, state_path: Path = STATE_FILE) -> bool:
    """Clear the current batch without marking it completed or failed.

    Used when an out-of-band condition (e.g. the batch branch went missing
    because the sub-agent committed to main directly) prevents the batch
    from being merged, but is not a code-quality failure. The batch_id is
    recorded in ``skipped_task_ids`` for observability, yet remains absent
    from ``completed_task_ids`` and ``failed_task_ids`` so the next
    ``--next`` invocation re-attempts it without consuming a retry slot
    (see plan A3).

    Loop guard: ``batch_prepare_counts[batch_id]`` is incremented on each
    skip; once it reaches ``_MAX_BATCH_PREPARES`` the batch is recorded
    in ``failed_task_ids`` instead, so a sub-agent that keeps making the
    same mistake (e.g. always committing to main) cannot trap the
    workflow in an infinite skip loop.

    Returns ``True`` when the skip succeeded, ``False`` when the loop
    guard converted the skip into a hard failure (or the batch was not
    the current one).
    """
    global_state = load_code_gen_state(state_path)
    if global_state.current_batch_id != batch_id:
        return False

    # Increment skip counter and check the loop guard. We reuse
    # batch_prepare_counts since it's already wired into the orchestrator's
    # batch-prep flow and asdict() persists it through to_dict().
    skip_count = global_state.batch_prepare_counts.get(batch_id, 0) + 1
    global_state.batch_prepare_counts[batch_id] = skip_count

    max_skips = CodeGenState._MAX_BATCH_PREPARES
    if skip_count >= max_skips:
        # Promote to a hard failure so the orchestrator stops picking
        # this task up. The operator can investigate and either --retry
        # (which clears failed_task_ids) or --skip explicitly.
        if batch_id not in global_state.failed_task_ids:
            global_state.failed_task_ids.append(batch_id)
        global_state.failed_tasks = len(global_state.failed_task_ids)
        global_state.current_batch_id = None
        global_state.current_batch_state = None
        global_state.last_updated = datetime.now().isoformat()
        save_code_gen_state(global_state, state_path)
        logging.warning(
            "Batch %s skipped %d times (limit: %d) - promoted to failed_task_ids",
            batch_id, skip_count, max_skips,
        )
        return False

    if batch_id not in global_state.skipped_task_ids:
        global_state.skipped_task_ids.append(batch_id)
    global_state.current_batch_id = None
    global_state.current_batch_state = None
    global_state.last_updated = datetime.now().isoformat()
    save_code_gen_state(global_state, state_path)
    return True


def reset_rpg_backup_tracking(state_path: Path = STATE_FILE) -> None:
    """Reset the RPG backup tracking for a fresh code_gen session.

    Call this when you want to force a new backup on the next batch completion.
    """
    global_state = load_code_gen_state(state_path)
    global_state.rpg_backup_path = None
    save_code_gen_state(global_state, state_path)


def get_or_create_code_gen_trajectory(
    global_state: 'CodeGenState',
    base_dir: Optional[Path] = None,
    state_path: Path = STATE_FILE
) -> 'Any':
    """Get existing or create new Trajectory for the code_gen workflow.

    All scripts in the code_gen pipeline share the same trajectory file
    within a single code_gen session. The trajectory file path is stored
    in CodeGenState.trajectory_file.

    Args:
        global_state: The global state that tracks the trajectory path.
        base_dir: Workspace root for trajectory files; defaults to
            WORKSPACE_ROOT. (Annotation fixed to Optional[Path] -- the
            previous ``Path = None`` annotation was incorrect.)
        state_path: Path to the state file for persisting the new path.

    Returns:
        Trajectory instance (loaded or newly created).
    """
    # Trajectory files live under .rpgkit/data/trajectory/ (workspace level),
    # not inside repo/, so base_dir should be the workspace root.
    base_dir = base_dir or WORKSPACE_ROOT

    # Try to load existing trajectory.
    if global_state.trajectory_file:
        traj_path = base_dir / global_state.trajectory_file
        if traj_path.exists():
            traj = Trajectory("code_gen", base_dir)
            traj.trajectory_file = traj_path
            if traj.load():
                return traj

    # Create new trajectory.
    traj = Trajectory("code_gen", base_dir)
    traj.start({"workflow": "code_gen"})

    # Save trajectory file path in state (relative to base_dir when possible).
    try:
        rel_path = traj.trajectory_file.relative_to(base_dir)
        global_state.trajectory_file = str(rel_path)
    except ValueError:
        global_state.trajectory_file = str(traj.trajectory_file)
    save_code_gen_state(global_state, state_path)

    return traj
+ + Returns: + Trajectory instance (loaded or newly created) + """ + # Trajectory files live under .rpgkit/data/trajectory/ (workspace level), + # not inside repo/, so base_dir should be the workspace root. + base_dir = base_dir or WORKSPACE_ROOT + + # Try to load existing trajectory + if global_state.trajectory_file: + traj_path = base_dir / global_state.trajectory_file + if traj_path.exists(): + traj = Trajectory("code_gen", base_dir) + traj.trajectory_file = traj_path + if traj.load(): + return traj + + # Create new trajectory + traj = Trajectory("code_gen", base_dir) + traj.start({"workflow": "code_gen"}) + + # Save trajectory file path in state (relative to base_dir) + try: + rel_path = traj.trajectory_file.relative_to(base_dir) + global_state.trajectory_file = str(rel_path) + except ValueError: + global_state.trajectory_file = str(traj.trajectory_file) + save_code_gen_state(global_state, state_path) + + return traj diff --git a/RPG-Kit/scripts/common/git_utils.py b/RPG-Kit/scripts/common/git_utils.py new file mode 100644 index 0000000..68f28f4 --- /dev/null +++ b/RPG-Kit/scripts/common/git_utils.py @@ -0,0 +1,853 @@ +#!/usr/bin/env python3 +"""Git Utilities for RPG-Kit Code Generation. + +Provides Git operations for branch management and version control +during the code generation phase: +- Branch creation, switching, and deletion +- Commit and merge operations with conflict detection +- Stash management for safe branch switching +- Task branch lifecycle (create / merge / abandon) +""" + +import logging +import subprocess +from pathlib import Path +from typing import Optional, Tuple, List, Dict +from dataclasses import dataclass + + +@dataclass +class GitResult: + """Result of a Git command execution.""" + success: bool + stdout: str = "" + stderr: str = "" + returncode: int = 0 + + +class GitRunner: + """Git command runner for code generation workflow. 
+ + Handles: + - Branch creation and switching + - Commits and merges + - Stash operations + - Safe directory handling + """ + + # The canonical main branch name used by all RPG-Kit repos. + MAIN_BRANCH = "main" + + def __init__( + self, + repo_path: str, + main_branch: str = "main", + logger: Optional[logging.Logger] = None + ): + self.repo_path = Path(repo_path) + self.logger = logger or logging.getLogger(__name__) + self.main_branch = self.MAIN_BRANCH + + # Ensure repo exists and is a git repo + self._ensure_git_repository() + + def run_git( + self, + args: List[str], + check: bool = False, + capture_output: bool = True + ) -> GitResult: + """Run a git subcommand (automatically prepends 'git'). + + Args: + args: Git subcommand arguments (e.g., ["add", "-A"]) + check: Raise exception on failure + capture_output: Capture stdout/stderr + + Returns: + GitResult with success status and output + """ + cmd = ["git"] + args + try: + result = subprocess.run( + cmd, + cwd=self.repo_path, + capture_output=capture_output, + text=True, + timeout=60 + ) + + git_result = GitResult( + success=result.returncode == 0, + stdout=result.stdout.strip() if result.stdout else "", + stderr=result.stderr.strip() if result.stderr else "", + returncode=result.returncode + ) + + if check and not git_result.success: + raise subprocess.CalledProcessError( + result.returncode, cmd, result.stdout, result.stderr + ) + + return git_result + + except subprocess.TimeoutExpired: + self.logger.error(f"Git command timed out: {' '.join(cmd)}") + return GitResult(success=False, stderr="Command timed out", returncode=-1) + except Exception as e: + self.logger.error(f"Git command failed: {e}") + return GitResult(success=False, stderr=str(e), returncode=-1) + + def _ensure_git_repository(self) -> None: + """Ensure the repository is a valid git repo with 'main' as the default branch.""" + git_dir = self.repo_path / ".git" + + if not git_dir.exists(): + self.logger.info("Initializing git repository...") + 
self.repo_path.mkdir(parents=True, exist_ok=True) + self.run_git(["init", "-b", self.MAIN_BRANCH]) + + # Configure safe directory + self.run_git([ + "config", "--global", "--add", + "safe.directory", str(self.repo_path.resolve()) + ]) + + def get_current_branch(self) -> str: + """Get the name of the current branch.""" + result = self.run_git(["branch", "--show-current"]) + return result.stdout if result.success else "" + + def get_head_commit(self) -> str: + """Get the current HEAD commit hash.""" + result = self.run_git(["rev-parse", "HEAD"]) + return result.stdout if result.success else "" + + def get_main_branch_commit(self) -> Optional[str]: + """Get the commit hash of the main branch.""" + result = self.run_git(["rev-parse", self.main_branch]) + return result.stdout if result.success else None + + def has_uncommitted_changes(self) -> bool: + """Check if there are uncommitted changes.""" + result = self.run_git(["status", "--porcelain"]) + return bool(result.stdout) if result.success else False + + def create_branch(self, branch_name: str, from_branch: Optional[str] = None) -> bool: + """Create a new branch. + + Args: + branch_name: Name of the new branch + from_branch: Branch to create from (default: current) + + Returns: + True if successful + """ + if from_branch: + result = self.run_git(["checkout", "-b", branch_name, from_branch]) + else: + result = self.run_git(["checkout", "-b", branch_name]) + + if result.success: + self.logger.info(f"Created branch: {branch_name}") + else: + self.logger.error(f"Failed to create branch: {result.stderr}") + + return result.success + + def switch_branch(self, branch_name: str, force: bool = False) -> bool: + """Switch to an existing branch. 
+ + Args: + branch_name: Branch to switch to + force: Force switch even with uncommitted changes + + Returns: + True if successful + """ + args = ["checkout"] + if force: + args.append("-f") + args.append(branch_name) + + result = self.run_git(args) + + if result.success: + self.logger.info(f"Switched to branch: {branch_name}") + else: + self.logger.error(f"Failed to switch branch: {result.stderr}") + + return result.success + + def branch_exists(self, branch_name: str) -> bool: + """Check if a branch exists.""" + result = self.run_git(["rev-parse", "--verify", branch_name]) + return result.success + + def stage_all(self) -> bool: + """Stage all changes.""" + result = self.run_git(["add", "-A"]) + return result.success + + def commit(self, message: str) -> Tuple[bool, str]: + """Commit staged changes. + + Args: + message: Commit message + + Returns: + Tuple of (success, commit_hash) + """ + # Check if there are changes to commit + result = self.run_git(["diff", "--staged", "--quiet"]) + if result.success: + self.logger.info("No changes to commit") + return True, self.get_head_commit() + + # Commit + result = self.run_git(["commit", "-m", message]) + if result.success: + commit_hash = self.get_head_commit() + self.logger.info(f"Committed: {commit_hash[:8]}") + return True, commit_hash + else: + self.logger.error(f"Commit failed: {result.stderr}") + return False, "" + + def stage_and_commit(self, message: str) -> Tuple[bool, str]: + """Stage all changes and commit. + + Returns: + Tuple of (success, commit_hash) + """ + self.stage_all() + return self.commit(message) + + def merge_branch( + self, + source_branch: str, + target_branch: Optional[str] = None, + no_ff: bool = True, + message: Optional[str] = None + ) -> Tuple[bool, Optional[str]]: + """Merge a branch into target (default: main). 
+ + Args: + source_branch: Branch to merge from + target_branch: Branch to merge into (default: main) + no_ff: Use --no-ff flag + message: Custom merge commit message (default: auto-generated) + + Returns: + Tuple of (success, error_type) + - success: True if merge succeeded + - error_type: None on success, or one of: + 'uncommitted_changes', 'switch_failed', 'merge_conflict', 'merge_failed' + """ + target = target_branch or self.main_branch + + # Check for uncommitted changes before switching + if self.has_uncommitted_changes(): + self.logger.error("Cannot merge: uncommitted changes exist") + return False, "uncommitted_changes" + + # Switch to target branch + if not self.switch_branch(target): + return False, "switch_failed" + + # Merge + args = ["merge"] + if no_ff: + args.append("--no-ff") + merge_msg = message or f"Merge branch '{source_branch}'" + args.extend(["-m", merge_msg, source_branch]) + + result = self.run_git(args) + + if result.success: + self.logger.info(f"Merged {source_branch} into {target}") + return True, None + + # Check if it's a merge conflict + if "CONFLICT" in result.stdout or "CONFLICT" in result.stderr: + self.logger.error("Merge conflict detected, aborting merge") + self.run_git(["merge", "--abort"]) + return False, "merge_conflict" + + self.logger.error(f"Merge failed: {result.stderr}") + return False, "merge_failed" + + def delete_branch(self, branch_name: str, force: bool = False) -> bool: + """Delete a branch.""" + flag = "-D" if force else "-d" + result = self.run_git(["branch", flag, branch_name]) + return result.success + + def reset_hard(self, commit: Optional[str] = None) -> bool: + """Hard reset to a commit. 
+ + Args: + commit: Commit to reset to (default: HEAD) + + Returns: + True if successful + """ + args = ["reset", "--hard"] + if commit: + args.append(commit) + + result = self.run_git(args) + + if result.success: + self.logger.info(f"Reset to: {commit or 'HEAD'}") + else: + self.logger.error(f"Reset failed: {result.stderr}") + + return result.success + + def stash(self, message: Optional[str] = None) -> bool: + """Stash current changes.""" + args = ["stash", "push"] + if message: + args.extend(["-m", message]) + + result = self.run_git(args) + return result.success + + def stash_if_dirty(self, message: Optional[str] = None) -> Tuple[bool, bool]: + """Stash changes only if there are uncommitted changes. + + Args: + message: Optional stash message + + Returns: + Tuple of (success, was_dirty) + - success: True if operation succeeded (including when no stash needed) + - was_dirty: True if there were changes that got stashed + """ + if not self.has_uncommitted_changes(): + return True, False + + stash_msg = message or "auto-stash before git operation" + success = self.stash(stash_msg) + if success: + self.logger.info(f"Stashed uncommitted changes: {stash_msg}") + else: + self.logger.error("Failed to stash uncommitted changes") + return success, success + + def stash_pop(self) -> bool: + """Pop the most recent stash.""" + result = self.run_git(["stash", "pop"]) + return result.success + + def get_diff( + self, + from_commit: Optional[str] = None, + to_commit: str = "HEAD" + ) -> str: + """Get diff between commits. 
+ + Args: + from_commit: Start commit (default: parent of to_commit) + to_commit: End commit (default: HEAD) + + Returns: + Diff content as string + """ + if from_commit: + result = self.run_git(["diff", from_commit, to_commit]) + else: + result = self.run_git(["diff", f"{to_commit}^", to_commit]) + + return result.stdout if result.success else "" + + def get_changed_files( + self, + from_commit: Optional[str] = None, + to_commit: str = "HEAD" + ) -> List[str]: + """Get list of files changed between commits. + + Returns: + List of file paths + """ + if from_commit: + result = self.run_git([ + "diff", "--name-only", from_commit, to_commit + ]) + else: + result = self.run_git([ + "diff", "--name-only", f"{to_commit}^", to_commit + ]) + + if result.success and result.stdout: + return result.stdout.split('\n') + return [] + + def ensure_main_branch(self) -> Tuple[bool, str]: + """Ensure we're on the main branch. + + Returns: + Tuple of (success, message) + """ + try: + current_branch = self.get_current_branch() + if not current_branch: + return False, "Failed to get current branch" + + if current_branch == self.main_branch: + return True, f"Already on {self.main_branch} branch" + + if self.switch_branch(self.main_branch): + return True, f"Switched to {self.main_branch} branch" + + return False, f"Could not switch to {self.main_branch} branch (currently on {current_branch})" + + except Exception as e: + return False, f"Git error: {str(e)}" + + def ensure_clean_workspace(self, message: str = "pre-init-codebase") -> bool: + """Stash any uncommitted changes. 
+ + Args: + message: Stash message + + Returns: + True if workspace is clean (or was successfully stashed) + """ + try: + success, _ = self.stash_if_dirty(message) + return success + except Exception: + return False + + +# --------------------------------------------------------------------------- +# Module-level read-only helpers for hooks / status commands +# --------------------------------------------------------------------------- +# +# These functions intentionally avoid the GitRunner class because hooks and +# status commands need: +# 1. No exceptions on missing / shallow / non-git repos (silent failure +# with ``None`` return so the caller falls back gracefully). +# 2. Sub-second timeouts (a slow git call must not stall ``rpgkit init``, +# a pre-commit hook, or VS Code's folderOpen task). +# 3. No mutation of the working tree, index, or any git state. +# +# Used by: +# - rpg.models.RPG.set_git_meta(...) callers +# - scripts/update_graphs.py status output +# - (future Step 3) RPGService.sync_from_commit_diff + +def _run_git_readonly( + args: List[str], + cwd: Path, + timeout: float = 5.0, +) -> Optional[str]: + """Run a read-only git command, return stdout stripped or None on any failure. + + Never raises. Used by helpers below to keep them silent-fail. + """ + try: + result = subprocess.run( + ["git", *args], + cwd=str(cwd), + capture_output=True, + text=True, + timeout=timeout, + check=False, + ) + except (subprocess.TimeoutExpired, FileNotFoundError, OSError): + return None + if result.returncode != 0: + return None + return (result.stdout or "").strip() or None + + +def read_head(repo_dir: str | Path) -> Optional[dict]: + """Read the current git HEAD for ``repo_dir``. 
+ + Returns ``None`` if: + * ``repo_dir`` does not exist + * ``git`` is not on PATH + * ``repo_dir`` is not a git working tree + * The repository has no commits yet (unborn HEAD) + + Otherwise returns a dict with these keys (any individual value may be + ``None`` on best-effort failures, e.g. detached HEAD has no branch): + + { + "head_commit": "8a3f9c1d4e2b...", # 40-char SHA + "head_short": "8a3f9c1", # short SHA + "head_branch": "main" | None, # None on detached HEAD + "head_timestamp": "2026-05-12T08:30:00+00:00", # ISO 8601 UTC + } + + Designed for SessionStart / pre-commit hook use โ€” must never raise and + must complete in well under a second on a healthy repo. + """ + if not repo_dir: + # Empty string would otherwise reach subprocess as cwd="" which + # silently falls back to the caller's working directory โ€” never + # what callers of this helper intend. + return None + repo_path = Path(repo_dir) + if not repo_path.is_dir(): + return None + + head_commit = _run_git_readonly(["rev-parse", "HEAD"], repo_path) + if not head_commit: + return None + + head_short = _run_git_readonly(["rev-parse", "--short", "HEAD"], repo_path) + + # symbolic-ref fails on detached HEAD with exit 128 โ€” that's expected, + # _run_git_readonly returns None and we keep head_branch as None. + head_branch = _run_git_readonly( + ["symbolic-ref", "--short", "HEAD"], repo_path + ) + + # ISO 8601 UTC timestamp of the HEAD commit. + head_timestamp = _run_git_readonly( + ["show", "-s", "--format=%cI", "HEAD"], repo_path + ) + + return { + "head_commit": head_commit, + "head_short": head_short, + "head_branch": head_branch, + "head_timestamp": head_timestamp, + } + + +# --------------------------------------------------------------------------- +# Diff helpers โ€” produce ``(modified, renames)`` from various git scopes. 
+# --------------------------------------------------------------------------- +# +# Every helper: +# * returns ``(modified: list[str], renames: dict[old, new])``; +# * returns ``([], {})`` on any failure (caller can't distinguish +# "no changes" from "git not available" without consulting +# ``read_head`` first โ€” that's intentional, falling back to full +# sync in either case is safe); +# * filters to ``.py`` files at the source โ€” RPG-Kit doesn't currently +# parse anything else. When that changes, lift the filter into the +# caller. + +# Single-letter status codes that ``git diff --name-status`` emits. +# Anything else (e.g. T = type change, U = unmerged) we ignore; full +# sync will eventually pick those up. +_GIT_STATUS_ADDED = "A" +_GIT_STATUS_DELETED = "D" +_GIT_STATUS_MODIFIED = "M" +_GIT_STATUS_RENAME_PREFIX = "R" # may be followed by similarity score: "R98" +_GIT_STATUS_COPY_PREFIX = "C" + + +def _parse_name_status( + raw: Optional[str], + *, + py_only: bool = True, +) -> Tuple[List[str], Dict[str, str]]: + r"""Parse output of ``git diff --name-status -M``. + + Format per line is tab-separated: + + ``A\\tpath`` โ€” added + ``D\\tpath`` โ€” deleted + ``M\\tpath`` โ€” modified + ``R\\told\\tnew`` โ€” rename (score is similarity 0-100) + ``C\\told\\tnew`` โ€” copy (treated as rename for our purposes) + + Returns: + ``(modified, renames)`` where ``modified`` lists every path that + the dep_graph must re-examine (additions, deletions, plain + modifications, **and** rename targets), and ``renames`` maps old + paths to new paths so callers can pass it straight into + :meth:`DependencyGraph.update_files(renames=...)`. 
+ """ + modified: List[str] = [] + renames: Dict[str, str] = {} + if not raw: + return modified, renames + + def _keep(p: str) -> bool: + return (not py_only) or p.endswith(".py") + + for line in raw.splitlines(): + parts = line.split("\t") + if len(parts) < 2: + continue + status = parts[0] + if status.startswith(_GIT_STATUS_RENAME_PREFIX) or status.startswith( + _GIT_STATUS_COPY_PREFIX + ): + if len(parts) < 3: + continue + old_path, new_path = parts[1], parts[2] + if _keep(new_path) or _keep(old_path): + renames[old_path] = new_path + # update_files() treats the OLD path as a deletion (via + # ``renames``) and the NEW path as something it must + # reparse โ€” so we surface the new path through the + # modified list as well. + if _keep(new_path): + modified.append(new_path) + continue + path = parts[1] + if not _keep(path): + continue + if status in (_GIT_STATUS_ADDED, _GIT_STATUS_DELETED, _GIT_STATUS_MODIFIED): + modified.append(path) + # Type / unmerged / other status letters โ†’ ignore (caller will + # fall back to full sync via the safety threshold if there are + # many of them). + return modified, renames + + +def staged_changes( + repo_dir: str | Path, +) -> Tuple[List[str], Dict[str, str]]: + """Return the paths in the **index** (i.e. ``git add``'d) vs HEAD. + + Used by the pre-commit hook: at hook time the new commit hasn't + been recorded yet, so the right scope is "what's about to be + committed" = index vs HEAD. Anything in the working tree that + hasn't been ``git add``'d is intentionally out of scope. + + Silent-fail: returns ``([], {})`` if not a git repo / git missing / + timeout. The caller's safety net is to fall back to full sync. 
+ """ + if not repo_dir: + return [], {} + repo_path = Path(repo_dir) + if not repo_path.is_dir(): + return [], {} + raw = _run_git_readonly( + ["diff", "--cached", "--name-status", "-M", "HEAD"], + repo_path, + ) + if raw is None: + # ``HEAD`` may not exist yet (unborn branch); try without it so + # the very first commit's staged files still get picked up. + raw = _run_git_readonly( + ["diff", "--cached", "--name-status", "-M"], + repo_path, + ) + return _parse_name_status(raw) + + +def working_tree_changes( + repo_dir: str | Path, + *, + include_untracked: bool = True, +) -> Tuple[List[str], Dict[str, str]]: + """Return tracked-and-modified + (optionally) untracked paths vs HEAD. + + Used by the **manual** ``update_graphs.py sync`` invocation (i.e. + when a user runs it from the CLI without ``--staged-only``). This + covers everything dirty on disk, regardless of whether it's been + ``git add``'d. + + Untracked files are reported as additions (no rename pairing + possible since they have no git history). + + Silent-fail like its siblings. + """ + if not repo_dir: + return [], {} + repo_path = Path(repo_dir) + if not repo_path.is_dir(): + return [], {} + + raw = _run_git_readonly( + ["diff", "--name-status", "-M", "HEAD"], + repo_path, + ) + modified, renames = _parse_name_status(raw) + + if include_untracked: + untracked_raw = _run_git_readonly( + ["ls-files", "--others", "--exclude-standard"], + repo_path, + ) + if untracked_raw: + for line in untracked_raw.splitlines(): + line = line.strip() + if line.endswith(".py") and line not in modified: + modified.append(line) + return modified, renames + + +def changed_files_between( + repo_dir: str | Path, + old_ref: str, + new_ref: str = "HEAD", +) -> Tuple[List[str], Dict[str, str]]: + """Return ``.py`` changes between two arbitrary commits / refs. 
+ + This is the workhorse for incremental sync: ``old_ref`` is the + commit RPG was last synced against (from ``meta.git.head_commit``) + and ``new_ref`` is typically the current HEAD. Git stitches + together every intermediate commit's diff for us, so this handles + "user committed 5 times since last sync" naturally. + + Silent-fail returns ``([], {})``. An empty list is **ambiguous**: + it could mean "no .py files changed" or "old_ref doesn't exist any + more in the current history". The caller is responsible for + pre-checking the relationship via :func:`merge_base` before + interpreting this output as "incremental is safe". + """ + if not repo_dir or not old_ref: + return [], {} + repo_path = Path(repo_dir) + if not repo_path.is_dir(): + return [], {} + raw = _run_git_readonly( + ["diff", "--name-status", "-M", f"{old_ref}..{new_ref}"], + repo_path, + ) + return _parse_name_status(raw) + + +def merge_base( + repo_dir: str | Path, + ref_a: str, + ref_b: str, +) -> Optional[str]: + """Return the longest common ancestor commit of ``ref_a`` and ``ref_b``. + + Used by ``RPGService.sync_from_commit_diff`` to decide whether + ``meta.git.head_commit`` is still on the current history: + + * ``merge_base(last, HEAD) == last`` โ†’ linear advance, safe to + diff ``last..HEAD`` for incremental update. + * ``merge_base(last, HEAD) != last`` โ†’ history was rewritten + (rebase, amend, reset, branch fork); must fall back to full + sync because ``last..HEAD`` would mix unrelated changes. + + Returns ``None`` on any failure โ€” caller treats this the same as + "diverged" and falls back to full sync. 
+ """ + if not repo_dir or not ref_a or not ref_b: + return None + repo_path = Path(repo_dir) + if not repo_path.is_dir(): + return None + return _run_git_readonly( + ["merge-base", ref_a, ref_b], + repo_path, + ) + + +def create_task_branch( + repo_path: str, + batch_id: str, + stash_if_dirty: bool = True +) -> Tuple[bool, str, str]: + """Create a new branch for a task batch, always from latest main HEAD. + + Key invariants (serial workflow, no concurrent batches): + 1. Always switch to main first โ€” branches are NEVER created from other + task branches. + 2. If a branch with the same name already exists (e.g., from a previous + failed run), delete it and recreate from current main HEAD. Reusing + a stale branch causes merge conflicts because the old fork point is + behind main. + 3. initial_commit is recorded AFTER switching to main, so it always + reflects the latest main HEAD. + + Args: + repo_path: Path to the repository + batch_id: ID of the batch (used in branch name) + stash_if_dirty: If True, stash uncommitted changes before switching + + Returns: + Tuple of (success, branch_name, initial_commit) + """ + git = GitRunner(repo_path) + + # Create sanitized branch name + safe_id = batch_id.replace("/", "_").replace("\\", "_")[:50] + branch_name = f"task/{safe_id}" + + # Handle uncommitted changes + was_stashed = False + if stash_if_dirty: + success, was_stashed = git.stash_if_dirty(f"pre-task-{safe_id}") + if not success: + return False, "", "" + + # ALWAYS switch to main first โ€” this is the core invariant. + # Branches must fork from latest main HEAD, never from another task branch. 
+ current_branch = git.get_current_branch() + if current_branch != git.main_branch: + if git.branch_exists(git.main_branch): + if not git.switch_branch(git.main_branch): + if was_stashed: + git.stash_pop() + return False, "", "" + else: + git.logger.warning("Main branch does not exist, creating branch from current HEAD") + + initial_commit = git.get_head_commit() + + # If a branch with this name already exists (from a previous failed run), + # delete it first. The old branch has a stale fork point that would cause + # merge conflicts. We recreate from the current (latest) main HEAD. + if git.branch_exists(branch_name): + git.logger.info( + f"Deleting stale branch {branch_name} (will recreate from main HEAD)" + ) + git.delete_branch(branch_name, force=True) + + # Create new branch from current main HEAD + success = git.create_branch(branch_name) + + # Restore stashed changes regardless of branch creation outcome + if was_stashed: + git.stash_pop() + + return success, branch_name, initial_commit + + +def complete_task_branch( + repo_path: str, + branch_name: str, + success: bool, +) -> Tuple[bool, Optional[str]]: + """Complete a task branch by merging (success) or abandoning (failure). 
+ + Args: + repo_path: Path to the repository + branch_name: Task branch name + success: Whether the task succeeded + + Returns: + Tuple of (success, error_type) + - success: True if operation succeeded + - error_type: None on success, or error description + """ + git = GitRunner(repo_path) + + # Check for uncommitted changes + if git.has_uncommitted_changes(): + git.logger.warning("Uncommitted changes detected, committing before branch completion") + commit_success, _ = git.stage_and_commit(f"WIP: auto-commit before completing {branch_name}") + if not commit_success: + return False, "commit_failed" + + if success: + # Merge the branch + merge_success, error_type = git.merge_branch(branch_name) + if merge_success: + # Delete the task branch + git.delete_branch(branch_name) + return True, None + return False, error_type + else: + # Abandon the branch - switch to main + if git.switch_branch(git.main_branch): + return True, None + return False, "switch_failed" diff --git a/RPG-Kit/scripts/common/import_normalizer.py b/RPG-Kit/scripts/common/import_normalizer.py new file mode 100644 index 0000000..b0f2203 --- /dev/null +++ b/RPG-Kit/scripts/common/import_normalizer.py @@ -0,0 +1,646 @@ +#!/usr/bin/env python3 +"""Import Normalizer โ€” Detect and fix inconsistent import prefixes. + +This module solves the problem where LLM-generated skeleton code uses bare +package names (``from vibeanim.foo import Bar``) while the project layout +requires a ``src.`` prefix (``from src.vibeanim.foo import Bar``). + +Usage:: + + from common.import_normalizer import ( + detect_project_import_prefix, + normalize_code, + normalize_files, + ) + + # Detect the correct prefix from file paths or repo layout + prefix = detect_project_import_prefix(repo_path) + # e.g. 
"src.vibeanim" + + # Normalize a code string before writing to disk + fixed_code = normalize_code(code_string, prefix) + + # Normalize all .py files in the repo (safety net before testing) + changed_files = normalize_files(repo_path, prefix) + +Design: + - ``detect_project_import_prefix`` inspects the repo directory structure + to determine the correct import prefix (e.g. ``src.vibeanim``). + - ``normalize_code`` rewrites import lines in a code string. + - ``normalize_files`` scans all ``.py`` files under ``src/`` and ``tests/`` + and fixes imports on disk. + +All three can also accept an ``interfaces.json``-style dict to derive the +prefix from file paths inside the JSON, for use before the repo exists. +""" + +import logging +import re +from pathlib import Path +from typing import Dict, List, Optional, Any +import ast as _ast + +logger = logging.getLogger(__name__) + + +# ============================================================================ +# Prefix Detection +# ============================================================================ + +def detect_project_import_prefix( + repo_path: Optional[Path] = None, + interfaces_subtrees: Optional[Dict[str, Any]] = None, +) -> Optional[str]: + """Detect the correct import prefix for this project. + + Checks two sources (in priority order): + + 1. **Repo directory structure** โ€” if ``repo_path/src//`` exists, + the prefix is ``src.``. + 2. **interfaces.json subtrees** โ€” if file paths start with ``src//``, + the prefix is ``src.``. + + Args: + repo_path: Path to the project repository root. + interfaces_subtrees: The ``subtrees`` dict from interfaces.json. + + Returns: + The import prefix string (e.g. ``"src.vibeanim"``) or *None*. 
+ """ + # Strategy 1: on-disk directory structure + if repo_path is not None: + prefix = _detect_from_directory(repo_path) + if prefix: + return prefix + + # Strategy 2: interfaces.json file paths + if interfaces_subtrees is not None: + prefix = _detect_from_interfaces(interfaces_subtrees) + if prefix: + return prefix + + return None + + +def _detect_from_directory(repo_path: Path) -> Optional[str]: + """Detect prefix from ``repo_path/src//``.""" + src_dir = repo_path / "src" + if not src_dir.is_dir(): + return None + + candidates: Dict[str, int] = {} + for child in src_dir.iterdir(): + if child.is_dir() and not child.name.startswith((".", "_")): + # Count .py files to confirm it's a real package + py_count = sum(1 for _ in child.rglob("*.py")) + if py_count > 0: + candidates[child.name] = py_count + + if not candidates: + return None + + best_pkg = max(candidates, key=candidates.get) + return f"src.{best_pkg}" + + +def _detect_from_interfaces(subtrees: Dict[str, Any]) -> Optional[str]: + """Detect prefix from file paths in interfaces.json subtrees.""" + counts: Dict[str, int] = {} + for subtree_data in subtrees.values(): + for file_path in subtree_data.get("interfaces", {}): + parts = file_path.replace("\\", "/").split("/") + if len(parts) >= 2 and parts[0] == "src": + key = f"src.{parts[1]}" + counts[key] = counts.get(key, 0) + 1 + + if not counts: + return None + + return max(counts, key=counts.get) + + +# ============================================================================ +# Code Normalization +# ============================================================================ + +def normalize_code(code: str, expected_prefix: str) -> str: + """Rewrite import lines in *code* to use the correct prefix. + + If *expected_prefix* is ``"src.vibeanim"`` and the code contains + ``from vibeanim.foo import Bar``, it becomes + ``from src.vibeanim.foo import Bar``. + + Already-correct lines (``from src.vibeanim.โ€ฆ``) are left untouched. 
+ String literals and comments are not modified. + + Args: + code: Python source code string. + expected_prefix: The full correct prefix (e.g. ``"src.vibeanim"``). + + Returns: + Normalized source code string. + """ + if not code or not expected_prefix: + return code + + parts = expected_prefix.split(".", 1) + if len(parts) != 2 or parts[0] != "src": + return code + + bare_package = parts[1] # e.g. "vibeanim" + + pattern = re.compile( + r"^(\s*(?:from|import)\s+)" + re.escape(bare_package) + r"\b", + re.MULTILINE, + ) + + def _replace(m: re.Match) -> str: + return m.group(1) + expected_prefix + + return pattern.sub(_replace, code) + + +# ============================================================================ +# File Normalization (on-disk) +# ============================================================================ + +def normalize_files( + repo_path: Path, + prefix: Optional[str] = None, +) -> List[str]: + """Scan ``.py`` files under ``src/`` and ``tests/`` and fix import prefixes. + + This is a safety-net step that should run before pytest to catch any + imports that slipped through earlier normalization (e.g. from code + generated by sub-agents during the TDD loop). + + Args: + repo_path: Root of the project repository. + prefix: Expected import prefix. Auto-detected if not given. + + Returns: + List of file paths (relative to *repo_path*) that were modified. 
+ """ + if prefix is None: + prefix = detect_project_import_prefix(repo_path=repo_path) + + if not prefix: + return [] + + parts = prefix.split(".", 1) + if len(parts) != 2 or parts[0] != "src": + return [] + + bare_package = parts[1] + + # Only search if src/ exists + if not (repo_path / "src" / bare_package).is_dir(): + return [] + + pattern = re.compile( + r"^(\s*(?:from|import)\s+)" + re.escape(bare_package) + r"\b", + ) + + modified_files: List[str] = [] + search_dirs = [repo_path / "src", repo_path / "tests"] + + for search_dir in search_dirs: + if not search_dir.is_dir(): + continue + for py_file in search_dir.rglob("*.py"): + try: + original = py_file.read_text(encoding="utf-8") + except OSError: + continue + + lines = original.split("\n") + changed = False + new_lines: List[str] = [] + + for line in lines: + stripped = line.lstrip() + # Skip string literals and comments + if stripped.startswith(("'", '"', "#")): + new_lines.append(line) + continue + + m = pattern.match(stripped) + if m: + new_line = line.replace( + m.group(0), + m.group(1) + prefix, + 1, + ) + new_lines.append(new_line) + changed = True + else: + new_lines.append(line) + + if changed: + py_file.write_text("\n".join(new_lines), encoding="utf-8") + rel = str(py_file.relative_to(repo_path)) + modified_files.append(rel) + logger.info("Fixed import prefixes in %s", rel) + + return modified_files + + +# ============================================================================ +# Future Annotations & Missing Imports +# ============================================================================ + +_FUTURE_ANNOTATIONS = "from __future__ import annotations" + + +def ensure_future_annotations(code: str) -> str: + """Ensure ``from __future__ import annotations`` is at the top of the file. + + If already present but in the wrong position (after other imports), + it is moved to the correct position. Python requires ``__future__`` + imports to appear before any other code or imports. 
+ + Args: + code: Python source code string. + + Returns: + Code with the future import guaranteed at the very top + (after shebang / encoding declarations only). + """ + if not code or not code.strip(): + return code + + lines = code.split("\n") + + # Remove any existing __future__ annotations line (may be misplaced) + had_future = False + filtered_lines = [] + for line in lines: + if line.strip() == _FUTURE_ANNOTATIONS: + had_future = True + continue + filtered_lines.append(line) + + # If code doesn't use any type annotations and didn't have __future__, + # add it anyway for safety (skeleton files almost always need it) + + # Find insertion point: after shebang (#!) and encoding (# -*- coding) + insert_idx = 0 + for i, line in enumerate(filtered_lines): + stripped = line.strip() + if i == 0 and stripped.startswith("#!"): + insert_idx = 1 + continue + if stripped.startswith("# -*-") or stripped.startswith("# coding"): + insert_idx = i + 1 + continue + break + + filtered_lines.insert(insert_idx, _FUTURE_ANNOTATIONS) + return "\n".join(filtered_lines) + + +# Common standard-library symbols that often appear in type annotations +# but are forgotten in skeleton imports. 
_STDLIB_ANNOTATION_IMPORTS = {
    "dataclass": "from dataclasses import dataclass",
    "field": "from dataclasses import field",
    "Enum": "from enum import Enum",
    "ABC": "from abc import ABC, abstractmethod",
    "abstractmethod": "from abc import ABC, abstractmethod",
    "Optional": "from typing import Optional",
    "List": "from typing import List",
    "Dict": "from typing import Dict",
    "Tuple": "from typing import Tuple",
    "Set": "from typing import Set",
    "Sequence": "from typing import Sequence",
    "Mapping": "from typing import Mapping",
    "Callable": "from typing import Callable",
    "Union": "from typing import Union",
    "Any": "from typing import Any",
    "Iterator": "from typing import Iterator",
    "Iterable": "from typing import Iterable",
    "TYPE_CHECKING": "from typing import TYPE_CHECKING",
    "Literal": "from typing import Literal",
}


def fix_missing_stdlib_imports(code: str) -> str:
    """Add missing standard-library imports for symbols used in the code.

    Scans for common symbols (``@dataclass``, ``Callable``, ``Optional``, etc.)
    that appear in the code but are not imported, and adds the necessary
    import statements.

    Args:
        code: Python source code string.

    Returns:
        Code with missing stdlib imports added.
    """
    if not code or not code.strip():
        return code

    try:
        tree = _ast.parse(code)
    except SyntaxError:
        return code

    # Collect all names already imported
    imported_names: set = set()
    for node in _ast.walk(tree):
        if isinstance(node, _ast.Import):
            for alias in node.names:
                imported_names.add(alias.asname or alias.name.split(".")[-1])
        elif isinstance(node, _ast.ImportFrom):
            for alias in node.names:
                imported_names.add(alias.asname or alias.name)

    # Collect all names used in the code
    used_names: set = set()
    for node in _ast.walk(tree):
        if isinstance(node, _ast.Name):
            used_names.add(node.id)
        elif isinstance(node, _ast.Attribute):
            used_names.add(node.attr)

    # Also check for @dataclass decorator usage
    for node in _ast.walk(tree):
        if isinstance(node, _ast.ClassDef):
            for deco in node.decorator_list:
                if isinstance(deco, _ast.Name):
                    used_names.add(deco.id)
                elif isinstance(deco, _ast.Call) and isinstance(deco.func, _ast.Name):
                    used_names.add(deco.func.id)

    # Determine which imports to add
    needed_imports: dict = {}
    for symbol, import_line in _STDLIB_ANNOTATION_IMPORTS.items():
        if symbol in used_names and symbol not in imported_names:
            # Group by import line to avoid duplicates
            needed_imports[import_line] = True

    if not needed_imports:
        return code

    # Insert imports in the file header (before the first class/function def).
    # We only look at top-level import lines to avoid being confused by
    # 'from'/'import' lines that accidentally ended up inside docstrings.
    lines = code.split("\n")

    # Find the first class/function definition to bound the header region
    first_def_line = len(lines)
    for node in tree.body:
        if isinstance(node, (_ast.ClassDef, _ast.FunctionDef, _ast.AsyncFunctionDef)):
            first_def_line = node.lineno - 1  # 0-indexed
            break

    last_import_line = -1
    future_line = -1
    for i in range(first_def_line):
        stripped = lines[i].strip()
        if stripped.startswith("from __future__"):
            future_line = i
        elif stripped.startswith(("import ", "from ")):
            last_import_line = i

    # Insert after the last header import, or after __future__
    if last_import_line >= 0:
        insert_at = last_import_line + 1
    elif future_line >= 0:
        insert_at = future_line + 1
    elif (
        tree.body
        and isinstance(tree.body[0], _ast.Expr)
        and isinstance(tree.body[0].value, _ast.Constant)
        and isinstance(tree.body[0].value.value, str)
    ):
        # FIX: never insert above the module docstring — that demoted it
        # to a plain string expression and the module lost ``__doc__``.
        # end_lineno (1-based) is the 0-based index just after it.
        insert_at = tree.body[0].end_lineno
    else:
        insert_at = 0
        for i in range(first_def_line):
            stripped = lines[i].strip()
            if stripped and not stripped.startswith("#"):
                insert_at = i
                break

    for imp_line in sorted(needed_imports.keys()):
        lines.insert(insert_at, imp_line)
        insert_at += 1

    return "\n".join(lines)


def fix_skeleton_files(repo_path: Path) -> List[str]:
    """Fix common skeleton file issues across the entire repo.

    Applies all automated fixes to ``.py`` files under ``src/``:

    1. Add ``from __future__ import annotations`` (forward ref fix)
    2. Fix missing stdlib imports (``dataclass``, ``Callable``, etc.)
    3. Fix import prefixes (``from pkg.*`` → ``from src.pkg.*``)
    4. Fix missing base-class imports (``MathEntity``, ``Animation``, etc.)

    This should run once after ``write_interface_skeletons`` and also
    as a safety net before each test run.

    Args:
        repo_path: Root of the project repository.

    Returns:
        List of file paths (relative to *repo_path*) that were modified.
    """
    modified: List[str] = []
    src_dir = repo_path / "src"
    if not src_dir.is_dir():
        return modified

    prefix = detect_project_import_prefix(repo_path=repo_path)

    for py_file in src_dir.rglob("*.py"):
        try:
            original = py_file.read_text(encoding="utf-8")
        except OSError:
            continue

        code = original

        # 1. Ensure from __future__ import annotations
        code = ensure_future_annotations(code)

        # 2. Fix missing stdlib imports
        code = fix_missing_stdlib_imports(code)

        # 3. Fix import prefixes (inline, not calling normalize_files to avoid double I/O)
        if prefix:
            code = normalize_code(code, prefix)

        if code != original:
            py_file.write_text(code, encoding="utf-8")
            rel = str(py_file.relative_to(repo_path))
            modified.append(rel)
            logger.info("Fixed skeleton issues in %s", rel)

    # 4. Fix missing base-class / project-internal imports
    base_fixed = _fix_missing_base_class_imports(repo_path, prefix)
    modified.extend(base_fixed)

    return modified


def _fix_missing_base_class_imports(repo_path: Path, prefix: Optional[str] = None) -> List[str]:
    """Find classes/names used but not imported and add the import.

    Covers:
      - ``class Foo(Bar):`` where ``Bar`` is not imported
      - Default parameter values like ``easing: X = EasingFunction.LINEAR``
        where ``EasingFunction`` is not imported

    Only resolves names that are defined as classes in other project files.
    """
    src_dir = repo_path / "src"
    if not src_dir.is_dir():
        return []

    # Build a map: class_name -> file_path (relative to repo_path)
    class_to_file: Dict[str, str] = {}
    for py_file in src_dir.rglob("*.py"):
        try:
            tree = _ast.parse(py_file.read_text(encoding="utf-8"))
        except (OSError, SyntaxError):
            continue
        rel = str(py_file.relative_to(repo_path))
        for node in _ast.walk(tree):
            if isinstance(node, _ast.ClassDef):
                if node.name not in class_to_file:
                    class_to_file[node.name] = rel

    modified: List[str] = []
    builtins = {"object", "Exception", "ValueError", "TypeError", "RuntimeError",
                "KeyError", "IndexError", "AttributeError", "NotImplementedError",
                "str", "int", "float", "dict", "list", "tuple", "set", "bool", "bytes",
                "type", "property", "staticmethod", "classmethod", "super", "None",
                "True", "False", "print", "len", "range", "enumerate", "zip", "map",
                "filter", "sorted", "reversed", "isinstance", "issubclass", "hasattr",
                "getattr", "setattr", "delattr", "id", "hash", "repr", "abs", "round",
                "min", "max", "sum", "all", "any", "iter", "next", "open"}

    for py_file in src_dir.rglob("*.py"):
        try:
            code = py_file.read_text(encoding="utf-8")
            tree = _ast.parse(code)
        except (OSError, SyntaxError):
            continue

        # Collect imported names
        imported = set()
        for node in _ast.walk(tree):
            if isinstance(node, _ast.Import):
                for a in node.names:
                    imported.add(a.asname or a.name.split(".")[-1])
            elif isinstance(node, _ast.ImportFrom):
                for a in node.names:
                    imported.add(a.asname or a.name)

        # Collect all top-level Name references that resolve to project classes
        # This covers: base classes, default values, type refs in non-annotation positions
        needed_names: set = set()
        for node in _ast.walk(tree):
            # Base classes
            if isinstance(node, _ast.ClassDef):
                for base in node.bases:
                    if isinstance(base, _ast.Name):
                        needed_names.add(base.id)
            # Default argument values (e.g. EasingFunction.LINEAR)
EasingFunction.LINEAR) + if isinstance(node, _ast.Attribute): + if isinstance(node.value, _ast.Name): + needed_names.add(node.value.id) + + # Filter to only unimported names that exist as project classes + missing_imports: List[str] = [] + for name in sorted(needed_names): + if name in imported or name in builtins: + continue + if name in _STDLIB_ANNOTATION_IMPORTS: + continue + if name not in class_to_file: + continue + src_file = class_to_file[name] + rel_current = str(py_file.relative_to(repo_path)) + if src_file == rel_current: + continue + + module = src_file.replace("/", ".").replace("\\", ".") + if module.endswith(".py"): + module = module[:-3] + imp_line = f"from {module} import {name}" + if prefix: + imp_line = normalize_code(imp_line, prefix).strip() + missing_imports.append(imp_line) + imported.add(name) + + if not missing_imports: + continue + + lines = code.split("\n") + + # Find header region (before first class/function def) + first_def_line = len(lines) + for node in tree.body: + if isinstance(node, (_ast.ClassDef, _ast.FunctionDef, _ast.AsyncFunctionDef)): + first_def_line = node.lineno - 1 + break + + last_import = -1 + for i in range(first_def_line): + stripped = lines[i].strip() + if stripped.startswith(("import ", "from ")) and not stripped.startswith("from __future__"): + last_import = i + + insert_at = last_import + 1 if last_import >= 0 else 1 + for imp in sorted(set(missing_imports)): + lines.insert(insert_at, imp) + insert_at += 1 + + py_file.write_text("\n".join(lines), encoding="utf-8") + rel = str(py_file.relative_to(repo_path)) + modified.append(rel) + logger.info("Added missing project imports in %s", rel) + + return modified + + +# ============================================================================ +# Import Convention Snippet (for LLM prompts) +# ============================================================================ + +def build_import_convention_snippet( + repo_path: Optional[Path] = None, + prefix: Optional[str] = 
None, +) -> str: + """Build a prompt snippet describing the project's import convention. + + This can be injected into LLM prompts (interface design, code gen, + test gen) so the LLM knows which import style to use. + + Args: + repo_path: Project repo root (used for auto-detection). + prefix: Explicit prefix (skips detection). + + Returns: + Markdown-formatted instruction string, or empty string if + the convention cannot be determined. + """ + if prefix is None and repo_path is not None: + prefix = detect_project_import_prefix(repo_path=repo_path) + + if not prefix: + return "" + + parts = prefix.split(".", 1) + if len(parts) != 2 or parts[0] != "src": + return "" + + bare_package = parts[1] + + return f"""\ +## Import Convention (CRITICAL) +- This project's source code lives under `src/{bare_package}/`. +- ALL internal imports MUST use the full path with `src.` prefix: + - [OK] `from {prefix}.module import ClassName` + - [FAIL] `from {bare_package}.module import ClassName` +- The `src.` prefix is required because the Python path is set to the repo root, + not to `src/`. +""" diff --git a/RPG-Kit/scripts/common/llm_api_client.py b/RPG-Kit/scripts/common/llm_api_client.py new file mode 100644 index 0000000..e721fb2 --- /dev/null +++ b/RPG-Kit/scripts/common/llm_api_client.py @@ -0,0 +1,1212 @@ +#!/usr/bin/env python3 +"""API-Based LLM Client for RPG-Kit. + +This module provides direct API access to LLM providers as an optional +complement to the existing CLI-based LLM client in ``llm_client.py``. + +Ported from RPG-ZeroRepo (zerorepo/rpg_gen/base/llm_client/) with adaptations +for RPG-Kit's project structure and coding conventions. 

Key components:
- LLMConfig: Model configuration for unified LLM access across providers
- BaseLLMClient: Abstract base class for provider implementations
- OpenAICompatibleClient: Shared base for OpenAI-API-compatible providers
- OpenAIClient: OpenAI provider implementation
- AnthropicClient: Anthropic Claude provider implementation
- APILLMClient: High-level unified LLM client (factory/router pattern)

Usage:
    from common.llm_api_client import APILLMClient, LLMConfig

    config = LLMConfig(model="gpt-4o", provider="openai")
    client = APILLMClient(config)
    response = client.generate(memory)
    print(response)
    print(client.last_usage)
"""

from __future__ import annotations

import json
import logging
import os
import random
import time
import traceback
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from enum import Enum
from functools import wraps
from typing import Any, Callable, Dict, List, Optional, Type, TypeVar, Union

from pydantic import BaseModel

from common.llm_types import (
    LLMMessage,
    LLMResponse,
    LLMUsage,
    Memory,
    ToolCall,
    ToolResult,
)

logger = logging.getLogger(__name__)

T = TypeVar("T")


# ============================================================================
# Provider Registry
# ============================================================================

class LLMProvider(Enum):
    """Supported LLM providers."""

    OPENAI = "openai"
    ANTHROPIC = "anthropic"
    AZURE = "azure"
    DEEPSEEK = "deepseek"
    GOOGLE = "google"
    VLLM = "vllm"
    OPENROUTER = "openrouter"
    OLLAMA = "ollama"
    DOUBAO = "doubao"


# String constants for backward compatibility
PROVIDER_AZURE = LLMProvider.AZURE.value
PROVIDER_OPENAI = LLMProvider.OPENAI.value
PROVIDER_ANTHROPIC = LLMProvider.ANTHROPIC.value
PROVIDER_DEEPSEEK = LLMProvider.DEEPSEEK.value
PROVIDER_GOOGLE = LLMProvider.GOOGLE.value
PROVIDER_VLLM = LLMProvider.VLLM.value
PROVIDER_OPENROUTER = LLMProvider.OPENROUTER.value
PROVIDER_OLLAMA = LLMProvider.OLLAMA.value
PROVIDER_DOUBAO = LLMProvider.DOUBAO.value

ALL_PROVIDERS = [p.value for p in LLMProvider]

# Model prefix -> provider auto-detection
_MODEL_PREFIX_TO_PROVIDER: Dict[str, str] = {
    "claude": PROVIDER_ANTHROPIC,
    "deepseek": PROVIDER_DEEPSEEK,
    "gemini": PROVIDER_GOOGLE,
}


def infer_provider(model: str, base_url: str | None = None) -> str:
    """Infer provider from model name or base_url.

    Model-name prefixes take precedence over base_url hints; falls back
    to "openai" when nothing matches.

    Args:
        model: The model name (e.g. "gpt-4o", "claude-3-opus").
        base_url: Optional base URL hint.

    Returns:
        Provider name string (e.g. "openai", "anthropic").
    """
    m = model.lower().strip()
    for prefix, provider in _MODEL_PREFIX_TO_PROVIDER.items():
        if m.startswith(prefix):
            return provider
    if base_url:
        u = base_url.lower()
        if "openai.azure" in u or "azure-api" in u:
            return PROVIDER_AZURE
        if "api.openai.com" in u:
            return PROVIDER_OPENAI
        if "deepseek.com" in u:
            return PROVIDER_DEEPSEEK
        if "generativelanguage.googleapis.com" in u:
            return PROVIDER_GOOGLE
        if "openrouter.ai" in u:
            return PROVIDER_OPENROUTER
        # Local endpoints are assumed to be vLLM-style servers.
        if "localhost" in u or "127.0.0.1" in u:
            return PROVIDER_VLLM
    return PROVIDER_OPENAI  # default


# ============================================================================
# Retry Utility
# ============================================================================

def retry_with(
    func: Callable[..., T],
    provider_name: str = "OpenAI",
    max_retries: int = 3,
) -> Callable[..., T]:
    """Decorator that adds retry logic with randomized backoff.

    Args:
        func: The function to decorate.
        provider_name: The name of the model provider (for logging).
        max_retries: Maximum number of retry attempts.

    Returns:
        Decorated function with retry logic.
+ """ + + @wraps(func) + def wrapper(*args: Any, **kwargs: Any) -> T: + last_exception = None + + for attempt in range(max_retries + 1): + try: + return func(*args, **kwargs) + except Exception as e: + last_exception = e + + if attempt == max_retries: + raise + + sleep_time = random.randint(3, 30) + this_error_message = str(e) + logger.warning( + "%s API call failed: %s. Will sleep for %d seconds " + "and retry.\n%s", + provider_name, + this_error_message, + sleep_time, + traceback.format_exc(), + ) + time.sleep(sleep_time) + + raise last_exception or Exception( + "Retry failed for unknown reason" + ) + + return wrapper + + +# ============================================================================ +# LLMConfig +# ============================================================================ + +@dataclass +class LLMConfig: + """Model configuration for unified LLM access across providers. + + Attributes: + model: Model name (e.g. "gpt-4o", "claude-3-opus"). + temperature: Sampling temperature. + max_tokens: Maximum number of tokens to generate. + top_p: Nucleus sampling probability. + stream: Whether to stream the response. + stop: Stop sequences. + provider: Provider name (auto-detected from model if not set). + api_key: API key (falls back to environment variable). + base_url: Base URL override for the provider. + endpoint_url: Azure-specific endpoint URL. + deployment_name: Azure-specific deployment name. + api_version: Azure API version. + tenant_id: Azure tenant ID. + token_scope: Azure token scope. + max_retries: Maximum retry attempts for API calls. + log: Whether to log provider initialization and responses. + extra: Provider-specific params without explicit fields. 
+ """ + + model: str = "gpt-4o" + temperature: float = 0.0 + max_tokens: int = 2000 + top_p: float = 1.0 + stream: bool = False + stop: Optional[List[str]] = None + + # Provider & connection + provider: Optional[str] = None + api_key: Optional[str] = None + base_url: Optional[str] = None + + # Azure-specific + endpoint_url: Optional[str] = None + deployment_name: Optional[str] = None + api_version: str = "2025-01-01-preview" + tenant_id: Optional[str] = None + token_scope: Optional[str] = None + + # Retry + max_retries: int = 3 + + log: bool = True + + # Provider-specific params that don't have explicit fields + extra: Dict[str, Any] = field(default_factory=dict) + + def resolve_provider(self) -> str: + """Return effective provider, auto-detecting from model name if not explicitly set.""" + if self.provider: + return self.provider + return infer_provider( + self.model, self.base_url or self.endpoint_url + ) + + def to_dict(self) -> Dict[str, Any]: + """Serialize to a plain dictionary.""" + d: Dict[str, Any] = { + "model": self.model, + "temperature": self.temperature, + "max_tokens": self.max_tokens, + "top_p": self.top_p, + "stream": self.stream, + "stop": self.stop, + "provider": self.provider, + "api_key": self.api_key, + "base_url": self.base_url, + "endpoint_url": self.endpoint_url, + "deployment_name": self.deployment_name, + "api_version": self.api_version, + "tenant_id": self.tenant_id, + "token_scope": self.token_scope, + "max_retries": self.max_retries, + "log": self.log, + } + if self.extra: + d["extra"] = self.extra + return d + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> LLMConfig: + """Create an LLMConfig from a dictionary. + + Unknown keys are placed into the ``extra`` dict. 
+ """ + valid_fields = {f.name for f in cls.__dataclass_fields__.values()} + filtered: Dict[str, Any] = {} + extra: Dict[str, Any] = {} + for k, v in data.items(): + if k in valid_fields: + filtered[k] = v + else: + extra[k] = v + cfg = cls(**filtered) + if extra: + cfg.extra.update(extra) + return cfg + + @classmethod + def from_source( + cls, source: Union[str, Dict[str, Any], LLMConfig] + ) -> LLMConfig: + """Create an LLMConfig from various source types. + + Supports: + - LLMConfig instance -> return as-is + - dict -> from_dict + - JSON/YAML string -> parse + - JSON/YAML file path -> read & parse + """ + if isinstance(source, cls): + return source + if isinstance(source, dict): + return cls.from_dict(source) + + if isinstance(source, str): + if os.path.exists(source): + with open(source, "r", encoding="utf-8") as f: + text = f.read() + else: + text = source + + try: + return cls.from_dict(json.loads(text)) + except json.JSONDecodeError: + pass + + # Try YAML (optional dependency) + try: + import yaml + + parsed = yaml.safe_load(text) + if isinstance(parsed, dict): + return cls.from_dict(parsed) + except Exception: + pass + + raise ValueError( + "Cannot parse config: not valid JSON / YAML / dict / LLMConfig" + ) + + raise TypeError(f"Unsupported config type: {type(source)}") + + def save(self, path: str) -> None: + """Save configuration to a file (JSON or YAML based on extension).""" + data = self.to_dict() + if path.endswith((".yml", ".yaml")): + import yaml + + with open(path, "w", encoding="utf-8") as f: + yaml.safe_dump(data, f, allow_unicode=True) + else: + with open(path, "w", encoding="utf-8") as f: + json.dump(data, f, indent=2, ensure_ascii=False) + + +# ============================================================================ +# BaseLLMClient (Abstract Provider Base) +# ============================================================================ + +class BaseLLMClient(ABC): + """Abstract base class for all LLM provider clients. 

    Concrete implementations must provide ``set_chat_history`` and ``chat``.
    """

    def __init__(self, config: LLMConfig):
        self.config = config
        self.model: str = config.model.strip()
        self.api_key: str | None = config.api_key
        self.base_url: str | None = config.base_url
        self.api_version: str | None = config.api_version

    @abstractmethod
    def set_chat_history(self, messages: list[LLMMessage]) -> None:
        """Set the chat history."""

    @abstractmethod
    def chat(
        self,
        messages: list[LLMMessage],
        tools: list | None = None,
        reuse_history: bool = True,
    ) -> LLMResponse:
        """Send chat messages to the LLM and return a structured response."""


# ============================================================================
# OpenAI Compatible Base
# ============================================================================

class ProviderConfig(ABC):
    """Abstract base class for provider-specific configurations."""

    @abstractmethod
    def create_client(self, config: LLMConfig) -> Any:
        """Create the OpenAI-compatible client instance."""

    @abstractmethod
    def get_service_name(self) -> str:
        """Get the service name for retry logging."""

    @abstractmethod
    def get_provider_name(self) -> str:
        """Get the provider name for identification."""

    def get_extra_headers(self) -> dict[str, str]:
        """Get any extra headers needed for the API call."""
        return {}

    def is_reasoning_model(self, model: str) -> bool:
        """Check if this model is a reasoning model (needs special param handling)."""
        return False


class OpenAICompatibleClient(BaseLLMClient):
    """Base class for OpenAI-compatible clients with shared logic.

    Handles message parsing, tool schemas, retry, and response construction
    for any provider that uses the OpenAI chat completions API format.
    """

    def __init__(
        self, config: LLMConfig, provider_config: ProviderConfig
    ):
        super().__init__(config)
        self.provider_config = provider_config
        self.client = provider_config.create_client(config)
        self.message_history: list = []

    def set_chat_history(self, messages: list[LLMMessage]) -> None:
        """Set the chat history."""
        self.message_history = self.parse_messages(messages)

    def _create_response(
        self,
        tool_schemas: list | None,
        extra_headers: dict[str, str] | None = None,
    ) -> Any:
        """Create a response using the provider's API."""
        import openai

        # Re-create client to refresh credentials (e.g. Azure token)
        self.client = self.provider_config.create_client(self.config)

        kwargs: dict = {
            "model": self.model,
            "messages": self.message_history,
            "tools": tool_schemas if tool_schemas else openai.NOT_GIVEN,
            "top_p": self.config.top_p,
            "n": 1,
        }

        # Reasoning models don't support temperature; use max_completion_tokens
        if self.provider_config.is_reasoning_model(self.model):
            kwargs["temperature"] = openai.NOT_GIVEN
            kwargs["reasoning_effort"] = self.config.extra.get(
                "reasoning_effort", "high"
            )
            kwargs["max_completion_tokens"] = self.config.max_tokens
        else:
            kwargs["temperature"] = self.config.temperature
            kwargs["max_tokens"] = self.config.max_tokens

        if extra_headers:
            kwargs["extra_headers"] = extra_headers

        if self.config.stop:
            kwargs["stop"] = self.config.stop

        return self.client.chat.completions.create(**kwargs)

    def chat(
        self,
        messages: list[LLMMessage],
        tools: list | None = None,
        reuse_history: bool = True,
    ) -> LLMResponse:
        """Send chat messages with optional tool support."""
        from openai.types.chat import (
            ChatCompletionAssistantMessageParam,
            ChatCompletionMessageToolCallParam,
            ChatCompletionToolParam,
        )
        from openai.types.chat.chat_completion_message_tool_call_param import (
            Function,
        )
        from openai.types.shared_params.function_definition import (
            FunctionDefinition,
        )

        parsed_messages = self.parse_messages(messages)
        if reuse_history:
            self.message_history = self.message_history + parsed_messages
        else:
            self.message_history = parsed_messages

        tool_schemas = None
        if tools:
            # NOTE(review): tools are assumed to expose get_name /
            # get_description / get_input_schema — confirm against the
            # project's tool interface.
            tool_schemas = [
                ChatCompletionToolParam(
                    function=FunctionDefinition(
                        name=tool.get_name(),
                        description=tool.get_description(),
                        parameters=tool.get_input_schema(),
                    ),
                    type="function",
                )
                for tool in tools
            ]

        extra_headers = self.provider_config.get_extra_headers()

        # Apply retry decorator to the API call
        retry_fn = retry_with(
            func=self._create_response,
            provider_name=self.provider_config.get_service_name(),
            max_retries=self.config.max_retries,
        )
        response = retry_fn(tool_schemas, extra_headers)

        choice = response.choices[0]

        tool_calls: list[ToolCall] | None = None
        if choice.message.tool_calls:
            tool_calls = []
            for tool_call in choice.message.tool_calls:
                tool_calls.append(
                    ToolCall(
                        name=tool_call.function.name,
                        call_id=tool_call.id,
                        arguments=(
                            json.loads(tool_call.function.arguments)
                            if tool_call.function.arguments
                            else {}
                        ),
                    )
                )

        llm_response = LLMResponse(
            content=choice.message.content or "",
            tool_calls=tool_calls,
            finish_reason=choice.finish_reason,
            model=response.model,
            usage=(
                LLMUsage(
                    input_tokens=response.usage.prompt_tokens or 0,
                    output_tokens=response.usage.completion_tokens or 0,
                )
                if response.usage
                else None
            ),
        )

        # Update message history
        if llm_response.tool_calls:
            self.message_history.append(
                ChatCompletionAssistantMessageParam(
                    role="assistant",
                    content=llm_response.content,
                    tool_calls=[
                        ChatCompletionMessageToolCallParam(
                            id=tc.call_id,
                            function=Function(
                                name=tc.name,
                                arguments=json.dumps(tc.arguments),
                            ),
                            type="function",
                        )
                        for tc in llm_response.tool_calls
                    ],
                )
            )
        elif llm_response.content:
            self.message_history.append(
                ChatCompletionAssistantMessageParam(
                    content=llm_response.content, role="assistant"
                )
            )

        return llm_response

    def parse_messages(self, messages: list[LLMMessage]) -> list:
        """Parse LLM messages to OpenAI format."""
        from openai.types.chat import (
            ChatCompletionAssistantMessageParam,
            ChatCompletionFunctionMessageParam,
            ChatCompletionSystemMessageParam,
            ChatCompletionUserMessageParam,
        )
        from openai.types.chat.chat_completion_tool_message_param import (
            ChatCompletionToolMessageParam,
        )

        openai_messages: list = []
        for msg in messages:
            if msg.tool_call is not None:
                # Tool call message
                # NOTE(review): this emits the deprecated "function" role;
                # modern OpenAI APIs expect assistant messages carrying
                # tool_calls — verify the target API accepts it.
                if msg.tool_call:
                    openai_messages.append(
                        ChatCompletionFunctionMessageParam(
                            content=json.dumps(
                                {
                                    "name": msg.tool_call.name,
                                    "arguments": msg.tool_call.arguments,
                                }
                            ),
                            role="function",
                            name=msg.tool_call.name,
                        )
                    )
            elif msg.tool_result is not None:
                # Tool result message
                if msg.tool_result:
                    result_text: str = ""
                    if msg.tool_result.result:
                        result_text = result_text + msg.tool_result.result + "\n"
                    if msg.tool_result.error:
                        result_text += "Tool call failed with error:\n"
                        result_text += msg.tool_result.error
                    result_text = result_text.strip()
                    openai_messages.append(
                        ChatCompletionToolMessageParam(
                            content=result_text,
                            role="tool",
                            tool_call_id=msg.tool_result.call_id,
                        )
                    )
            else:
                # Standard role-based message
                if msg.role == "system":
                    if not msg.content:
                        raise ValueError(
                            "System message content is required"
                        )
                    openai_messages.append(
                        ChatCompletionSystemMessageParam(
                            content=msg.content, role="system"
                        )
                    )
                elif msg.role == "user":
                    if not msg.content:
                        raise ValueError(
                            "User message content is required"
                        )
                    openai_messages.append(
                        ChatCompletionUserMessageParam(
                            content=msg.content, role="user"
                        )
                    )
                elif msg.role == "assistant":
                    if not msg.content:
                        raise ValueError(
                            "Assistant message content is required"
                        )
                    openai_messages.append(
                        ChatCompletionAssistantMessageParam(
                            content=msg.content, role="assistant"
                        )
                    )
                else:
                    raise ValueError(f"Invalid message role: {msg.role}")
        return openai_messages


# ============================================================================
# OpenAI Provider
# ============================================================================

class OpenAIProvider(ProviderConfig):
    """OpenAI provider configuration."""

    def create_client(self, config: LLMConfig) -> Any:
        import openai

        api_key = config.api_key or os.getenv("OPENAI_API_KEY")
        kwargs: dict = {}
        if api_key:
            kwargs["api_key"] = api_key
        if config.base_url:
            kwargs["base_url"] = config.base_url
        return openai.OpenAI(**kwargs)

    def get_service_name(self) -> str:
        return "OpenAI"

    def get_provider_name(self) -> str:
        return "openai"

    def is_reasoning_model(self, model: str) -> bool:
        return "o3" in model or "o4-mini" in model or "gpt-5" in model


class OpenAIClient(OpenAICompatibleClient):
    """OpenAI client using chat.completions API."""

    def __init__(self, config: LLMConfig):
        super().__init__(config, OpenAIProvider())


# ============================================================================
# Anthropic Provider
# ============================================================================

class AnthropicClient(BaseLLMClient):
    """Anthropic client with tool support.

    Uses the Anthropic SDK directly (not OpenAI-compatible).
+ """ + + def __init__(self, config: LLMConfig): + super().__init__(config) + import anthropic + + api_key = config.api_key or os.getenv("ANTHROPIC_API_KEY") + kwargs: dict = {} + if api_key: + kwargs["api_key"] = api_key + if config.base_url: + kwargs["base_url"] = config.base_url + self.client: anthropic.Anthropic = anthropic.Anthropic(**kwargs) + self.message_history: list = [] + self.system_message: str | Any = anthropic.NOT_GIVEN + + def set_chat_history(self, messages: list[LLMMessage]) -> None: + """Set the chat history.""" + self.message_history = self.parse_messages(messages) + + def _create_anthropic_response(self, tool_schemas: Any) -> Any: + """Raw API call (decorated with retry by caller).""" + import anthropic + + return self.client.messages.create( + model=self.model, + messages=self.message_history, + max_tokens=self.config.max_tokens, + system=self.system_message, + tools=tool_schemas, + temperature=self.config.temperature, + top_p=self.config.top_p, + ) + + def chat( + self, + messages: list[LLMMessage], + tools: list | None = None, + reuse_history: bool = True, + ) -> LLMResponse: + """Send chat messages with optional tool support.""" + import anthropic + + anthropic_messages = self.parse_messages(messages) + self.message_history = ( + self.message_history + anthropic_messages + if reuse_history + else anthropic_messages + ) + + # Build tool schemas + tool_schemas: Any = anthropic.NOT_GIVEN + if tools: + tool_schemas = [] + for tool in tools: + tool_schemas.append( + anthropic.types.ToolParam( + name=tool.name, + description=tool.description, + input_schema=tool.get_input_schema(), + ) + ) + + # Call with retry + retry_fn = retry_with( + func=self._create_anthropic_response, + provider_name="Anthropic", + max_retries=self.config.max_retries, + ) + response = retry_fn(tool_schemas) + + # Parse response + content = "" + tool_calls: list[ToolCall] = [] + + for content_block in response.content: + if content_block.type == "text": + content += 
content_block.text + self.message_history.append( + anthropic.types.MessageParam( + role="assistant", content=content_block.text + ) + ) + elif content_block.type == "tool_use": + tool_calls.append( + ToolCall( + call_id=content_block.id, + name=content_block.name, + arguments=content_block.input, + ) + ) + self.message_history.append( + anthropic.types.MessageParam( + role="assistant", content=[content_block] + ) + ) + + usage = None + if response.usage: + usage = LLMUsage( + input_tokens=response.usage.input_tokens or 0, + output_tokens=response.usage.output_tokens or 0, + cache_creation_input_tokens=getattr( + response.usage, "cache_creation_input_tokens", 0 + ) + or 0, + cache_read_input_tokens=getattr( + response.usage, "cache_read_input_tokens", 0 + ) + or 0, + ) + + return LLMResponse( + content=content, + usage=usage, + model=response.model, + finish_reason=response.stop_reason, + tool_calls=tool_calls if tool_calls else None, + ) + + def parse_messages( + self, messages: list[LLMMessage] + ) -> list: + """Parse LLMMessage list to Anthropic format.""" + import anthropic + + anthropic_messages: list = [] + for msg in messages: + if msg.role == "system": + self.system_message = ( + msg.content if msg.content else anthropic.NOT_GIVEN + ) + elif msg.tool_result: + anthropic_messages.append( + anthropic.types.MessageParam( + role="user", + content=[self._parse_tool_result(msg.tool_result)], + ) + ) + elif msg.tool_call: + anthropic_messages.append( + anthropic.types.MessageParam( + role="assistant", + content=[self._parse_tool_call(msg.tool_call)], + ) + ) + else: + if msg.role not in ("user", "assistant"): + raise ValueError(f"Invalid message role: {msg.role}") + if not msg.content: + raise ValueError("Message content is required") + anthropic_messages.append( + anthropic.types.MessageParam( + role=msg.role, content=msg.content + ) + ) + return anthropic_messages + + @staticmethod + def _parse_tool_call(tool_call: ToolCall) -> Any: + """Convert ToolCall to 
Anthropic ToolUseBlockParam.""" + import anthropic + + return anthropic.types.ToolUseBlockParam( + type="tool_use", + id=tool_call.call_id, + name=tool_call.name, + input=json.dumps(tool_call.arguments), + ) + + @staticmethod + def _parse_tool_result(tool_result: ToolResult) -> Any: + """Convert ToolResult to Anthropic ToolResultBlockParam.""" + import anthropic + + result_text: str = "" + if tool_result.result: + result_text += tool_result.result + "\n" + if tool_result.error: + result_text += "Tool call failed with error:\n" + tool_result.error + result_text = result_text.strip() + if not tool_result.success and not result_text: + result_text = ( + "Tool execution failed without providing error details." + ) + + return anthropic.types.ToolResultBlockParam( + tool_use_id=tool_result.call_id, + type="tool_result", + content=result_text, + is_error=not tool_result.success, + ) + + +# ============================================================================ +# APILLMClient โ€” Factory / Router +# ============================================================================ + +class APILLMClient: + """Unified API-based LLM client supporting multiple providers. + + Factory pattern: lazy-imports the correct provider implementation + based on the resolved provider name from LLMConfig. + + This is the API-based counterpart to the existing CLI-based + ``LLMClient`` in ``llm_client.py``. 

    Public API:
    - generate(memory) -> Optional[str]
    - call_with_structure_output(memory, response_model) -> (Optional[Dict], str)
    """

    def __init__(
        self,
        config: Optional[Union[LLMConfig, Dict[str, Any], str]] = None,
    ):
        self.config = LLMConfig.from_source(config or {})
        self.model = self.config.model.strip()
        self.provider_name = self.config.resolve_provider()
        self.provider = LLMProvider(self.provider_name)

        # Lazy import -- only the selected provider's SDK is loaded
        match self.provider:
            case LLMProvider.OPENAI:
                self.client: BaseLLMClient = OpenAIClient(self.config)
            case LLMProvider.ANTHROPIC:
                self.client = AnthropicClient(self.config)
            case _:
                # For unsupported providers, attempt OpenAI-compatible
                logger.warning(
                    "Provider '%s' not natively supported, "
                    "falling back to OpenAI-compatible client.",
                    self.provider_name,
                )
                self.client = OpenAIClient(self.config)

        if self.config.log:
            logger.info(
                "Initialized API LLM client: provider='%s' model='%s'",
                self.provider_name,
                self.model,
            )

    # ----------------------------------------------------------------
    # Token usage
    # ----------------------------------------------------------------

    @property
    def last_usage(self) -> Dict[str, int]:
        """Backward-compatible usage dict."""
        # NOTE(review): _last_response is only set by _call(); getattr
        # guards the pre-first-call case — consider initializing it to
        # None in __init__.
        resp = getattr(self, "_last_response", None)
        if resp and resp.usage:
            return resp.usage.to_dict()
        return {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}

    # ----------------------------------------------------------------
    # Core call
    # ----------------------------------------------------------------

    def _call(self, messages: List[LLMMessage]) -> Optional[str]:
        """Call provider with LLMMessage list, return text."""
        response: LLMResponse = self.client.chat(
            messages, reuse_history=False
        )
        self._last_response = response
        return response.content.strip() if response.content else None

    # ----------------------------------------------------------------
    # Public API
    # ----------------------------------------------------------------

    def generate(
        self,
        memory: Memory,
        max_retries: int = 8,
        retry_delay: float = 20.0,
    ) -> Optional[str]:
        """Generate response from memory context with retry logic.

        Args:
            memory: Conversational memory containing message history.
            max_retries: Maximum number of retry attempts.
            retry_delay: Base delay between retries in seconds.

        Returns:
            Response text, or None if all retries fail.
        """
        messages = memory.to_llm_messages()
        retries = 0
        start = time.time()

        while retries < max_retries:
            try:
                result = self._call(messages)

                if self.config.log:
                    duration = round(time.time() - start, 2)
                    logger.info(
                        "Model '%s' response in %ss", self.model, duration
                    )

                if not result:
                    retries += 1
                    if retries >= max_retries:
                        logger.error(
                            "Maximum retries reached (empty result). Aborting."
                        )
                        return None
                    delay = retry_delay + random.uniform(0, 10)
                    logger.warning(
                        "Empty result, retry %d/%d in %.2f seconds...",
                        retries,
                        max_retries,
                        delay,
                    )
                    time.sleep(delay)
                    continue

                return result

            except Exception as e:
                error_str = str(e).lower()

                # Context-overflow errors are handled by truncating history
                # instead of consuming a retry.
                if (
                    "context_length_exceeded" in error_str
                    or "context_length" in error_str
                    or "list index out of range" in error_str
                ):
                    messages = self._truncate_context(messages)
                    if messages is None:
                        logger.error(
                            "Context too long and no more messages to "
                            "remove. Aborting."
                        )
                        return None
                    logger.warning(
                        "Context truncated, remaining %d messages. "
                        "Retrying...",
                        len(messages),
                    )
                    continue

                retries += 1
                logger.error(
                    "Error calling model '%s': %s", self.model, e
                )
                if retries >= max_retries:
                    logger.error("Maximum retries reached. Aborting.")
                    return None

                delay = retry_delay + random.uniform(0, 10)
                logger.warning("Retrying in %.2f seconds...", delay)
                time.sleep(delay)

        return None

    def _truncate_context(
        self, messages: List[LLMMessage]
    ) -> Optional[List[LLMMessage]]:
        """Remove oldest user-assistant pair to reduce context length."""
        system_msgs = [m for m in messages if m.role == "system"]
        other_msgs = [m for m in messages if m.role != "system"]

        if len(other_msgs) <= 2:
            return None

        removed_count = 0
        while removed_count < 2 and other_msgs:
            removed_msg = other_msgs.pop(0)
            removed_count += 1
            content_len = (
                len(removed_msg.content) if removed_msg.content else 0
            )
            logger.info(
                "Removed %s message (length: %d chars)",
                removed_msg.role,
                content_len,
            )
            # If the removed message was a user turn followed by its
            # assistant reply, drop the reply too so pairs stay matched.
            if (
                removed_msg.role == "user"
                and other_msgs
                and other_msgs[0].role == "assistant"
            ):
                removed_msg = other_msgs.pop(0)
                removed_count += 1
                content_len = (
                    len(removed_msg.content) if removed_msg.content else 0
                )
                logger.info(
                    "Removed %s message (length: %d chars)",
                    removed_msg.role,
                    content_len,
                )
                break

        if not other_msgs:
            return None

        return system_msgs + other_msgs

    def call_with_structure_output(
        self,
        memory: Memory,
        response_model: Type[BaseModel],
        max_retries: int = 3,
        retry_delay: float = 40.0,
    ) -> tuple:
        """Generate structured output matching a Pydantic model.

        Args:
            memory: Conversational memory containing message history.
            response_model: Pydantic model class for response validation.
            max_retries: Maximum number of retry attempts.
            retry_delay: Base delay between retries in seconds.

        Returns:
            Tuple of (validated_dict, raw_response_string),
            or (None, "") on failure.
+ """ + messages = memory.to_llm_messages() + + retries = 0 + start = time.time() + + while retries < max_retries: + try: + raw_response = self._call(messages) + + if not raw_response: + raise ValueError("Empty response from model") + text = raw_response.strip() + + # Strip ```json wrapper + if text.startswith("```"): + lines = text.splitlines() + if lines and lines[0].lstrip().startswith("```"): + lines = lines[1:] + if lines and lines[-1].rstrip().startswith("```"): + lines = lines[:-1] + text = "\n".join(lines).strip() + + # Parse JSON + try: + data = json.loads(text) + except json.JSONDecodeError: + start_brace = text.find("{") + end_brace = text.rfind("}") + if ( + start_brace != -1 + and end_brace != -1 + and end_brace > start_brace + ): + json_str = text[start_brace : end_brace + 1] + data = json.loads(json_str) + else: + raise + + # Validate with Pydantic (v2 compatible) + try: + model_instance = response_model.model_validate(data) + except AttributeError: + model_instance = response_model.parse_obj(data) + + try: + result = model_instance.model_dump() + except AttributeError: + result = model_instance.dict() + + if self.config.log: + duration = round(time.time() - start, 2) + logger.info( + "Structured output from '%s' in %ss", + self.model, + duration, + ) + + return result, raw_response + + except Exception as e: + logger.error( + "Error in call_with_structure_output: %s", e + ) + if retries >= max_retries: + logger.error( + "Maximum retries reached for structured output." 
+ ) + return None, "" + + delay = retry_delay + random.uniform(0, 10) + logger.warning( + "Retrying structured call in %.2f seconds...", delay + ) + time.sleep(delay) + finally: + retries += 1 + + return None, "" + + # ---------------------------------------------------------------- + # Serialization + # ---------------------------------------------------------------- + + def to_dict(self) -> Dict[str, Any]: + """Serialize client configuration.""" + return {"config": self.config.to_dict()} + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> APILLMClient: + """Create client from serialized configuration.""" + config = LLMConfig.from_dict(data.get("config", {})) + return cls(config=config) + + def __repr__(self) -> str: + return ( + f"" + ) diff --git a/RPG-Kit/scripts/common/llm_client.py b/RPG-Kit/scripts/common/llm_client.py new file mode 100644 index 0000000..a6f16b5 --- /dev/null +++ b/RPG-Kit/scripts/common/llm_client.py @@ -0,0 +1,888 @@ +#!/usr/bin/env python3 +"""LLM Client Module for RPG-Kit. + +This module provides a common LLM client with trajectory recording support. +All LLM calls (prompts and responses) are recorded in the trajectory when +a trajectory instance is provided. +""" + +import json +import logging +import os as _os +import re +import shlex +import signal as _signal +import subprocess +import time +from datetime import datetime +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple +from dataclasses import dataclass, field, asdict + +from common.llm_types import Memory +from common.session_manager import create_session_manager +from .paths import REPO_DIR as _REPO_DIR, WORKSPACE_ROOT as _WORKSPACE_ROOT + + +def _set_pdeathsig() -> None: + """Preexec hook: ask the kernel to send SIGTERM to this child when its parent dies (including SIGKILL). Called after fork() but before exec() so it runs in the child's address space. 
Silently ignored on non-Linux.""" + try: + import ctypes, signal as _s + ctypes.CDLL("libc.so.6").prctl(1, _s.SIGTERM) # PR_SET_PDEATHSIG = 1 + except Exception: + pass + + +# Default AI assistant command +AI_CLI_CMD = "" + + +# Mapping from the first token of AI_CLI_CMD to the canonical agent name +_CLI_TO_AGENT = { + "copilot": "copilot", + "claude": "claude", + "gemini": "gemini", + "qwen": "qwen", + "agent": "cursor", # cursor-agent uses "agent -p" + "augment": "auggie", + "codex": "codex", + "codebuddy": "codebuddy", + "qodercli": "qoder", + "opencode": "opencode", + "amp": "amp", +} + + +def detect_agent_type() -> str: + """Detect which AI coding agent is being used based on AI_CLI_CMD. + + AI_CLI_CMD is a placeholder that gets replaced per-agent during + release packaging (e.g. "claude -p", "copilot -p", "codex exec"). + + Returns one of: claude, gemini, copilot, cursor, codex, auggie, + amp, opencode, codebuddy, qoder, qwen, unknown + """ + if not AI_CLI_CMD: + return "unknown" + + first_token = AI_CLI_CMD.strip().split()[0] + return _CLI_TO_AGENT.get(first_token, "unknown") + + +# ============================================================================ +# LLM Interaction Record (for standalone use without Trajectory) +# ============================================================================ + +@dataclass +class LLMCallRecord: + """Record of a single LLM call with full prompt and response. + + This is used to track LLM interactions either within a Trajectory + or independently for debugging/analysis purposes. 
+ """ + call_id: int + timestamp: str + purpose: str + prompt: str + response: Optional[str] = None + parsed_result: Optional[Dict[str, Any]] = None + success: bool = False + error: Optional[str] = None + duration_seconds: Optional[float] = None + model: Optional[str] = None + metadata: Dict[str, Any] = field(default_factory=dict) + + def to_dict(self) -> Dict[str, Any]: + return asdict(self) + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "LLMCallRecord": + return cls(**data) + + +# ============================================================================ +# LLM Client with Trajectory Integration +# ============================================================================ + +class LLMClient: + """Client for interacting with AI CLI tools. + + Supports trajectory recording of all LLM calls (prompts and responses) + for debugging, analysis, and reproducibility. + + Usage: + # Without trajectory (calls are still tracked internally) + client = LLMClient() + response = client.generate("What is Python?") + + # With trajectory recording + from common.trajectory import Trajectory + traj = Trajectory("my_command") + traj.start() + step = traj.add_step("generate_code", "Generate some code") + traj.start_step(step.step_id) + + client = LLMClient(trajectory=traj, step_id=step.step_id) + response = client.generate("Write hello world", purpose="code_generation") + # LLM call is automatically recorded in trajectory + """ + + # Workspace root โ€” sourced from common.paths so that symlink-based + # dev workflows resolve correctly (see paths._find_workspace_root). + # Used for session trace storage (.rpgkit/logs//) and to + # express captured-trace paths relative to the workspace. + _INFERRED_PROJECT_DIR: Path = _WORKSPACE_ROOT + + def __init__( + self, + tool: str = None, + trajectory: Optional[Any] = None, # Trajectory instance + step_id: Optional[int] = None, + logger: Optional[logging.Logger] = None + ): + """Initialize LLM Client. 
+ + Args: + tool: CLI tool command (default: "llm") + trajectory: Trajectory instance for recording LLM calls + step_id: Current step ID in the trajectory + logger: Logger instance + """ + self.tool = tool or AI_CLI_CMD + self.trajectory = trajectory + self.step_id = step_id + self.logger = logger or logging.getLogger(__name__) + + # Session manager โ€” auto-determined from AI_CLI_CMD. + # project_dir must match the subprocess cwd (workspace root == REPO_DIR) + # so that Claude CLI's session file path + # (~/.claude/projects//) can be correctly located by + # the session manager. + self._session_manager = create_session_manager( + agent_type=detect_agent_type(), + project_dir=_REPO_DIR, + trace_filename_builder=self._build_trace_filename, + logger=self.logger, + ) + + # Internal call tracking + self._call_counter = 0 + self._call_history: List[LLMCallRecord] = [] + + def set_trajectory(self, trajectory: Any, step_id: int = None) -> None: + """Set or update the trajectory for recording LLM calls. + + Args: + trajectory: Trajectory instance + step_id: Current step ID (optional, can be set later) + """ + self.trajectory = trajectory + if step_id is not None: + self.step_id = step_id + + def set_step_id(self, step_id: int) -> None: + """Set the current step ID for trajectory recording.""" + self.step_id = step_id + + # ==================================================================== + # Session tracer helpers + # ==================================================================== + + def _build_trace_filename(self, purpose: str) -> str: + """Build a semantically meaningful filename for the captured session. + + Format: ``--.jsonl`` + + Date-first layout ensures ``ls`` / ``sort`` orders files chronologically. + + * ``step_name`` is taken from the current trajectory step (if any). + * ``purpose`` is the value passed to ``generate()`` / ``call_structured()``. + * The timestamp is the completion time (now). 
+ """ + parts: List[str] = [] + + # Try to get step name from trajectory + if self.trajectory and self.step_id is not None: + try: + step = self.trajectory.get_step(self.step_id) + if step and step.name: + parts.append(step.name) + except Exception: + pass + + # Append purpose (avoid duplicating the step name) + if purpose and purpose not in parts: + parts.append(purpose) + + if not parts: + parts.append("llm_call") + + ts = datetime.now().strftime("%Y%m%d-%H%M%S") + safe = "-".join(parts) + # Sanitise: keep alphanumerics, hyphens, underscores + safe = re.sub(r"[^\w\-]", "_", safe) + return f"{ts}-{safe}.jsonl" + + def generate( + self, + prompt: str, + purpose: str = "general", + max_retries: int = 3, + timeout: Optional[int] = 1800, + metadata: Dict[str, Any] = None + ) -> str: + """Generate response from LLM. + + Args: + prompt: The prompt to send to the LLM + purpose: Description of the purpose of this call (for trajectory) + max_retries: Number of retry attempts + timeout: Timeout in seconds + metadata: Additional metadata to store with the call record + + Returns: + Response text from LLM + + Raises: + RuntimeError: If LLM call fails after all retries + """ + # Create call record + self._call_counter += 1 + call_record = LLMCallRecord( + call_id=self._call_counter, + timestamp=datetime.now().isoformat(), + purpose=purpose, + prompt=prompt, + metadata=metadata or {} + ) + + # Record interaction start in trajectory + interaction_id = None + if self.trajectory and self.step_id is not None: + try: + interaction_id = self.trajectory.start_llm_interaction( + self.step_id, + purpose, + prompt + ) + except Exception as e: + self.logger.warning(f"Failed to record LLM interaction start: {e}") + + start_time = time.time() + response = None + error = None + + with self._session_manager.trace(prompt, purpose=purpose) as trace_ctx: + for attempt in range(max_retries): + try: + self.logger.debug(f"Calling LLM (attempt {attempt + 1})") + + # Build command with any extra args 
from session manager. + # On retries, refresh_for_retry() regenerates any + # single-use tokens (e.g., Claude's --session-id) + # and resets stdin for re-reading the prompt. + if attempt > 0: + trace_ctx.refresh_for_retry() + cmd = shlex.split(self.tool) + trace_ctx.extra_args + + # Sub-agent runs in the project repo directory. + # start_new_session=True puts the child in its own process + # group so killpg kills the whole tree on parent exit. + # preexec_fn=_set_pdeathsig handles the SIGKILL case via + # PR_SET_PDEATHSIG (kernel sends SIGTERM to child on parent death). + proc = subprocess.Popen( + cmd, + stdin=trace_ctx.stdin, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + env=trace_ctx.env, + cwd=_REPO_DIR, + start_new_session=True, + preexec_fn=_set_pdeathsig, + ) + try: + stdout, stderr = proc.communicate(timeout=timeout) + except BaseException: + # Kill the entire process group (agent + any pytest children) + try: + _os.killpg(_os.getpgid(proc.pid), _signal.SIGTERM) + except Exception: + proc.kill() + proc.wait() + raise + result = subprocess.CompletedProcess( + cmd, proc.returncode, stdout, stderr + ) + + if result.returncode != 0: + error = f"LLM call failed with return code {result.returncode}: {result.stderr}" + self.logger.warning(error) + continue + + response = result.stdout.strip() + if response: + break + else: + error = "LLM returned empty response" + self.logger.warning(error) + + except subprocess.TimeoutExpired: + error = f"LLM call timed out after {timeout}s" + self.logger.warning(f"LLM call timed out (attempt {attempt + 1})") + except Exception as e: + error = str(e) + self.logger.warning(f"LLM call error: {e}") + + # Session trace captured automatically by context manager + captured_path = trace_ctx.captured_path + + duration = time.time() - start_time + + # Update call record + call_record.response = response + call_record.duration_seconds = duration + call_record.success = response is not None + call_record.error = error 
if not response else None + if captured_path: + call_record.metadata["session_trace"] = str( + captured_path.relative_to(self._INFERRED_PROJECT_DIR) + ) + + # Store in history + self._call_history.append(call_record) + + # Record interaction completion in trajectory + if self.trajectory and self.step_id is not None and interaction_id is not None: + try: + self.trajectory.complete_llm_interaction( + self.step_id, + interaction_id, + response=response or "", + parsed_result=None, + success=response is not None, + error=error if not response else None, + duration_seconds=duration + ) + except Exception as e: + self.logger.warning(f"Failed to record LLM interaction completion: {e}") + + if not response: + raise RuntimeError(f"Failed to get response from LLM after {max_retries} retries: {error}") + + return response + + def generate_with_record( + self, + prompt: str, + purpose: str = "general", + max_retries: int = 3, + timeout: Optional[int] = 1800, + metadata: Dict[str, Any] = None + ) -> Tuple[str, LLMCallRecord]: + """Generate response from LLM and return both response and call record. + + This is useful when you need access to the full call record for + custom processing or when not using trajectory. 
+ + Returns: + Tuple of (response_text, call_record) + """ + initial_count = len(self._call_history) + response = self.generate( + prompt=prompt, + purpose=purpose, + max_retries=max_retries, + timeout=timeout, + metadata=metadata + ) + # Get the call record that was just added + if len(self._call_history) > initial_count: + call_record = self._call_history[-1] + else: + # Create a minimal record if something went wrong + call_record = LLMCallRecord( + call_id=self._call_counter, + timestamp=datetime.now().isoformat(), + purpose=purpose, + prompt=prompt, + response=response, + success=True + ) + return response, call_record + + # ==================================================================== + # Memory-based generation (for Encoder / RPGAgent) + # ==================================================================== + + @staticmethod + def _flatten_memory(memory: Memory) -> str: + """Flatten Memory messages into a single prompt string. + + Converts the multi-turn conversation stored in Memory into a single + prompt suitable for CLI subprocess invocation. System messages are + placed first without role prefix; user/assistant messages are + clearly delimited with role headers. + """ + parts = [] + for msg in memory.history: + if msg.role == "system": + parts.append(msg.content) + elif msg.role == "user": + parts.append(f"\n[User]\n{msg.content}") + elif msg.role == "assistant": + parts.append(f"\n[Assistant]\n{msg.content}") + return "\n".join(parts) + + def generate_with_memory( + self, + memory: Memory, + purpose: str = "general", + max_retries: int = 3, + timeout: Optional[int] = 1800, + metadata: Dict[str, Any] = None, + ) -> Optional[str]: + """Generate from a Memory object by flattening to a single prompt. + + Encoder and RPGAgent use Memory to manage multi-turn conversations. + This method flattens the message sequence into a single prompt + string, then invokes the CLI subprocess via ``generate()``. 
+ + Differences from ``generate()``: + - Input: Memory object (multiple messages) instead of a single string + - Return: ``Optional[str]`` (None on failure) instead of raising + + Args: + memory: Memory instance with SystemMessage / UserMessage / + AssistantMessage entries. + purpose, max_retries, timeout, metadata: Same as ``generate()``. + + Returns: + LLM response text, or None if all retries failed. + """ + prompt = self._flatten_memory(memory) + try: + return self.generate( + prompt=prompt, + purpose=purpose, + max_retries=max_retries, + timeout=timeout, + metadata=metadata, + ) + except RuntimeError: + return None + + @property + def last_usage(self) -> Dict[str, int]: + """Token usage stub (CLI subprocess cannot track tokens).""" + return {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0} + + def generate_and_record_parsed( + self, + prompt: str, + purpose: str = "general", + max_retries: int = 3, + timeout: Optional[int] = 1800, + metadata: Dict[str, Any] = None, + parsed_result: Dict[str, Any] = None + ) -> str: + """Generate response from LLM and update the record with parsed result. + + Use this when you parse the response after receiving it and want + to record the parsed result in the trajectory. 
+ + Args: + prompt: The prompt to send to the LLM + purpose: Description of the purpose of this call + parsed_result: Parsed result to store (can be set after parsing) + + Returns: + Response text from LLM + """ + response = self.generate( + prompt=prompt, + purpose=purpose, + max_retries=max_retries, + timeout=timeout, + metadata=metadata + ) + + # Update the last call record with parsed result + if self._call_history and parsed_result: + self._call_history[-1].parsed_result = parsed_result + + # Also update trajectory if available + if self.trajectory and self.step_id is not None: + # Find the interaction and update it + step = self.trajectory.get_step(self.step_id) + if step and step.llm_interactions: + # Update the last interaction's parsed_result + step.llm_interactions[-1].parsed_result = parsed_result + self.trajectory.save() + + return response + + def update_last_parsed_result(self, parsed_result: Dict[str, Any]) -> None: + """Update the parsed result for the last LLM call. + + Call this after parsing the LLM response to store the parsed + result in both the call history and trajectory. + """ + if self._call_history: + self._call_history[-1].parsed_result = parsed_result + + if self.trajectory and self.step_id is not None: + step = self.trajectory.get_step(self.step_id) + if step and step.llm_interactions: + step.llm_interactions[-1].parsed_result = parsed_result + self.trajectory.save() + + def parse_result_json(self, response: str) -> Optional[Dict[str, Any]]: + """Extract JSON block from response. + + Includes resilient JSON repair for common LLM output errors: + trailing commas, Python literals (True/False/None), missing commas, + and newlines inside strings. 
+ + Args: + response: LLM response text + + Returns: + Parsed JSON dict or None if not found/invalid + """ + match = re.search(r"\s*(.*?)\s*", response, re.DOTALL) + if not match: + if "" in response: + # found but no โ€” likely truncated response + self.logger.warning( + "Found but no - response may be truncated" + ) + start = response.find("") + len("") + raw = response[start:].strip() + # Use brace counting to find where JSON ends + brace_count = 0 + json_end = -1 + for i, char in enumerate(raw): + if char == "{": + brace_count += 1 + elif char == "}": + brace_count -= 1 + if brace_count == 0: + json_end = i + 1 + break + if json_end > 0: + json_str = raw[:json_end] + else: + self.logger.error("Could not find valid JSON in truncated response") + return None + else: + # Fallback: try to extract JSON object directly (no result_json tags) + self.logger.debug( + "No block found, trying to extract JSON directly..." + ) + json_match = re.search( + r'\{\s*"thinking".*?\}(?=\s*$|\s*```)', response, re.DOTALL + ) + if not json_match: + json_match = re.search( + r'\{\s*"summary".*?\}(?=\s*$|\s*```)', response, re.DOTALL + ) + if json_match: + json_str = json_match.group(0) + else: + return None + else: + json_str = match.group(1).strip() + + # Clean up markdown code blocks if present + json_str = re.sub(r'^```json?\s*', '', json_str) + json_str = re.sub(r'\s*```$', '', json_str) + + # First attempt: try parsing as-is + try: + data = json.loads(json_str) + # Extract parameters if wrapped + if "parameters" in data: + return data["parameters"] + return data + except json.JSONDecodeError as first_err: + self.logger.debug(f"First parse attempt failed: {first_err}") + + # ----- JSON repair pass ----- + # Remove trailing commas before closing brackets + json_str = re.sub(r",\s*}", "}", json_str) + json_str = re.sub(r",\s*]", "]", json_str) + + # Fix Python literals -> JSON + json_str = re.sub(r"\bTrue\b", "true", json_str) + json_str = re.sub(r"\bFalse\b", "false", json_str) + 
json_str = re.sub(r"\bNone\b", "null", json_str) + + # Fix missing commas between key-value pairs + json_str = re.sub( + r'(true|false|null|\d+\.?\d*|"[^"]*"|\]|\})\s*\n\s*"', r'\1,\n"', json_str + ) + json_str = re.sub( + r'(true|false|null|\d+\.?\d*|"[^"]*"|\]|\})\s+"', r'\1, "', json_str + ) + + # Second attempt after basic repair + try: + data = json.loads(json_str) + if "parameters" in data: + return data["parameters"] + return data + except json.JSONDecodeError as second_err: + self.logger.debug(f"Second parse attempt failed: {second_err}") + + # Third attempt: handle newlines inside strings by joining split lines + lines = json_str.split("\n") + cleaned_lines = [] + in_string = False + current_line = "" + + for line in lines: + quote_count = 0 + i = 0 + while i < len(line): + if line[i] == '"' and (i == 0 or line[i - 1] != "\\"): + quote_count += 1 + i += 1 + + if in_string: + current_line += " " + line.strip() + else: + if current_line: + cleaned_lines.append(current_line) + current_line = line + + if quote_count % 2 == 1: + in_string = not in_string + + if current_line: + cleaned_lines.append(current_line) + + json_str = "\n".join(cleaned_lines) + + try: + data = json.loads(json_str) + if "parameters" in data: + return data["parameters"] + return data + except json.JSONDecodeError as e: + self.logger.warning(f"JSON string: {json_str[:500]}") + self.logger.error(f"JSON parse failed after all repair attempts: {e}") + return None + + def parse_json_block(self, response: str) -> Optional[Dict[str, Any]]: + """Extract JSON from response (looks for ```json blocks or raw JSON). 
+ + Args: + response: LLM response text + + Returns: + Parsed JSON dict or None if not found/invalid + """ + # Try ```json blocks first + match = re.search(r"```json\s*(.*?)\s*```", response, re.DOTALL) + if match: + try: + return json.loads(match.group(1).strip()) + except json.JSONDecodeError: + pass + + # Try raw JSON (find first { to last }) + try: + start = response.find('{') + end = response.rfind('}') + if start >= 0 and end > start: + return json.loads(response[start:end+1]) + except json.JSONDecodeError: + pass + + return None + + def call_structured( + self, + system_prompt: str, + user_prompt: str, + response_model: type, + max_retries: int = 3, + purpose: str = "structured_call", + timeout: Optional[int] = 1800, + ) -> Tuple[str, Optional[Any], str]: + """Call LLM and return structured output using a Pydantic model. + + Combines the system and user prompts, sends them to the LLM, + parses the ```` block, applies ``trim_dict_keys`` and + validates against the *response_model*. + + Also extracts the optional ```` block for diagnostics. 
+ + Args: + system_prompt: System prompt for LLM + user_prompt: User prompt for LLM + response_model: Pydantic model class for response validation + max_retries: Number of retry attempts + purpose: Purpose of this call for logging/trajectory + timeout: Timeout in seconds + + Returns: + ``(think_content, validated_model_or_None, raw_response)`` + """ + combined_prompt = f"{system_prompt}\n\n{user_prompt}" + last_think = "" + last_response = "" + + for attempt in range(max_retries): + try: + self.logger.debug(f"LLM structured call attempt {attempt + 1}/{max_retries}") + + # Use generate to get response (with trajectory recording) + response = self.generate( + prompt=combined_prompt, + purpose=purpose, + max_retries=1, # Handle retries at this level + timeout=timeout + ) + + last_response = response + last_think = self.extract_think_block(response) + + # Parse the result_json block + parsed_data = self.parse_result_json(response) + if parsed_data: + result = self.validate_structure(parsed_data, response_model) + if result is not None: + self.update_last_parsed_result(parsed_data) + self.logger.info("[OK] LLM structured call successful") + return last_think, result, response + else: + self.logger.warning( + f"[FAIL] Validation failed (attempt {attempt + 1})" + ) + else: + self.logger.warning( + f"[FAIL] Unable to parse block (attempt {attempt + 1})" + ) + + except subprocess.TimeoutExpired: + self.logger.warning(f"[FAIL] LLM call timeout (attempt {attempt + 1})") + except RuntimeError as e: + self.logger.warning(f"[FAIL] LLM call failed (attempt {attempt + 1}): {e}") + except Exception as e: + self.logger.warning(f"[FAIL] Error (attempt {attempt + 1}): {str(e)[:200]}") + + self.logger.error(f"[FAIL] All {max_retries} attempts failed") + return last_think, None, last_response + + # ==================================================================== + # Utility helpers (think extraction, key trimming, validation) + # 
==================================================================== + + @staticmethod + def extract_think_block(response: str) -> str: + """Extract the optional block from the AI response. + + This does not affect result_json parsing โ€” it is only for + logging / analysis. + + Returns: + The text inside โ€ฆ, or "" if not found. + """ + try: + if "" in response and "" in response: + start = response.find("") + len("") + end = response.find("", start) + if end != -1: + return response[start:end].strip() + except Exception: + pass + return "" + + @staticmethod + def trim_dict_keys(d): + """Recursively strip whitespace from all dictionary keys. + + This fixes a common LLM output error where keys contain + leading / trailing spaces. + """ + if isinstance(d, dict): + return { + (k.strip() if isinstance(k, str) else k): LLMClient.trim_dict_keys(v) + for k, v in d.items() + } + if isinstance(d, list): + return [LLMClient.trim_dict_keys(item) for item in d] + return d + + def validate_structure( + self, data: Dict[str, Any], response_model: type + ) -> Optional[Any]: + """Validate parsed data against a Pydantic model. + + Applies ``trim_dict_keys`` before validation so that + whitespace-padded keys produced by the LLM do not cause + spurious failures. + + Args: + data: Parsed dictionary (e.g. from ``parse_result_json``). + response_model: Pydantic ``BaseModel`` subclass. + + Returns: + Validated model instance, or ``None`` on failure. 
+ """ + try: + data = self.trim_dict_keys(data) + result = response_model(**data) + self.logger.info( + f"[OK] Structure validation passed: {type(result).__name__}" + ) + return result + except Exception as e: + self.logger.warning(f"[FAIL] Structure validation failed: {e}") + return None + + def get_call_history(self) -> List[LLMCallRecord]: + """Get all LLM call records.""" + return self._call_history.copy() + + def get_last_call(self) -> Optional[LLMCallRecord]: + """Get the most recent LLM call record.""" + return self._call_history[-1] if self._call_history else None + + def get_call_summary(self) -> Dict[str, Any]: + """Get a summary of all LLM calls.""" + total_duration = sum( + c.duration_seconds or 0 for c in self._call_history + ) + successful = sum(1 for c in self._call_history if c.success) + failed = len(self._call_history) - successful + + return { + "total_calls": len(self._call_history), + "successful_calls": successful, + "failed_calls": failed, + "total_duration_seconds": total_duration, + "purposes": list(set(c.purpose for c in self._call_history)) + } + + def export_call_history(self, filepath: str) -> None: + """Export call history to a JSON file. + + Useful for debugging and analysis when not using trajectory. + """ + data = { + "export_timestamp": datetime.now().isoformat(), + "summary": self.get_call_summary(), + "calls": [c.to_dict() for c in self._call_history] + } + + filepath.parent.mkdir(parents=True, exist_ok=True) + with open(filepath, 'w', encoding='utf-8') as f: + json.dump(data, f, indent=2, ensure_ascii=False) + + def clear_call_history(self) -> None: + """Clear the internal call history.""" + self._call_history.clear() + self._call_counter = 0 diff --git a/RPG-Kit/scripts/common/llm_types.py b/RPG-Kit/scripts/common/llm_types.py new file mode 100644 index 0000000..7bfb9f4 --- /dev/null +++ b/RPG-Kit/scripts/common/llm_types.py @@ -0,0 +1,425 @@ +#!/usr/bin/env python3 +"""LLM Type Definitions for RPG-Kit. 

This module provides unified data structures for LLM interactions, including
messages, responses, token usage tracking, and conversational memory.

Ported from RPG-ZeroRepo (zerorepo/rpg_gen/base/llm_client/) with adaptations
for RPG-Kit's project structure and coding conventions.

Key components:
- LLMMessage: Standard message format for LLM interactions
- LLMResponse: Standard LLM response format
- LLMUsage: Token usage tracking
- ToolCall / ToolResult: Tool calling data structures
- Message / UserMessage / SystemMessage / AssistantMessage / ToolMessage:
  Higher-level message wrappers with metadata and timestamps
- Memory: Conversational memory with context-window management
"""

# Lazy annotations: enables ``str | None`` syntax below on older interpreters.
from __future__ import annotations

import json
import logging
from dataclasses import dataclass, field
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional

logger = logging.getLogger(__name__)


# ============================================================================
# Tool Call / Result (used by LLMMessage)
# ============================================================================

ToolCallArguments = dict[str, Any]
"""Type alias for tool call argument dictionaries."""


@dataclass
class ToolCall:
    """A parsed tool call from the model.

    Attributes:
        name: The name of the tool to invoke.
        call_id: A unique identifier for this call.
        arguments: The argument dictionary.
        id: Optional cross-provider identifier (e.g. OpenAI-specific).
    """

    name: str
    call_id: str
    arguments: ToolCallArguments = field(default_factory=dict)
    id: str | None = None  # OpenAI-specific

    def __str__(self) -> str:
        return (
            f"ToolCall(name={self.name}, call_id={self.call_id}, "
            f"arguments={self.arguments})"
        )


@dataclass
class ToolResult:
    """Result of a tool execution.

    Attributes:
        call_id: Identifier for the tool call this result corresponds to.
        name: The tool name that was invoked.
        success: Whether the execution succeeded.
        result: Textual result on success.
        error: Error message on failure.
        id: Optional cross-provider identifier (e.g. OpenAI-specific).
    """

    call_id: str
    name: str
    success: bool
    result: str | None = None
    error: str | None = None
    id: str | None = None  # OpenAI-specific


# ============================================================================
# Core LLM Types
# ============================================================================

@dataclass
class LLMMessage:
    """Standard message format for LLM interactions.

    Attributes:
        role: The message role (system, user, assistant, tool).
        content: The text content of the message.
        tool_call: Optional parsed tool call from the model.
        tool_result: Optional tool execution result.
    """

    role: str
    content: str | None = None
    tool_call: ToolCall | None = None
    tool_result: ToolResult | None = None


@dataclass
class LLMUsage:
    """Token usage from an LLM call.

    Attributes:
        input_tokens: Number of input (prompt) tokens.
        output_tokens: Number of output (completion) tokens.
        cache_creation_input_tokens: Tokens used to create cache.
        cache_read_input_tokens: Tokens read from cache.
        reasoning_tokens: Tokens used for reasoning (e.g. o3 models).
    """

    input_tokens: int = 0
    output_tokens: int = 0
    cache_creation_input_tokens: int = 0
    cache_read_input_tokens: int = 0
    reasoning_tokens: int = 0

    @property
    def total_tokens(self) -> int:
        """Total tokens (input + output)."""
        return self.input_tokens + self.output_tokens

    def __add__(self, other: LLMUsage) -> LLMUsage:
        # Field-wise sum; returns a new LLMUsage (operands unchanged).
        return LLMUsage(
            input_tokens=self.input_tokens + other.input_tokens,
            output_tokens=self.output_tokens + other.output_tokens,
            cache_creation_input_tokens=(
                self.cache_creation_input_tokens
                + other.cache_creation_input_tokens
            ),
            cache_read_input_tokens=(
                self.cache_read_input_tokens + other.cache_read_input_tokens
            ),
            reasoning_tokens=self.reasoning_tokens + other.reasoning_tokens,
        )

    def to_dict(self) -> dict[str, int]:
        """Serialize to a plain dictionary (includes derived total_tokens)."""
        return {
            "input_tokens": self.input_tokens,
            "output_tokens": self.output_tokens,
            "total_tokens": self.total_tokens,
            "cache_creation_input_tokens": self.cache_creation_input_tokens,
            "cache_read_input_tokens": self.cache_read_input_tokens,
            "reasoning_tokens": self.reasoning_tokens,
        }

    def __str__(self) -> str:
        return (
            f"LLMUsage(input_tokens={self.input_tokens}, "
            f"output_tokens={self.output_tokens}, "
            f"cache_creation_input_tokens={self.cache_creation_input_tokens}, "
            f"cache_read_input_tokens={self.cache_read_input_tokens}, "
            f"reasoning_tokens={self.reasoning_tokens})"
        )


@dataclass
class LLMResponse:
    """Standard LLM response format.

    Attributes:
        content: The text content of the response.
        usage: Token usage information.
        model: The model that generated the response.
        finish_reason: Why the model stopped generating (e.g. "stop", "tool_use").
        tool_calls: List of tool calls requested by the model.
+ """ + + content: str + usage: LLMUsage | None = None + model: str | None = None + finish_reason: str | None = None + tool_calls: list[ToolCall] | None = None + + +# ============================================================================ +# High-Level Message Wrappers +# ============================================================================ + +@dataclass +class Message: + """General message structure for LLM conversations. + + Extends LLMMessage with metadata and timestamps for richer + conversation tracking. + + Roles: system / user / assistant / tool + + Attributes: + role: The message role. + content: The text content. + name: Optional sender name. + tool_call: Optional tool call from the model. + tool_result: Optional tool execution result. + metadata: Arbitrary metadata dictionary. + timestamp: ISO-format timestamp (auto-generated). + """ + + role: str + content: str + name: Optional[str] = None + tool_call: Optional[ToolCall] = None + tool_result: Optional[ToolResult] = None + metadata: Dict[str, Any] = field(default_factory=dict) + timestamp: str = field( + default_factory=lambda: datetime.now(timezone.utc).isoformat() + ) + + def to_dict(self) -> Dict[str, Any]: + """Serialize to a plain dictionary (role + content only).""" + data: Dict[str, Any] = {"role": self.role, "content": self.content} + if self.name: + data["name"] = self.name + return data + + def to_llm_message(self) -> LLMMessage: + """Convert to LLMMessage for provider chat() calls.""" + return LLMMessage( + role=self.role, + content=self.content, + tool_call=self.tool_call, + tool_result=self.tool_result, + ) + + +@dataclass +class UserMessage(Message): + """Convenience wrapper for user messages.""" + + def __init__(self, content: str, name: Optional[str] = None, **meta: Any): + super().__init__(role="user", content=content, name=name, metadata=meta) + + +@dataclass +class SystemMessage(Message): + """Convenience wrapper for system messages.""" + + def __init__(self, content: str, 
**meta: Any): + super().__init__(role="system", content=content, metadata=meta) + + +@dataclass +class AssistantMessage(Message): + """Convenience wrapper for assistant messages.""" + + def __init__(self, content: str, **meta: Any): + super().__init__(role="assistant", content=content, metadata=meta) + + +@dataclass +class ToolMessage(Message): + """Convenience wrapper for tool result messages.""" + + def __init__( + self, + content: str, + tool_result: Optional[ToolResult] = None, + **meta: Any, + ): + super().__init__( + role="tool", + content=content, + tool_result=tool_result, + metadata=meta, + ) + + +# ============================================================================ +# Conversational Memory +# ============================================================================ + +class Memory: + """General-purpose conversational memory for LLM agents. + + Keeps a full ``_history`` of ``Message`` objects and exposes a + context-limited ``.history`` property. ``to_llm_messages()`` returns + ``list[LLMMessage]`` ready for provider ``chat()`` calls. + + Args: + context_window: Number of message pairs (user + assistant) to include + in active context. If <= 0, no limit is applied. 
+ """ + + def __init__(self, context_window: int = 5): + self._history: List[Message] = [] + self.context_window = context_window + + # ---------------------------------------------------------------- + # Message Management + # ---------------------------------------------------------------- + + def add_message(self, message: Message) -> None: + """Add a ``Message`` instance to memory.""" + self._history.append(message) + + def add(self, role: str, content: str) -> None: + """Quickly add a plain message without creating the object manually.""" + self.add_message(Message(role=role, content=content)) + + def last(self, role: Optional[str] = None) -> Optional[Message]: + """Return the most recent message, optionally filtered by role.""" + if not self._history: + return None + if role: + for m in reversed(self._history): + if m.role == role: + return m + return self._history[-1] + + # ---------------------------------------------------------------- + # Context Handling + # ---------------------------------------------------------------- + + def keep_message_window( + self, messages: List[Message] + ) -> List[Message]: + """Return a context-trimmed view of messages. 
+ + Keeps: + - the first system message (if any) + - the most recent N * 2 dialogue messages (user/assistant) + - the last user message (if exists) + """ + if not messages: + return [] + + has_system = messages[0].role == "system" + context_limit = ( + 2 * self.context_window if self.context_window > 0 else 0 + ) + + last_message = ( + messages[-1] if messages[-1].role == "user" else None + ) + + start_index = 1 if has_system else 0 + context_messages = ( + messages[start_index:-1] if last_message + else messages[start_index:] + ) + context_messages = ( + context_messages[-context_limit:] if context_limit else [] + ) + + result: List[Message] = [] + if has_system: + result.append(messages[0]) + result.extend(context_messages) + if last_message: + result.append(last_message) + return result + + @property + def history(self) -> List[Message]: + """Expose trimmed history.""" + return self.keep_message_window(self._history) + + def to_llm_messages(self) -> List[LLMMessage]: + """Return history as ``list[LLMMessage]`` for provider ``chat()`` calls.""" + return [ + m.to_llm_message() + for m in self.keep_message_window(self._history) + if m.role in ("system", "user", "assistant", "tool") + ] + + def to_messages(self) -> List[Dict[str, str]]: + """Return history as a list of message dicts (backward compatible).""" + return [ + {"role": m.role, "content": m.content} + for m in self.keep_message_window(self._history) + if m.role in ("system", "user", "assistant") + ] + + # ---------------------------------------------------------------- + # Persistence + # ---------------------------------------------------------------- + + def snapshot(self) -> Dict[str, Any]: + """Return a serializable snapshot of memory.""" + return {"history": [m.__dict__ for m in self._history]} + + def load_snapshot(self, data: Dict[str, Any]) -> None: + """Restore memory from snapshot data.""" + self._history = [Message(**h) for h in data.get("history", [])] + + def save_to_file(self, path: str) -> 
None: + """Save full memory to disk.""" + with open(path, "w", encoding="utf-8") as f: + json.dump(self.snapshot(), f, ensure_ascii=False, indent=2) + + def load_from_file(self, path: str) -> None: + """Load full memory from file.""" + with open(path, "r", encoding="utf-8") as f: + data = json.load(f) + self.load_snapshot(data) + + # ---------------------------------------------------------------- + # Maintenance + # ---------------------------------------------------------------- + + def clear_memory(self) -> None: + """Completely clear the stored conversation history.""" + self._history.clear() + + # ---------------------------------------------------------------- + # Display / Debug + # ---------------------------------------------------------------- + + def show(self, n: int = 10) -> None: + """Print the latest messages (untrimmed).""" + logger.info("Memory Snapshot:") + for m in self._history[-n:]: + logger.info("[%s] %s: %s", m.timestamp, m.role, m.content) + + def to_dict(self) -> Dict[str, Any]: + """Return the entire memory as a serializable Python dict.""" + return { + "context_window": self.context_window, + "history": [m.__dict__ for m in self._history], + } diff --git a/RPG-Kit/scripts/common/logging_setup.py b/RPG-Kit/scripts/common/logging_setup.py new file mode 100644 index 0000000..b931815 --- /dev/null +++ b/RPG-Kit/scripts/common/logging_setup.py @@ -0,0 +1,131 @@ +"""Centralized logging configuration for RPG-Kit scripts. + +All scripts that produce non-trivial work should call +:func:`setup_file_logging` once in their ``main()`` so that logs are +captured to ``/.rpgkit/logs/.log`` for later inspection. + +Design goals +------------ +* **Idempotent** โ€” calling the function multiple times in one process + (e.g. when scripts import each other and each runs ``main``) does not + produce duplicated log lines or duplicate file handlers. 
* **Console-friendly** — does not change console handlers; scripts that
+  output ``--json`` to stdout still work. Console verbosity is each
+  script's own decision; this helper only attaches a *file* handler.
+* **Symlink-safe** — the log directory comes from ``common.paths``,
+  which already resolves the workspace root correctly when
+  ``.rpgkit/scripts`` is a symlink in dev workflows.
+* **Non-blocking on read-only filesystems** — if ``LOGS_DIR`` cannot be
+  created or written to (e.g. CI, container, sandbox), the helper logs
+  one warning to stderr and returns ``None`` instead of raising; the
+  caller's business logic is never blocked by a log-setup failure.
+
+Typical usage
+-------------
+::
+
+    from common.logging_setup import setup_file_logging
+
+    def main():
+        setup_file_logging("rpg_edit")  # → .rpgkit/logs/rpg_edit.log
+        # … rest of script …
the console handler set by + ``logging.basicConfig``) are left untouched. + + Args: + log_name: Stem for the log file (``"rpg_edit"`` โ†’ + ``.rpgkit/logs/rpg_edit.log``). + level: Minimum level the file handler captures. Defaults to + ``DEBUG`` so verbose runs are inspectable after the fact. + fmt: ``logging.Formatter`` format string. + datefmt: ``logging.Formatter`` datefmt string. + logs_dir: Override the destination directory. Defaults to + :data:`common.paths.LOGS_DIR`. Useful only for tests. + + Returns: + Absolute path to the log file that will receive records, or + ``None`` if the helper could not attach a file handler (e.g. + the destination is read-only). In the failure case a single + warning has already been printed to ``stderr`` โ€” the caller + does not need to handle the return value. + """ + target_dir = logs_dir if logs_dir is not None else LOGS_DIR + + try: + target_dir.mkdir(parents=True, exist_ok=True) + except OSError as exc: + print( + f"[rpgkit logging_setup] could not create {target_dir}: {exc}; " + "file logging disabled (console logs unaffected).", + file=sys.stderr, + ) + return None + + log_path = target_dir / f"{log_name}.log" + # Use absolute() instead of resolve() so that workspace symlinks are + # preserved (mirrors common.paths.WORKSPACE_ROOT logic). + resolved = log_path.absolute() + + # Idempotent guard โ€” never attach two file handlers for the same path. 
+ if resolved in _ATTACHED: + return log_path + + try: + file_handler = logging.FileHandler(log_path, mode="a", encoding="utf-8") + except OSError as exc: + print( + f"[rpgkit logging_setup] could not open {log_path}: {exc}; " + "file logging disabled (console logs unaffected).", + file=sys.stderr, + ) + return None + + file_handler.setLevel(level) + file_handler.setFormatter(logging.Formatter(fmt, datefmt=datefmt)) + + root_logger = logging.getLogger() + # Allow records at the requested level through to handlers; if the + # root level is stricter than what we want to capture, lower it. + # Handlers retain their own level filtering, so existing console + # handlers are unaffected. + if root_logger.level == logging.WARNING or root_logger.level > level: + root_logger.setLevel(level) + root_logger.addHandler(file_handler) + + _ATTACHED.add(resolved) + return log_path diff --git a/RPG-Kit/scripts/common/paths.py b/RPG-Kit/scripts/common/paths.py new file mode 100644 index 0000000..5480f1f --- /dev/null +++ b/RPG-Kit/scripts/common/paths.py @@ -0,0 +1,198 @@ +#!/usr/bin/env python3 +"""Centralized Path Definitions. + +This module contains all file path constants used across RPG-Kit scripts. + +Directory layout (workspace == repo): + / โ† user's source repo + RPG-Kit data + โ”œโ”€โ”€ .rpgkit/ โ† scripts, data, state (machine-local) + โ”œโ”€โ”€ .claude/ or .vscode/ โ† agent instructions + โ”œโ”€โ”€ src/ tests/ โ€ฆ โ† project code (user-owned) + โ””โ”€โ”€ .git/ โ† single git repo at the workspace root + +All paths under ``.rpgkit/`` and ``.claude/`` are relative to +``WORKSPACE_ROOT``. ``REPO_DIR`` is an alias for ``WORKSPACE_ROOT`` kept for +backwards-compatibility with call sites that use "project repo root" +phrasing; both refer to the same directory. 
+""" + +import os +from pathlib import Path + + +# ============================================================================ +# Workspace Root (absolute) +# ============================================================================ +# +# WORKSPACE_ROOT is the directory that contains ``.rpgkit/``. Resolving it +# from ``__file__`` is unreliable in dev workflows where ``.rpgkit/scripts`` +# is a symlink to a shared code repo: Python 3.11+ realpath-normalizes the +# script's ``__file__`` at launch, which silently strips the symlink and +# makes ``WORKSPACE_ROOT`` point at the *code* repo instead of the user's +# workspace โ€” every ``DATA_DIR`` / ``REPO_DIR`` derivation then breaks. +# +# Strategy (in order): +# 1. Walk up from ``cwd`` looking for a ``.rpgkit/`` marker โ€” works for +# all normal invocations (rpgkit slash-commands and git hooks launch +# with cwd at the workspace root). Authoritative when found, even +# if a stale ``RPGKIT_WORKSPACE`` env var inherited from a parent +# process points elsewhere. +# 2. ``RPGKIT_WORKSPACE`` env var โ€” explicit override / fallback +# when cwd doesn't contain ``.rpgkit/`` (e.g. running CLI scripts +# from outside the workspace). +# 3. ``__file__`` fallback โ€” preserves the standard deployment +# layout when neither of the above applies. + +def _find_workspace_root() -> Path: + # Prefer cwd-based detection so subprocesses always see the workspace + # they were launched against, not a stale value inherited from the + # parent process's environment. This matters for git hooks, which + # are spawned by ``git`` (cwd = repo root) from arbitrary parent + # contexts that may have set RPGKIT_WORKSPACE long ago. 
+ cwd = Path.cwd().absolute() + for cand in [cwd, *cwd.parents]: + if (cand / ".rpgkit").is_dir(): + return cand + + env = os.environ.get("RPGKIT_WORKSPACE") + if env: + p = Path(env).absolute() + if p.is_dir(): + return p + + # Last resort: standard deployment layout + # /.rpgkit/scripts/common/paths.py + return Path(__file__).absolute().parent.parent.parent.parent + + +WORKSPACE_ROOT = _find_workspace_root() + + +# ============================================================================ +# Project Repo Directory +# ============================================================================ +# +# Historically the user's code lived at ``/repo/``, with +# a separate inner git repo. That layout has been retired: the +# workspace root **is** the project repo root, so ``REPO_DIR`` and +# ``WORKSPACE_ROOT`` are now aliases for the same directory. Callers +# may prefer one name over the other based on which concept reads +# more naturally at the call site. + +REPO_DIR = WORKSPACE_ROOT + + +# ============================================================================ +# Scripts Directory (absolute, for embedding in next_action messages) +# ============================================================================ + +# Anchor SCRIPTS_DIR to WORKSPACE_ROOT so that paths embedded in +# next_action messages (read by the AI agent) reference the user's +# workspace path โ€” not the symlink target. 
+SCRIPTS_DIR = WORKSPACE_ROOT / ".rpgkit" / "scripts" +TOOLS_DIR = SCRIPTS_DIR / "tools" + + +def get_scripts_dir() -> str: + """Get the scripts directory path as string for use in next_action messages.""" + return str(SCRIPTS_DIR) + + +# ============================================================================ +# .rpgkit Directory Structure (absolute, derived from WORKSPACE_ROOT) +# ============================================================================ + +RPGKIT_DIR = WORKSPACE_ROOT / ".rpgkit" +DATA_DIR = RPGKIT_DIR / "data" +LOGS_DIR = RPGKIT_DIR / "logs" +COPILOT_LOGS_DIR = LOGS_DIR / "copilot" +CLAUDE_LOGS_DIR = LOGS_DIR / "claude" + + +# ============================================================================ +# Dev Virtual Environment +# ============================================================================ +# +# The codegen pipeline creates an isolated venv under the project repo so +# tests run against an exact dependency set without polluting the user's +# global Python. ``DEV_VENV_NAME`` is the directory name (used in +# ``.gitignore`` patterns); ``DEV_VENV_DIR`` is the absolute path. 
+ +DEV_VENV_NAME = ".venv_dev" +DEV_VENV_DIR = REPO_DIR / DEV_VENV_NAME + + +# ============================================================================ +# Feature Specification & Build (data/ subfolder) +# ============================================================================ + +FEATURE_SPEC_FILE = DATA_DIR / "feature_spec.json" +FEATURE_BUILD_FILE = DATA_DIR / "feature_build.json" +FEATURE_TREE_FILE = DATA_DIR / "feature_tree.json" + + +# ============================================================================ +# Skeleton Files +# ============================================================================ + +SKELETON_FILE = DATA_DIR / "skeleton.json" +SKELETON_SUMMARY_FILE = DATA_DIR / "skeleton_summary.txt" + + +# ============================================================================ +# Data Flow & Interfaces +# ============================================================================ + +DATA_FLOW_FILE = DATA_DIR / "data_flow.json" +DATA_FLOW_VIZ_FILE = DATA_DIR / "data_flow_viz.html" +INTERFACES_FILE = DATA_DIR / "interfaces.json" +BASE_CLASSES_FILE = DATA_DIR / "base_classes.json" + + +# ============================================================================ +# RPG (Repository Program Graph) +# ============================================================================ + +RPG_FILE = DATA_DIR / "rpg.json" +REPO_RPG_FILE = RPG_FILE # Unified: both encoder and decoder use rpg.json +DEP_GRAPH_FILE = DATA_DIR / "dep_graph.json" +REPO_INFO_FILE = DATA_DIR / "repo_info.json" + + +# ============================================================================ +# Task Planning & Execution +# ============================================================================ + +TASKS_FILE = DATA_DIR / "tasks.json" +CODE_GEN_STATE_FILE = DATA_DIR / "code_gen_state.jsonl" + + +# ============================================================================ +# Trajectory & Logging +# 
============================================================================ + +TRAJECTORY_DIR = DATA_DIR / "trajectory" + + +# ============================================================================ +# Telemetry (JSONL append-only logs for usage statistics) +# ============================================================================ + +MCP_CALLS_LOG = LOGS_DIR / "mcp_calls.jsonl" +HOOK_CALLS_LOG = LOGS_DIR / "hook_calls.jsonl" +REPORTS_DIR = RPGKIT_DIR / "reports" + + +# ============================================================================ +# Helper Functions +# ============================================================================ + +def ensure_rpgkit_dir() -> Path: + """Ensure .rpgkit/data directory exists and return its path.""" + DATA_DIR.mkdir(parents=True, exist_ok=True) + return DATA_DIR + + +def get_trajectory_file(script_name: str) -> Path: + """Get trajectory file path for a specific script.""" + return TRAJECTORY_DIR / f"{script_name}_trajectory.json" diff --git a/RPG-Kit/scripts/common/project_types.py b/RPG-Kit/scripts/common/project_types.py new file mode 100644 index 0000000..03e144d --- /dev/null +++ b/RPG-Kit/scripts/common/project_types.py @@ -0,0 +1,114 @@ +"""Project-type tokens carried in ``feature_spec.json`` ``meta``. + +The LLM that generates ``feature_spec`` declares which user-facing surfaces +the project exposes. Downstream stages (skeleton design, plan_tasks +UI_POLISH, run_batch sub-agent prompts, rpg_edit visual recon) read these +tokens to decide whether to inject web-specific guidance, GUI tooling, +data-pipeline checks, etc. + +See ``plans/20260508-1-rpgkit-optimization*.md`` ยง B3 for the full design +and acceptance criteria. + +This module is intentionally tiny โ€” no dependency on RPG/dataflow code so +it stays cheap to import from validation utilities. 
+""" + +from __future__ import annotations + +import logging +from typing import Iterable, List, Tuple + +logger = logging.getLogger(__name__) + + +# 8-token whitelist (UPPERCASE). Multiple may be selected per project. +ALLOWED_PROJECT_TYPES: frozenset[str] = frozenset({ + "WEB", # HTTP endpoints rendering HTML pages for browsers + "API", # JSON / GraphQL endpoints, no HTML rendering + "SERVICE", # long-running daemon / worker / bot / scheduler + "PIPELINE", # batch ETL / DAG / Spark job / ML training (clear start+end) + "CLI", # command-line entry point with subcommands + "GUI", # desktop window with widgets + "GAME", # interactive real-time application with rendering loop + "LIBRARY", # importable package, no end-user interface +}) + +MAX_PROJECT_NOTES_LEN = 500 + + +class ProjectTypesError(ValueError): + """Raised when ``meta.project_types`` cannot be normalized to a non-empty set of whitelisted tokens. Callers (e.g. feature_build_validation) treat this as a hard stop โ€” feature_spec must be regenerated.""" + + +def validate_project_types(meta: dict) -> Tuple[List[str], str]: + """Normalize ``meta.project_types`` and ``meta.project_notes``. + + Behaviour: + * Tokens are upper-cased and trimmed. + * Tokens not in :data:`ALLOWED_PROJECT_TYPES` are dropped with a warning. + * Notes longer than :data:`MAX_PROJECT_NOTES_LEN` are truncated. + * Empty notes log a warning but do not fail. + * Empty/missing ``project_types`` (or all rejected) raises + :class:`ProjectTypesError`. + + Returns: + ------- + ``(types, notes)`` โ€” ``types`` is a deduplicated, alphabetically sorted + list of valid uppercase tokens; ``notes`` is the (possibly truncated) + free-form description. + """ + raw_types: Iterable = meta.get("project_types") or [] + if isinstance(raw_types, str): + # Tolerate "WEB,CLI" string form even though the spec asks for a list. 
+ raw_types = [s for s in raw_types.replace(";", ",").split(",")] + + normalized: List[str] = [] + for tok in raw_types: + if not isinstance(tok, str): + continue + upper = tok.strip().upper() + if upper: + normalized.append(upper) + + accepted = sorted({t for t in normalized if t in ALLOWED_PROJECT_TYPES}) + rejected = sorted({t for t in normalized if t not in ALLOWED_PROJECT_TYPES}) + + if rejected: + logger.warning( + "meta.project_types contains unknown tokens (ignored): %s", + rejected, + ) + + if not accepted: + raise ProjectTypesError( + "meta.project_types must contain at least one of " + f"{sorted(ALLOWED_PROJECT_TYPES)}; got {list(raw_types)!r}. " + "Re-run feature_spec or fix the source documents." + ) + + notes_raw = meta.get("project_notes") + notes = (notes_raw or "").strip() if isinstance(notes_raw, str) else "" + if len(notes) > MAX_PROJECT_NOTES_LEN: + logger.warning( + "meta.project_notes truncated to %d chars (was %d)", + MAX_PROJECT_NOTES_LEN, len(notes), + ) + notes = notes[:MAX_PROJECT_NOTES_LEN] + if not notes: + logger.warning( + "meta.project_notes is empty; downstream prompts will lack context" + ) + + return accepted, notes + + +def has_type(meta: dict, token: str) -> bool: + """Convenience: True iff ``meta.project_types`` includes ``token`` (case- insensitive). Returns False on any validation failure so callers don't need to handle exceptions when probing optional behaviour.""" + target = token.strip().upper() + if target not in ALLOWED_PROJECT_TYPES: + return False + try: + types, _ = validate_project_types(meta) + except Exception: + return False + return target in types diff --git a/RPG-Kit/scripts/common/session_manager.py b/RPG-Kit/scripts/common/session_manager.py new file mode 100644 index 0000000..5a3f411 --- /dev/null +++ b/RPG-Kit/scripts/common/session_manager.py @@ -0,0 +1,496 @@ +#!/usr/bin/env python3 +"""Session Manager Module for RPG-Kit. 
+ +Provides a base class and CLI-specific subclasses for managing AI CLI +sessions: injecting tool-specific CLI arguments, preparing prompt +delivery (stdin vs command-line prompt), and capturing session traces +(e.g. JSONL logs) produced during LLM subprocess calls. + +The primary interface is the ``trace(prompt, purpose)`` context manager: + + manager = create_session_manager("claude", project_dir=project_dir) + with manager.trace(prompt, purpose="code_gen") as ctx: + subprocess.run(cmd + ctx.extra_args, stdin=ctx.stdin, env=ctx.env) + captured = ctx.captured_path # Path | None + +Each subclass encapsulates the convention of a specific CLI tool for +locating, snapshotting, and copying session files. +""" + +from __future__ import annotations + +import logging +import os +import re +import shutil +import tempfile +import uuid +from abc import ABC, abstractmethod +from contextlib import contextmanager +from datetime import datetime +from pathlib import Path +from typing import Any, Callable, Dict, Iterator, List, Optional + +from .paths import CLAUDE_LOGS_DIR, COPILOT_LOGS_DIR + + +logger = logging.getLogger(__name__) + + +# ============================================================================ +# Trace context โ€” the object yielded by the ``trace()`` context manager +# ============================================================================ + +class TraceContext: + """Holds the result of a session-trace capture. + + Attributes: + extra_args: Additional CLI arguments the manager wants injected + into the subprocess command (e.g. ``["--session-id", ""]``). + Populated by ``before()``; the caller should append these to + the command list. + env: Complete environment dict for ``subprocess.run(env=...)``. + Initialised to ``os.environ.copy()`` by the base-class + ``before()``; subclasses may modify it further. + stdin: A file-like object (or *None*) to pass directly as the + ``stdin`` parameter of ``subprocess.run()``. 
When set by + ``before()``, the caller should use it as-is; when *None*, + the prompt was placed into ``extra_args`` instead and no + stdin redirection is needed. + captured_path: Destination path of the copied session file, + or *None* if nothing was captured. + """ + + def __init__(self) -> None: + self.extra_args: List[str] = [] + self.env: Dict[str, str] = os.environ.copy() + self.stdin: Optional[Any] = None + self.captured_path: Optional[Path] = None + + def reset_stdin(self) -> None: + """Reset stdin read position for retry attempts.""" + if self.stdin is not None and hasattr(self.stdin, "seek"): + self.stdin.seek(0) + + def refresh_for_retry(self) -> None: + """Refresh context for a retry attempt. + + Resets stdin and calls the optional ``_refresh_hook`` set by the + session manager (e.g., to regenerate a session ID that the CLI + tool requires to be unique per invocation). + """ + self.reset_stdin() + if hasattr(self, "_refresh_hook") and self._refresh_hook: + self._refresh_hook(self) + + +# ============================================================================ +# Abstract base class +# ============================================================================ + +class SessionManager(ABC): + """Base class for CLI session managers. + + Subclasses must implement two hooks: + + * ``before(ctx, prompt)`` โ€” called **before** the LLM subprocess call. The + subclass can inject CLI arguments, prepare prompt delivery, snapshot + whatever state it needs (e.g. directory listings, timestamps, + etc.). + * ``after(purpose)`` โ€” called **after** the LLM subprocess call. + The subclass compares the current state against whatever was + saved in ``before()``, performs any copying / archiving, and + returns the destination ``Path`` (or ``None``). + + The ``trace(prompt, purpose)`` context manager wires those two + hooks together so that callers get a clean ``with`` block. + """ + + # Default destination for captured traces. 
    # May be relative
    # (interpreted under ``project_dir`` by :meth:`_dest_dir`) or
    # absolute (used as-is — see e.g. :class:`ClaudeSessionManager`).
    DEFAULT_TRAJECTORY_DIR: Path = Path("trajectory")

    def __init__(
        self,
        project_dir: Path,
        trace_filename_builder: Optional[Callable[[str], str]] = None,
        logger: Optional[logging.Logger] = None,
    ) -> None:
        """Initialize the session manager.

        Args:
            project_dir: Absolute path to the project root.
            trace_filename_builder: Optional callable ``(purpose) -> filename``
                used to name the copied file. If *None*, a default builder
                based on purpose + timestamp is used.
            logger: Logger instance. Defaults to a logger named after the
                concrete subclass.
        """
        self.project_dir = project_dir
        # Subclasses customise the class attribute; instances may still
        # reassign ``trajectory_dir`` after construction.
        self.trajectory_dir = self.DEFAULT_TRAJECTORY_DIR
        self._build_filename = trace_filename_builder or self._default_filename_builder
        self.logger = logger or logging.getLogger(self.__class__.__name__)

    # ------------------------------------------------------------------
    # Abstract interface
    # ------------------------------------------------------------------

    @abstractmethod
    def before(self, ctx: TraceContext, prompt: str) -> None:
        """Prepare / snapshot state before the LLM call.

        The subclass may populate ``ctx.extra_args`` with CLI flags
        that need to be injected into the subprocess command, or
        set ``ctx.stdin`` to the prompt content for stdin-based input.

        Args:
            ctx: The trace context to populate.
            prompt: The prompt text to send to the LLM. Subclasses
                decide whether to add it to ``ctx.extra_args`` or
                ``ctx.stdin`` based on the CLI tool's conventions.
        """
        ...

    @abstractmethod
    def after(self, purpose: str) -> Optional[Path]:
        """Capture session trace after the LLM call.

        Args:
            purpose: Short label describing the LLM call; used by the
                filename builder to name the captured file.

        Returns:
            Destination path of the captured trace, or *None*.
        """
        ...

    # ------------------------------------------------------------------
    # Context manager
    # ------------------------------------------------------------------

    @contextmanager
    def trace(self, prompt: str, purpose: str = "general") -> Iterator[TraceContext]:
        """Context manager that brackets an LLM call with session tracing.

        Args:
            prompt: The prompt text to send to the LLM. Passed to
                ``before()`` so that the subclass can decide how to
                deliver it (via ``extra_args`` or ``stdin``).
            purpose: A short label describing this LLM call.

        Usage::

            with manager.trace(prompt=my_prompt, purpose="code_gen") as ctx:
                cmd = ["cli"] + ctx.extra_args
                subprocess.run(cmd, stdin=ctx.stdin, env=ctx.env)
            print(ctx.captured_path)
        """
        ctx = TraceContext()
        self.before(ctx, prompt)
        try:
            yield ctx
        finally:
            # Capture runs even if the wrapped call raised, so a failed
            # LLM invocation still leaves a trace for debugging.
            ctx.captured_path = self.after(purpose)

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _default_filename_builder(purpose: str) -> str:
        """Build ``<purpose>-<YYYYmmdd-HHMMSS>.jsonl``.

        Non-word characters in *purpose* are replaced with ``_``; an
        empty purpose falls back to ``"llm_call"``.
        """
        ts = datetime.now().strftime("%Y%m%d-%H%M%S")
        safe = re.sub(r"[^\w\-]", "_", purpose) if purpose else "llm_call"
        return f"{safe}-{ts}.jsonl"

    def _dest_dir(self) -> Path:
        """Return (and create) the trajectory destination directory.

        ``trajectory_dir`` may be either:
          * relative — interpreted under ``project_dir`` (legacy default);
          * absolute — used as-is (e.g. ``CLAUDE_LOGS_DIR`` anchored at
            the workspace root via ``common.paths``).
        """
        if self.trajectory_dir.is_absolute():
            d = self.trajectory_dir
        else:
            d = self.project_dir / self.trajectory_dir
        d.mkdir(parents=True, exist_ok=True)
        return d


# ============================================================================
# Null manager (no-op, used when no CLI-specific handling is needed)
# ============================================================================

class NullSessionManager(SessionManager):
    """A no-op manager that never captures anything.

    Used when the CLI tool does not produce session files or when
    session management is not desired.
    """

    def before(self, ctx: TraceContext, prompt: str) -> None:
        """No-op: nothing to prepare."""
        pass

    def after(self, purpose: str) -> Optional[Path]:
        """No-op: nothing is ever captured."""
        return None


# ============================================================================
# Claude CLI manager
# ============================================================================

class ClaudeSessionManager(SessionManager):
    """Captures Claude CLI session JSONL files.

    Uses ``--session-id <uuid>`` to deterministically locate the session
    log file after the subprocess completes, instead of scanning for
    new files.

    The Claude CLI writes per-session JSONL logs under::

        ~/.claude/projects/<encoded-project-dir>/<session-id>.jsonl

    where ``<encoded-project-dir>`` replaces ``/`` and ``_`` with ``-``.
    """

    # Captured traces live under ``.rpgkit/logs/claude/`` so all
    # RPG-Kit-managed artefacts stay inside ``.rpgkit/`` (single ignore
    # rule, single cleanup target). ``CLAUDE_LOGS_DIR`` is an absolute
    # path anchored at the workspace root (see ``common.paths``); the
    # base :meth:`_dest_dir` detects this and uses it as-is rather than
    # joining under ``project_dir``.
    DEFAULT_TRAJECTORY_DIR: Path = CLAUDE_LOGS_DIR

    def __init__(
        self,
        project_dir: Path,
        trace_filename_builder: Optional[Callable[[str], str]] = None,
        logger: Optional[logging.Logger] = None,
    ) -> None:
        """See :meth:`SessionManager.__init__` for parameter docs."""
        super().__init__(
            project_dir=project_dir,
            trace_filename_builder=trace_filename_builder,
            logger=logger,
        )
        # May be None until the first claude call creates the projects
        # directory; re-resolved lazily in after().
        self._sessions_dir: Optional[Path] = self._get_projects_dir()
        # UUID generated by before(), consumed by after()
        self._session_id: Optional[str] = None
        # Temp file path / handle for prompt stdin; cleaned up in after()
        self._tmp_prompt_path: Optional[str] = None
        self._tmp_prompt_fh: Optional[Any] = None

    # ------------------------------------------------------------------
    # Claude-specific helpers
    # ------------------------------------------------------------------

    @staticmethod
    def encode_path(abs_path: str) -> str:
        """Encode an absolute path to the Claude project-directory name.

        Convention::

            /home/user/My_Project -> -home-user-My-Project
        """
        return abs_path.replace("/", "-").replace("_", "-")

    def _get_projects_dir(self) -> Optional[Path]:
        """Return ``~/.claude/projects/<encoded-project-dir>/`` if it
        exists, else *None*."""
        claude_base = Path.home() / ".claude" / "projects"
        encoded = self.encode_path(str(self.project_dir))
        candidate = claude_base / encoded
        return candidate if candidate.is_dir() else None

    # ------------------------------------------------------------------
    # SessionManager interface
    # ------------------------------------------------------------------

    def before(self, ctx: TraceContext, prompt: str) -> None:
        """Generate a UUID session-id and inject ``--session-id`` arg.

        Also removes the ``CLAUDECODE`` env var so that nested Claude
        Code sessions are allowed. The prompt is written to a temp
        file and exposed as ``ctx.stdin``.
        """
        self._session_id = str(uuid.uuid4())
        # --dangerously-skip-permissions: required for autonomous sub-agent
        # execution in the TDD workflow. The sub-agent must read/write files
        # and run pytest without interactive permission prompts. This flag
        # should ONLY be used in controlled, single-tenant environments.
        ctx.extra_args.extend([
            "-p", "--session-id", self._session_id,
            "--dangerously-skip-permissions",
        ])
        ctx.env.pop("CLAUDECODE", None)

        # Register a refresh hook so retries get a fresh session ID.
        # Claude CLI rejects a session-id that was already used.
        ctx._refresh_hook = self._regenerate_session_id

        # Clean up any leftover from a previous call, then write prompt
        # to a fresh temp file and open it for reading.
        self._cleanup_tmp_prompt()

        fd, tmp_path = tempfile.mkstemp(suffix=".txt", prefix="llm_prompt_")
        try:
            with os.fdopen(fd, "w", encoding="utf-8") as f:
                f.write(prompt)
            self._tmp_prompt_path = tmp_path
            # Re-open read-only; this handle becomes the subprocess stdin.
            self._tmp_prompt_fh = open(tmp_path, "r", encoding="utf-8")
        except Exception:
            # Best-effort removal on failure
            try:
                os.unlink(tmp_path)
            except OSError:
                pass
            self._tmp_prompt_path = None
            self._tmp_prompt_fh = None
            raise

        ctx.stdin = self._tmp_prompt_fh

    def _regenerate_session_id(self, ctx: TraceContext) -> None:
        """Replace the session ID in extra_args with a fresh UUID.

        Called by ``TraceContext.refresh_for_retry()`` before each retry
        so that the Claude CLI doesn't reject a reused session ID.
        """
        new_id = str(uuid.uuid4())
        # Find and replace the old session-id value in extra_args
        args = ctx.extra_args
        for i, arg in enumerate(args):
            if arg == "--session-id" and i + 1 < len(args):
                args[i + 1] = new_id
                break
        self._session_id = new_id

    def _cleanup_tmp_prompt(self) -> None:
        """Close and remove the temporary prompt file, if any."""
        if self._tmp_prompt_fh is not None:
            try:
                self._tmp_prompt_fh.close()
            except Exception:
                pass
            self._tmp_prompt_fh = None
        if self._tmp_prompt_path is not None:
            try:
                os.unlink(self._tmp_prompt_path)
            except OSError:
                pass
            self._tmp_prompt_path = None

    def after(self, purpose: str) -> Optional[Path]:
        """Locate the JSONL by UUID and copy it to the trajectory directory."""
        self._cleanup_tmp_prompt()

        # Lazy-resolve: the projects dir may not exist at __init__ time
        # (created by the first claude call), so re-check here.
        if self._sessions_dir is None:
            self._sessions_dir = self._get_projects_dir()

        if self._sessions_dir is None or self._session_id is None:
            return None

        source = self._sessions_dir / f"{self._session_id}.jsonl"

        try:
            if not source.exists():
                self.logger.debug(
                    f"Claude session file not found: {source.name}"
                )
                return None

            # Build destination
            dest_dir = self._dest_dir()
            dest_name = self._build_filename(purpose)
            dest = dest_dir / dest_name

            shutil.copy2(source, dest)

            # Also copy companion subagents directory if it exists
            subagent_dir = self._sessions_dir / self._session_id / "subagents"
            if subagent_dir.is_dir():
                dest_sub = dest_dir / dest_name.replace(".jsonl", "") / "subagents"
                if dest_sub.exists():
                    shutil.rmtree(dest_sub)
                shutil.copytree(subagent_dir, dest_sub)

            self.logger.info(
                f"Captured Claude session trace: {source.name} -> {dest}"
            )
            return dest

        except Exception as e:
            # Trace capture is best-effort; never fail the main workflow.
            self.logger.warning(f"Failed to capture Claude session trace: {e}")
            return None


#
# ============================================================================
# Copilot CLI manager
# ============================================================================

class CopilotSessionManager(SessionManager):
    """Session manager for the GitHub Copilot CLI.

    Injects Copilot-specific CLI arguments (``--log-dir``,
    ``--log-level``, ``--allow-all``) into ``extra_args`` and appends
    the prompt as the final argument.
    """

    def before(self, ctx: TraceContext, prompt: str) -> None:
        """Inject Copilot-specific CLI flags and append prompt.

        Adds ``--log-dir``, ``--log-level``, ``--allow-all`` and the
        prompt text itself to ``extra_args``.
        """
        log_dir = COPILOT_LOGS_DIR
        log_dir.mkdir(parents=True, exist_ok=True)
        ctx.extra_args.extend([
            "--log-dir", str(log_dir),
            "--log-level", "all",
            "--allow-all",
            "-p", prompt,
        ])

    def after(self, purpose: str) -> Optional[Path]:
        """No trace capture yet."""
        return None


# ============================================================================
# Factory
# ============================================================================

# Registry mapping agent type names to manager classes.
_MANAGER_REGISTRY: Dict[str, type] = {
    "claude": ClaudeSessionManager,
    "copilot": CopilotSessionManager,
}


def register_manager(agent_type: str, manager_cls: type) -> None:
    """Register a custom manager class for an agent type.

    Overwrites any existing registration for ``agent_type``.
    """
    _MANAGER_REGISTRY[agent_type] = manager_cls


def create_session_manager(
    agent_type: str,
    project_dir: Path,
    trace_filename_builder: Optional[Callable[[str], str]] = None,
    logger: Optional[logging.Logger] = None,
) -> SessionManager:
    """Factory that returns the appropriate ``SessionManager`` subclass based on the agent type.

    Args:
        agent_type: Canonical agent name returned by ``detect_agent_type()``
            (e.g. ``"claude"``, ``"copilot"``, ``"unknown"``).
        project_dir: Absolute path to the project root.
        trace_filename_builder: Optional custom filename builder.
        logger: Logger instance.

    Returns:
        A ``SessionManager`` subclass instance, or ``NullSessionManager`` if
        the agent type is not recognised. Each subclass determines its own
        ``trajectory_dir`` via ``DEFAULT_TRAJECTORY_DIR``.
    """
    manager_cls = _MANAGER_REGISTRY.get(agent_type, NullSessionManager)
    return manager_cls(
        project_dir=project_dir,
        trace_filename_builder=trace_filename_builder,
        logger=logger,
    )
diff --git a/RPG-Kit/scripts/common/task_batch.py b/RPG-Kit/scripts/common/task_batch.py
new file mode 100644
index 0000000..295396a
--- /dev/null
+++ b/RPG-Kit/scripts/common/task_batch.py
@@ -0,0 +1,196 @@
#!/usr/bin/env python3
"""PlannedTask Data Class for RPG-Kit.

Represents a single planned implementation task.
Each task contains one or more units from a single file to be
implemented together. Multiple tasks may be grouped into one
execution batch at runtime (see file-merge mode).
"""

import json
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Any, Optional
from dataclasses import dataclass, field

from rpg import uuid8


@dataclass
class PlannedTask:
    """Represents a single planned implementation task.

    This is the core unit of work in the code generation phase.
    Each task contains one or more units from a single file that
    should be implemented together. At runtime, one or more
    PlannedTasks may be grouped into a single execution batch.
+ """ + task_id: str = field(init=False) + task: str # Task description (GitHub-style) + file_path: str # Target file path + units_key: List[str] # List of unit keys to implement + unit_to_code: Dict[str, str] # Unit key -> interface code + unit_to_features: Dict[str, List] # Unit key -> feature list + priority: int = 0 # Execution priority (lower = earlier) + subtree: str = "" # Subtree/component name + task_type: str = "implementation" # Task type: + # "implementation" - Core code implementation + # "integration_test" - Integration testing + # "final_test_docs" - Final tests and documentation + # "main_entry" - main.py entry point (run test) + # Project file types (after core + main entry): + # "project_requirements" - requirements.txt + # Needs import validation test + # "project_docs" - README.md + # No tests needed + + def __post_init__(self): + """Generate unique task_id and validate inputs.""" + unique_suffix = uuid8() + safe_path = self.file_path.replace('/', '_').replace('\\', '_') + self.task_id = f"{safe_path}_{datetime.now().strftime('%Y%m%d_%H%M%S')}_{unique_suffix}" + + # Validation + if not isinstance(self.units_key, list) or not self.units_key: + raise ValueError("PlannedTask validation error: 'units_key' must be a non-empty list.") + + missing_in_code = [k for k in self.units_key if k not in self.unit_to_code] + if missing_in_code: + raise ValueError( + f"PlannedTask validation error: units_key contains keys not present " + f"in unit_to_code: {missing_in_code}" + ) + + # Auto-fill missing unit_to_features keys (informational only) + for k in self.units_key: + if k not in self.unit_to_features: + self.unit_to_features[k] = [] + + def to_dict(self) -> Dict[str, Any]: + """Convert to dictionary for JSON serialization.""" + return { + "task_id": self.task_id, + "task": self.task, + "file_path": self.file_path, + "units_key": self.units_key, + "unit_to_code": self.unit_to_code, + "unit_to_features": self.unit_to_features, + "priority": self.priority, + 
"subtree": self.subtree, + "task_type": self.task_type, + } + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "PlannedTask": + """Create PlannedTask from dictionary.""" + obj = cls( + task=data["task"], + file_path=data["file_path"], + units_key=data["units_key"], + unit_to_code=data["unit_to_code"], + unit_to_features=data["unit_to_features"], + priority=data.get("priority", 0), + subtree=data.get("subtree", ""), + task_type=data.get("task_type", "implementation"), + ) + # Restore original task_id if present + if "task_id" in data: + obj.task_id = data["task_id"] + return obj + + def get_units_summary(self) -> str: + """Get a summary of units in this task.""" + return ", ".join(self.units_key) + + def get_interface_code(self) -> str: + """Get combined interface code for all units.""" + code_parts = [] + for unit_key in self.units_key: + code = self.unit_to_code.get(unit_key, "") + if code: + code_parts.append(f"# {unit_key}\n{code}") + return "\n\n".join(code_parts) + + + + +def load_tasks_from_tasks_json(tasks_path: Path) -> List[PlannedTask]: + """Load all PlannedTask objects from tasks.json file. 
+ + Args: + tasks_path: Path to tasks.json file + + Returns: + List of PlannedTask objects in execution order + """ + if not tasks_path.exists(): + raise FileNotFoundError(f"Tasks file not found: {tasks_path}") + + with open(tasks_path, 'r', encoding='utf-8') as f: + data = json.load(f) + + planned_tasks_dict = data.get("planned_tasks_dict", {}) + subtree_order = data.get("subtree_order", list(planned_tasks_dict.keys())) + + all_tasks = [] + + for subtree in subtree_order: + if subtree not in planned_tasks_dict: + continue + + files_dict = planned_tasks_dict[subtree] + + # Get files in order (if available) or use dict order + for file_path, task_list in files_dict.items(): + for task_data in task_list: + # Skip tasks with empty units_key (e.g., __init__.py package + # files that the LLM planned but have nothing to implement) + if not task_data.get("units_key"): + continue + try: + task = PlannedTask.from_dict(task_data) + all_tasks.append(task) + except Exception as e: + print(f"Warning: Failed to load task from {file_path}: {e}") + + return all_tasks + + +def get_task_by_id(tasks_path: Path, task_id: str) -> Optional[PlannedTask]: + """Get a specific PlannedTask by its task_id. + + Args: + tasks_path: Path to tasks.json file + task_id: The task_id to find + + Returns: + PlannedTask if found, None otherwise + """ + all_tasks = load_tasks_from_tasks_json(tasks_path) + for t in all_tasks: + if t.task_id == task_id: + return t + return None + + +def get_next_pending_task( + tasks_path: Path, + completed_ids: List[str] +) -> Optional[PlannedTask]: + """Get the next task that hasn't been completed yet. 
+ + Args: + tasks_path: Path to tasks.json file + completed_ids: List of already completed task IDs + + Returns: + Next pending PlannedTask, or None if all completed + """ + all_tasks = load_tasks_from_tasks_json(tasks_path) + completed_set = set(completed_ids) + + for t in all_tasks: + if t.task_id not in completed_set: + return t + + return None diff --git a/RPG-Kit/scripts/common/tools.py b/RPG-Kit/scripts/common/tools.py new file mode 100644 index 0000000..7efc80c --- /dev/null +++ b/RPG-Kit/scripts/common/tools.py @@ -0,0 +1,533 @@ +#!/usr/bin/env python3 +"""Tool Abstraction Layer for RPG-Kit. + +This module provides a unified tool abstraction for the RPG Agent, enabling +standardized tool definition, parameter validation, execution, and result handling. + +Ported from RPG-ZeroRepo (zerorepo/rpg_gen/base/tools/) with adaptations for +RPG-Kit's project structure and coding conventions. + +Key components: +- Tool (ABC): Abstract base class for all agent tools +- ToolExecutor: Registry and executor for tool instances +- ToolHandler: Parses LLM text output to extract tool calls +- ToolCall / ToolResult / ToolExecResult: Data types for tool invocation flow +""" + +from __future__ import annotations + +import asyncio +import logging +from abc import ABC, abstractmethod +from dataclasses import dataclass +from typing import Any, Dict, List, Optional, Type, Union + +from pydantic import BaseModel, ValidationError + + +logger = logging.getLogger(__name__) + + +# ============================================================================ +# Error Hierarchy +# ============================================================================ + +class ToolError(Exception): + """Base class for tool-related errors.""" + + +class ToolNotFoundError(ToolError): + """Raised when a requested tool is not registered.""" + + +class ToolValidationError(ToolError): + """Raised when tool arguments fail validation.""" + + +class ToolExecutionError(ToolError): + """Raised when a tool 
    execution encounters an error."""


# ============================================================================
# Core Data Types
# ============================================================================

ToolCallArguments = Dict[str, Any]
"""Type alias for tool call argument dictionaries."""


@dataclass
class ToolExecResult:
    """Intermediate result of a tool execution.

    Attributes:
        output: The textual output on success.
        error: Error message on failure.
        error_code: 0 indicates success; non-zero indicates failure.
        state: Optional state dict to carry between tool invocations.
    """
    output: Optional[str] = None
    error: Optional[str] = None
    error_code: int = 0
    state: Optional[Dict[str, Any]] = None


@dataclass
class ToolResult:
    """Final result of a tool call, surfaced by the executor.

    Attributes:
        name: The tool name that was invoked.
        success: Whether the execution succeeded.
        call_id: Identifier for this particular call.
        result: Textual result on success.
        error: Error message on failure.
        id: Optional cross-provider identifier (e.g., OpenAI tool_call id).
        state: Optional state dict carried from execution.
    """
    name: str
    success: bool
    call_id: Optional[str] = None
    result: Optional[str] = None
    error: Optional[str] = None
    id: Optional[str] = None
    state: Optional[Dict[str, Any]] = None


@dataclass
class ToolCall:
    """Represents a parsed tool call input.

    Attributes:
        name: The tool name to invoke.
        call_id: A unique identifier for this call.
        arguments: The argument dictionary.
        id: Optional cross-provider identifier.
    """
    name: str
    call_id: str
    arguments: ToolCallArguments
    id: Optional[str] = None

    def __str__(self) -> str:
        return (
            f"ToolCall(name={self.name}, call_id={self.call_id}, "
            f"arguments={self.arguments}, id={self.id})"
        )

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a plain dictionary."""
        return {
            "call_id": self.call_id,
            "name": self.name,
            "arguments": self.arguments,
        }


# ============================================================================
# Parameter Model Base
# ============================================================================

class ToolParameter(BaseModel):
    """Abstract base for tool argument models.

    This class is intentionally empty; concrete tools should define their own
    Pydantic models by subclassing ``BaseModel`` or this class. Keeping this
    around makes it easy to add shared mixins / validators later.
    """
    pass


# ============================================================================
# Tool Abstract Base Class
# ============================================================================

class Tool(ABC):
    """Abstract base for tools / actions with runtime Pydantic validation.

    Each concrete Tool may set ``ParamModel`` to a Pydantic model type.
    If ``ParamModel`` is None the raw ``dict`` is passed through to ``execute``.

    Class attributes:
        ParamModel: Optional Pydantic model for argument validation.
        name: Canonical tool name (must be unique within an executor).
        description: Human-readable description of the tool's purpose.
    """

    ParamModel: Optional[Type[BaseModel]] = None
    name: str = ""
    description: str = ""

    # --- Required metadata ---------------------------------------------------

    @classmethod
    def get_name(cls) -> str:
        """Return the canonical tool name."""
        return cls.name

    @classmethod
    def get_description(cls) -> str:
        """Return a concise human-readable description of the tool."""
        return cls.description

    # --- Core execution ------------------------------------------------------

    @classmethod
    @abstractmethod
    async def execute(
        cls,
        arguments: Union[ToolCallArguments, BaseModel],
        env: Optional[Any] = None,
        **kwargs: Any,
    ) -> ToolExecResult:
        """Run the tool with validated arguments.

        Implementations can type-narrow ``arguments`` to their custom ``ParamModel``.
        """

    # --- Validation hook -----------------------------------------------------

    @classmethod
    async def check(cls, arguments: ToolCallArguments) -> ToolCallArguments:
        """Validate / normalize input arguments using ``ParamModel`` if provided.

        Returns the validated / normalized payload (a dict from the Pydantic model
        dump, or the original dict if no ``ParamModel`` is set).

        Raises:
            ToolValidationError: If the arguments fail validation.
        """
        if cls.ParamModel is None:
            return arguments
        try:
            return cls.ParamModel(**arguments).model_dump()
        except ValidationError as exc:
            raise ToolValidationError(str(exc)) from exc

    # --- Optional lifecycle hooks --------------------------------------------

    @classmethod
    async def before_execute(
        cls,
        payload: Union[BaseModel, ToolCallArguments],
        env: Optional[Any] = None,
        **kwargs: Any,
    ) -> None:
        """Hook called right before ``execute``. Override if needed."""
        return None

    @classmethod
    async def after_execute(
        cls,
        payload: ToolCallArguments,
        result: ToolExecResult,
        env: Optional[Any] = None,
        **kwargs: Any,
    ) -> None:
        """Hook called right after ``execute``. Override if needed."""
        return None

    # --- Resource cleanup ----------------------------------------------------

    @classmethod
    async def close(cls) -> None:
        """Override to release resources if necessary."""
        return None

    # --- Custom text parsing -------------------------------------------------

    @classmethod
    @abstractmethod
    def custom_parse(cls, raw: str) -> Optional[Union[ToolCallArguments, List[ToolCallArguments]]]:
        """Parse tool arguments from raw LLM text output.

        Returns:
            A single argument dict, a list of argument dicts, or None
            if this tool cannot be parsed from the given text.
        """


# ============================================================================
# ToolExecutor — Registration and Invocation
# ============================================================================

class ToolExecutor:
    """Async executor that manages tool registration, invocation, and shared state.

    Args:
        tools: Optional list of Tool classes to register on construction.
        max_concurrency: Optional semaphore limit for parallel execution.
    """

    def __init__(
        self,
        tools: Optional[List[type[Tool]]] = None,
        *,
        max_concurrency: Optional[int] = None,
    ):
        self._tool_map: Dict[str, type[Tool]] = {}
        if tools:
            for tool in tools:
                self.register(tool)
        # None means unbounded parallelism in execute_tool_call().
        self._sem = asyncio.Semaphore(max_concurrency) if max_concurrency else None

    # --- Registration --------------------------------------------------------

    def register(self, tool: type[Tool]) -> None:
        """Register a tool class.

        Raises:
            ValueError: If a tool with the same normalized name is already registered.
+ """ + key = self._normalize_name(tool.name) + if key in self._tool_map: + raise ValueError(f"Tool already registered: {tool.name}") + self._tool_map[key] = tool + + def _normalize_name(self, name: str) -> str: + """Normalize a tool name for case- and underscore-insensitive lookup.""" + return name.lower().replace("_", "") + + @property + def tools(self) -> List[type[Tool]]: + """Return a list of all registered tool classes.""" + return list(self._tool_map.values()) + + def list_tools(self) -> List[str]: + """Return a list of registered tool names.""" + return [t.name for t in self._tool_map.values()] + + # --- Close all tools ----------------------------------------------------- + + async def close(self) -> None: + """Close all registered tools (release resources).""" + await asyncio.gather(*(t.close() for t in self._tool_map.values())) + + # --- Single call --------------------------------------------------------- + + async def execute_tool_call( + self, + tool_call: ToolCall, + env: Optional[Any] = None, + **kwargs: Any, + ) -> ToolResult: + """Execute a single tool call. + + Looks up the tool by normalized name, validates arguments, runs lifecycle + hooks (before_execute / execute / after_execute), and wraps the result. + """ + key = self._normalize_name(tool_call.name) + tool = self._tool_map.get(key) + if not tool: + available = [t.name for t in self._tool_map.values()] + return ToolResult( + name=tool_call.name, + success=False, + error=f"Tool '{tool_call.name}' not found. 
Available: {available}", + call_id=tool_call.call_id, + id=tool_call.id, + ) + + async def _run() -> ToolResult: + try: + payload: ToolCallArguments = await tool.check(tool_call.arguments) + await tool.before_execute(payload, env, **kwargs) + + exec_result = await tool.execute(payload, env, **kwargs) + await tool.after_execute(payload, exec_result, env, **kwargs) + + return ToolResult( + name=tool_call.name, + success=(exec_result.error_code == 0), + result=exec_result.output, + state=exec_result.state, + error=exec_result.error, + call_id=tool_call.call_id, + id=tool_call.id, + ) + except ToolError as exc: + return ToolResult( + name=tool_call.name, + success=False, + error=str(exc), + call_id=tool_call.call_id, + id=tool_call.id, + ) + except Exception as exc: + logger.exception("Unhandled error in tool '%s'", tool_call.name) + return ToolResult( + name=tool_call.name, + success=False, + error=f"Unhandled error in tool '{tool_call.name}': {exc}", + call_id=tool_call.call_id, + id=tool_call.id, + ) + + if self._sem is None: + return await _run() + async with self._sem: + return await _run() + + # --- Multiple calls ------------------------------------------------------ + + async def parallel_tool_call( + self, + tool_calls: List[ToolCall], + env_params: Optional[List[Any]] = None, + extra_kwargs: Optional[List[Dict[str, Any]]] = None, + ) -> List[ToolResult]: + """Execute multiple tool calls in parallel (concurrency-limited). + + Args: + tool_calls: List of tool calls to execute. + env_params: Per-call environment objects (defaults to None for each). + extra_kwargs: Per-call extra keyword arguments (defaults to empty dict). 
+ """ + count = len(tool_calls) + if env_params is None: + env_params = [None] * count + if extra_kwargs is None: + extra_kwargs = [{}] * count + + tasks = [ + self.execute_tool_call(call, env_param, **kw) + for call, env_param, kw in zip(tool_calls, env_params, extra_kwargs) + ] + return list(await asyncio.gather(*tasks)) + + async def sequential_tool_call( + self, + tool_calls: List[ToolCall], + env_params: Optional[List[Any]] = None, + extra_kwargs: Optional[List[Dict[str, Any]]] = None, + ) -> List[ToolResult]: + """Execute multiple tool calls sequentially. + + Args: + tool_calls: List of tool calls to execute in order. + env_params: Per-call environment objects (defaults to None for each). + extra_kwargs: Per-call extra keyword arguments (defaults to empty dict). + """ + count = len(tool_calls) + if env_params is None: + env_params = [None] * count + if extra_kwargs is None: + extra_kwargs = [{}] * count + + results: List[ToolResult] = [] + for call, env_param, kw in zip(tool_calls, env_params, extra_kwargs): + result = await self.execute_tool_call(call, env_param, **kw) + results.append(result) + return results + + +# ============================================================================ +# ToolHandler โ€” Parse LLM Text Output into ToolCalls +# ============================================================================ + +class ToolHandler: + """Parses LLM text output and matches it against registered tools. + + Each tool defines its own ``custom_parse`` method to extract arguments + from free-form text. The handler iterates over all registered tools, + collects successful parses, validates their arguments, and returns a + list of ``ToolCall`` objects. + + Args: + tools: List of Tool classes to register for parsing. 
+ """ + + def __init__(self, tools: List[type[Tool]]): + self._tool_map: Dict[str, type[Tool]] = { + t.name.lower(): t for t in tools + } + + # --- Parsing ------------------------------------------------------------- + + def parse_and_match_tool(self, llm_output: str) -> List[ToolCall]: + """Try to parse tool calls from LLM output. + + Each registered tool's ``custom_parse`` is invoked. Successful parses + whose arguments pass validation are collected and returned. + + Args: + llm_output: Raw LLM text output. + + Returns: + List of parsed and validated ToolCall objects (may be empty). + """ + all_parsed_tools: List[ToolCall] = [] + + for tool_name, tool in self._tool_map.items(): + try: + parsed_args = tool.custom_parse(llm_output) + if parsed_args is None: + continue + + # Normalize to a list + if not isinstance(parsed_args, list): + parsed_args = [parsed_args] + + for idx, parsed_arg in enumerate(parsed_args): + if not parsed_arg: + continue + if self._validate_arguments(tool, parsed_arg): + all_parsed_tools.append( + ToolCall( + name=tool.name, + call_id=f"call_{tool_name}_idx_{idx + 1}", + arguments=parsed_arg, + ) + ) + except Exception as exc: + logger.warning( + "%s.custom_parse() error: %s", tool_name, exc + ) + + if not all_parsed_tools: + logger.debug("No tool could parse this output.") + + return all_parsed_tools + + # --- Validation ---------------------------------------------------------- + + def _validate_arguments( + self, tool: type[Tool], arguments: Dict[str, Any] + ) -> bool: + """Validate whether the arguments conform to the tool's ParamModel.""" + try: + if tool.ParamModel: + tool.ParamModel(**arguments) + return True + except Exception as exc: + logger.warning( + "Argument validation error (%s): %s", tool.name, exc + ) + return False + + # --- Dynamic registration ------------------------------------------------ + + def register_tool(self, tool: type[Tool]) -> None: + """Dynamically register a new tool.""" + self._tool_map[tool.name.lower()] 
= tool + + def unregister_tool(self, name: str) -> None: + """Remove a tool by name.""" + self._tool_map.pop(name.lower(), None) + + def list_registered(self) -> List[str]: + """List names of registered tools.""" + return list(self._tool_map.keys()) + + def describe_registered_tools(self) -> str: + """Return descriptions of currently registered tools. + + Useful for displaying available tools to the LLM or for logging. + """ + if not self._tool_map: + return "No tools registered." + + lines = [] + for _name, tool in self._tool_map.items(): + lines.append(tool.description) + return "\n".join(lines) diff --git a/RPG-Kit/scripts/common/trajectory.py b/RPG-Kit/scripts/common/trajectory.py new file mode 100644 index 0000000..1a4ec76 --- /dev/null +++ b/RPG-Kit/scripts/common/trajectory.py @@ -0,0 +1,711 @@ +#!/usr/bin/env python3 +"""Trajectory Recording Module for RPG-Kit. + +This module provides utilities for recording command execution trajectories, +including: +- Step-by-step execution status (pending, in_progress, completed, failed) +- Script invocations with commands and outputs +- LLM interactions (prompts and responses) +- Component/target states +- Resume support for interrupted executions + +Each command (build_feature, refactor_feature, build_skeleton, etc.) 
+maintains its own trajectory file in .rpgkit/trajectory/ +""" + +import json +import time +import logging +from datetime import datetime +from pathlib import Path +from typing import Dict, Any, List, Optional +from dataclasses import dataclass, field, asdict +from enum import Enum + +from .paths import TRAJECTORY_DIR +from .paths import WORKSPACE_ROOT + + +# ============================================================================ +# Enums and Constants +# ============================================================================ + +class StepStatus(str, Enum): + """Status of a step in the trajectory.""" + PENDING = "pending" + IN_PROGRESS = "in_progress" + COMPLETED = "completed" + FAILED = "failed" + SKIPPED = "skipped" + + +class CommandStatus(str, Enum): + """Status of the overall command execution.""" + NOT_STARTED = "not_started" + IN_PROGRESS = "in_progress" + COMPLETED = "completed" + FAILED = "failed" + + +# ============================================================================ +# Data Classes +# ============================================================================ + +@dataclass +class ScriptCall: + """Record of a script/command invocation.""" + command: str + started_at: str + finished_at: Optional[str] = None + exit_code: Optional[int] = None + stdout: str = "" + stderr: str = "" + + def to_dict(self) -> Dict[str, Any]: + return asdict(self) + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "ScriptCall": + return cls(**data) + + +@dataclass +class LLMInteraction: + """Record of a single LLM interaction.""" + interaction_id: int + timestamp: str + purpose: str # e.g., "generate_structure", "assign_features" + prompt: str + response: Optional[str] = None + parsed_result: Optional[Dict[str, Any]] = None + success: bool = False + error: Optional[str] = None + duration_seconds: Optional[float] = None + + def to_dict(self) -> Dict[str, Any]: + return asdict(self) + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> 
"LLMInteraction": + return cls(**data) + + +@dataclass +class Step: + """Record of a step in the command execution.""" + step_id: int + name: str + description: str = "" + status: str = StepStatus.PENDING.value + started_at: Optional[str] = None + finished_at: Optional[str] = None + script_calls: List[ScriptCall] = field(default_factory=list) + llm_interactions: List[LLMInteraction] = field(default_factory=list) + error: Optional[str] = None + metadata: Dict[str, Any] = field(default_factory=dict) + + def to_dict(self) -> Dict[str, Any]: + return { + "step_id": self.step_id, + "name": self.name, + "description": self.description, + "status": self.status, + "started_at": self.started_at, + "finished_at": self.finished_at, + "script_calls": [sc.to_dict() for sc in self.script_calls], + "llm_interactions": [li.to_dict() for li in self.llm_interactions], + "error": self.error, + "metadata": self.metadata + } + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "Step": + step = cls( + step_id=data["step_id"], + name=data["name"], + description=data.get("description", ""), + status=data.get("status", StepStatus.PENDING.value), + started_at=data.get("started_at"), + finished_at=data.get("finished_at"), + error=data.get("error"), + metadata=data.get("metadata", {}) + ) + step.script_calls = [ScriptCall.from_dict(sc) for sc in data.get("script_calls", [])] + step.llm_interactions = [LLMInteraction.from_dict(li) for li in data.get("llm_interactions", [])] + return step + + +@dataclass +class TargetState: + """State of a target (component, file, etc.).""" + name: str + status: str = StepStatus.PENDING.value + details: Dict[str, Any] = field(default_factory=dict) + + def to_dict(self) -> Dict[str, Any]: + return asdict(self) + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "TargetState": + return cls(**data) + + +@dataclass +class ResumePoint: + """Information needed to resume an interrupted execution.""" + step_id: int + step_name: str + target: 
Optional[str] = None + context: Dict[str, Any] = field(default_factory=dict) + + def to_dict(self) -> Dict[str, Any]: + return asdict(self) + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "ResumePoint": + return cls(**data) + + +# ============================================================================ +# Main Trajectory Class +# ============================================================================ + +class Trajectory: + """Manages trajectory recording for a single command execution. + + Usage: + traj = Trajectory("build_skeleton") + traj.start() + + step = traj.add_step("generate_structure", "Generate directory structure") + traj.start_step(step.step_id) + + # Record LLM interaction + interaction_id = traj.start_llm_interaction(step.step_id, "generate_structure", prompt) + traj.complete_llm_interaction(step.step_id, interaction_id, response, parsed_result) + + traj.complete_step(step.step_id) + traj.complete() + """ + + def __init__(self, command_name: str, base_dir: Path = None): + """Initialize a trajectory for a command. 
+ + Args: + command_name: Name of the command (e.g., "build_skeleton") + base_dir: Base directory for trajectory files (default: current dir) + """ + self.command_name = command_name + self.base_dir = Path(base_dir) if base_dir else WORKSPACE_ROOT + self.trajectory_dir = self.base_dir / TRAJECTORY_DIR + + # Generate filename with timestamp (human-readable, to seconds) + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + self.trajectory_file = self.trajectory_dir / f"{command_name}_trajectory_{timestamp}.json" + + # Trajectory data + self.status: str = CommandStatus.NOT_STARTED.value + self.started_at: Optional[str] = None + self.finished_at: Optional[str] = None + self.steps: List[Step] = [] + self.targets_state: Dict[str, TargetState] = {} + self.resume_point: Optional[ResumePoint] = None + self.error: Optional[str] = None + self.metadata: Dict[str, Any] = {} + + # Runtime tracking + self._llm_interaction_counter = 0 + self._step_counter = 0 + self.logger = logging.getLogger(__name__) + + # ======================================================================== + # File Operations + # ======================================================================== + + def exists(self) -> bool: + """Check if trajectory file already exists.""" + return self.trajectory_file.exists() + + def load(self) -> bool: + """Load existing trajectory from file. 
+ + Returns: + True if loaded successfully, False if file doesn't exist or is invalid + """ + if not self.trajectory_file.exists(): + return False + + try: + with open(self.trajectory_file, 'r', encoding='utf-8') as f: + data = json.load(f) + + self.status = data.get("status", CommandStatus.NOT_STARTED.value) + self.started_at = data.get("started_at") + self.finished_at = data.get("finished_at") + self.error = data.get("error") + self.metadata = data.get("metadata", {}) + + self.steps = [Step.from_dict(s) for s in data.get("steps", [])] + self.targets_state = { + k: TargetState.from_dict(v) + for k, v in data.get("targets_state", {}).items() + } + + if data.get("resume_point"): + self.resume_point = ResumePoint.from_dict(data["resume_point"]) + + # Restore counters + if self.steps: + self._step_counter = max(s.step_id for s in self.steps) + for step in self.steps: + if step.llm_interactions: + max_id = max(li.interaction_id for li in step.llm_interactions) + self._llm_interaction_counter = max(self._llm_interaction_counter, max_id) + + return True + + except (json.JSONDecodeError, KeyError) as e: + self.logger.warning(f"Failed to load trajectory: {e}") + return False + + def save(self) -> None: + """Save current trajectory to file.""" + self.trajectory_dir.mkdir(parents=True, exist_ok=True) + + data = { + "command": self.command_name, + "status": self.status, + "started_at": self.started_at, + "finished_at": self.finished_at, + "error": self.error, + "metadata": self.metadata, + "steps": [s.to_dict() for s in self.steps], + "targets_state": {k: v.to_dict() for k, v in self.targets_state.items()}, + "resume_point": self.resume_point.to_dict() if self.resume_point else None + } + + with open(self.trajectory_file, 'w', encoding='utf-8') as f: + json.dump(data, f, indent=2, ensure_ascii=False) + + def delete(self) -> bool: + """Delete the trajectory file.""" + if self.trajectory_file.exists(): + self.trajectory_file.unlink() + return True + return False + + # 
======================================================================== + # Command Lifecycle + # ======================================================================== + + def start(self, metadata: Dict[str, Any] = None) -> None: + """Start the command execution.""" + self.status = CommandStatus.IN_PROGRESS.value + self.started_at = datetime.now().isoformat() + self.finished_at = None + self.error = None + if metadata: + self.metadata.update(metadata) + self.save() + + def complete(self, metadata: Dict[str, Any] = None) -> None: + """Mark command as successfully completed.""" + self.status = CommandStatus.COMPLETED.value + self.finished_at = datetime.now().isoformat() + self.resume_point = None + if metadata: + self.metadata.update(metadata) + self.save() + + def fail(self, error: str, metadata: Dict[str, Any] = None) -> None: + """Mark command as failed.""" + self.status = CommandStatus.FAILED.value + self.finished_at = datetime.now().isoformat() + self.error = error + if metadata: + self.metadata.update(metadata) + self.save() + + def is_resumable(self) -> bool: + """Check if this trajectory can be resumed.""" + return ( + self.status == CommandStatus.IN_PROGRESS.value and + self.resume_point is not None + ) + + # ======================================================================== + # Step Management + # ======================================================================== + + def add_step(self, name: str, description: str = "", metadata: Dict[str, Any] = None) -> Step: + """Add a new step to the trajectory.""" + self._step_counter += 1 + step = Step( + step_id=self._step_counter, + name=name, + description=description, + metadata=metadata or {} + ) + self.steps.append(step) + self.save() + return step + + def get_step(self, step_id: int) -> Optional[Step]: + """Get a step by its ID.""" + for step in self.steps: + if step.step_id == step_id: + return step + return None + + def get_step_by_name(self, name: str) -> Optional[Step]: + """Get a step by its 
name.""" + for step in self.steps: + if step.name == name: + return step + return None + + def start_step(self, step_id: int) -> None: + """Mark a step as in progress.""" + step = self.get_step(step_id) + if step: + step.status = StepStatus.IN_PROGRESS.value + step.started_at = datetime.now().isoformat() + self.save() + + def complete_step(self, step_id: int, metadata: Dict[str, Any] = None) -> None: + """Mark a step as completed.""" + step = self.get_step(step_id) + if step: + step.status = StepStatus.COMPLETED.value + step.finished_at = datetime.now().isoformat() + if metadata: + step.metadata.update(metadata) + self.save() + + def fail_step(self, step_id: int, error: str) -> None: + """Mark a step as failed.""" + step = self.get_step(step_id) + if step: + step.status = StepStatus.FAILED.value + step.finished_at = datetime.now().isoformat() + step.error = error + self.save() + + def skip_step(self, step_id: int, reason: str = "") -> None: + """Mark a step as skipped.""" + step = self.get_step(step_id) + if step: + step.status = StepStatus.SKIPPED.value + step.finished_at = datetime.now().isoformat() + if reason: + step.metadata["skip_reason"] = reason + self.save() + + # ======================================================================== + # Script Call Recording + # ======================================================================== + + def record_script_start(self, step_id: int, command: str) -> int: + """Record the start of a script call. 
+ + Returns: + Index of the script call in the step's script_calls list + """ + step = self.get_step(step_id) + if not step: + return -1 + + script_call = ScriptCall( + command=command, + started_at=datetime.now().isoformat() + ) + step.script_calls.append(script_call) + self.save() + return len(step.script_calls) - 1 + + def record_script_end( + self, + step_id: int, + call_index: int, + exit_code: int, + stdout: str = "", + stderr: str = "" + ) -> None: + """Record the completion of a script call.""" + step = self.get_step(step_id) + if step and 0 <= call_index < len(step.script_calls): + sc = step.script_calls[call_index] + sc.finished_at = datetime.now().isoformat() + sc.exit_code = exit_code + sc.stdout = stdout + sc.stderr = stderr + self.save() + + # ======================================================================== + # LLM Interaction Recording + # ======================================================================== + + def start_llm_interaction( + self, + step_id: int, + purpose: str, + prompt: str + ) -> int: + """Record the start of an LLM interaction. 
+ + Returns: + The interaction_id for this interaction + """ + step = self.get_step(step_id) + if not step: + return -1 + + self._llm_interaction_counter += 1 + interaction = LLMInteraction( + interaction_id=self._llm_interaction_counter, + timestamp=datetime.now().isoformat(), + purpose=purpose, + prompt=prompt + ) + step.llm_interactions.append(interaction) + self.save() + return self._llm_interaction_counter + + def complete_llm_interaction( + self, + step_id: int, + interaction_id: int, + response: str, + parsed_result: Dict[str, Any] = None, + success: bool = True, + error: str = None, + duration_seconds: float = None + ) -> None: + """Record the completion of an LLM interaction.""" + step = self.get_step(step_id) + if not step: + return + + for interaction in step.llm_interactions: + if interaction.interaction_id == interaction_id: + interaction.response = response + interaction.parsed_result = parsed_result + interaction.success = success + interaction.error = error + interaction.duration_seconds = duration_seconds + self.save() + return + + # ======================================================================== + # Target State Management + # ======================================================================== + + def set_target_state( + self, + target_name: str, + status: str, + details: Dict[str, Any] = None + ) -> None: + """Set the state of a target (component, file, etc.).""" + self.targets_state[target_name] = TargetState( + name=target_name, + status=status, + details=details or {} + ) + self.save() + + def get_target_state(self, target_name: str) -> Optional[TargetState]: + """Get the state of a target.""" + return self.targets_state.get(target_name) + + def update_target_details(self, target_name: str, details: Dict[str, Any]) -> None: + """Update details for a target.""" + if target_name in self.targets_state: + self.targets_state[target_name].details.update(details) + self.save() + + # 
======================================================================== + # Resume Point Management + # ======================================================================== + + def set_resume_point( + self, + step_id: int, + step_name: str, + target: str = None, + context: Dict[str, Any] = None + ) -> None: + """Set a resume point for potential recovery.""" + self.resume_point = ResumePoint( + step_id=step_id, + step_name=step_name, + target=target, + context=context or {} + ) + self.save() + + def clear_resume_point(self) -> None: + """Clear the resume point.""" + self.resume_point = None + self.save() + + # ======================================================================== + # Utility Methods + # ======================================================================== + + def get_summary(self) -> Dict[str, Any]: + """Get a summary of the trajectory.""" + completed_steps = sum(1 for s in self.steps if s.status == StepStatus.COMPLETED.value) + failed_steps = sum(1 for s in self.steps if s.status == StepStatus.FAILED.value) + total_llm_interactions = sum(len(s.llm_interactions) for s in self.steps) + total_script_calls = sum(len(s.script_calls) for s in self.steps) + + return { + "command": self.command_name, + "status": self.status, + "started_at": self.started_at, + "finished_at": self.finished_at, + "total_steps": len(self.steps), + "completed_steps": completed_steps, + "failed_steps": failed_steps, + "total_llm_interactions": total_llm_interactions, + "total_script_calls": total_script_calls, + "targets_count": len(self.targets_state), + "is_resumable": self.is_resumable(), + "error": self.error + } + + def print_summary(self) -> None: + """Print a human-readable summary.""" + summary = self.get_summary() + print(f"\n{'='*60}") + print(f"Trajectory: {summary['command']}") + print(f"{'='*60}") + print(f"Status: {summary['status']}") + print(f"Started: {summary['started_at'] or 'N/A'}") + print(f"Finished: {summary['finished_at'] or 'N/A'}") + 
print(f"Steps: {summary['completed_steps']}/{summary['total_steps']} completed") + if summary['failed_steps'] > 0: + print(f"Failed steps: {summary['failed_steps']}") + print(f"LLM interactions: {summary['total_llm_interactions']}") + print(f"Script calls: {summary['total_script_calls']}") + if summary['is_resumable']: + print(f"[WARNING] Can be resumed from: {self.resume_point.step_name}") + if summary['error']: + print(f"Error: {summary['error']}") + print(f"{'='*60}\n") + + +# ============================================================================ +# Convenience Functions +# ============================================================================ + +def find_latest_trajectory(command_name: str, base_dir: Path = None) -> Optional[Path]: + """Find the most recent trajectory file for a command. + + Returns the path to the latest trajectory file, or None if not found. + """ + base = Path(base_dir) if base_dir else WORKSPACE_ROOT + traj_dir = base / TRAJECTORY_DIR + + if not traj_dir.exists(): + return None + + # Find all trajectory files matching the pattern + pattern = f"{command_name}_trajectory_*.json" + files = list(traj_dir.glob(pattern)) + + if not files: + # Also check for old-style filename (without timestamp) + old_file = traj_dir / f"{command_name}_trajectory.json" + if old_file.exists(): + return old_file + return None + + # Sort by modification time (newest first) + files.sort(key=lambda f: f.stat().st_mtime, reverse=True) + return files[0] + + +def load_or_create_trajectory(command_name: str, base_dir: Path = None) -> Trajectory: + """Load an existing trajectory or create a new one. + + If an in-progress trajectory exists, it will be loaded for potential resume. + Otherwise, a fresh trajectory will be created. 
+ """ + # First, check if there's an existing in-progress trajectory + latest_file = find_latest_trajectory(command_name, base_dir) + + if latest_file: + # Try to load and check if it's resumable + traj = Trajectory(command_name, base_dir) + traj.trajectory_file = latest_file # Override with found file + if traj.load() and traj.is_resumable(): + return traj + + # Create a new trajectory (with timestamp) + return Trajectory(command_name, base_dir) + + +def get_trajectory_status(command_name: str, base_dir: Path = None) -> Dict[str, Any]: + """Get the status of a trajectory without fully loading it.""" + traj = Trajectory(command_name, base_dir) + if traj.load(): + return traj.get_summary() + return {"command": command_name, "status": "not_found"} + + +# ============================================================================ +# Main (for testing) +# ============================================================================ + +if __name__ == "__main__": + # Demo/test usage + import tempfile + + with tempfile.TemporaryDirectory() as tmpdir: + traj = Trajectory("test_command", Path(tmpdir)) + + # Start command + traj.start({"version": "1.0"}) + + # Add and execute steps + step1 = traj.add_step("check_input", "Validate input files") + traj.start_step(step1.step_id) + + # Record script call + call_idx = traj.record_script_start(step1.step_id, "python check.py --json") + traj.record_script_end(step1.step_id, call_idx, 0, '{"valid": true}', "") + + traj.complete_step(step1.step_id) + + # Step with LLM interaction + step2 = traj.add_step("generate", "Generate output") + traj.start_step(step2.step_id) + + interaction_id = traj.start_llm_interaction(step2.step_id, "generate_code", "Write hello world") + traj.complete_llm_interaction( + step2.step_id, + interaction_id, + 'print("Hello, World!")', + {"language": "python"}, + success=True, + duration_seconds=2.5 + ) + + # Set target state + traj.set_target_state("component_a", StepStatus.COMPLETED.value, {"files": 3}) + + 
traj.complete_step(step2.step_id) + + # Complete command + traj.complete() + + # Print summary + traj.print_summary() + + # Load and verify + traj2 = Trajectory("test_command", Path(tmpdir)) + traj2.load() + print("Loaded trajectory summary:") + traj2.print_summary() diff --git a/RPG-Kit/scripts/common/utils.py b/RPG-Kit/scripts/common/utils.py new file mode 100644 index 0000000..360fff9 --- /dev/null +++ b/RPG-Kit/scripts/common/utils.py @@ -0,0 +1,1619 @@ +#!/usr/bin/env python3 +"""Common Utility Functions. + +This module contains shared utility functions used across multiple scripts: +- Skeleton traversal and formatting functions +- Python code validation functions +- Prompt formatting functions +- Display/printing functions +- Repository info loading functions +- Path normalization and file filtering functions (ported from RPG-ZeroRepo) +- Text / LLM output parsing functions (ported from RPG-ZeroRepo) +- Code skeleton extraction functions (ported from RPG-ZeroRepo) +- AST node range helpers (ported from RPG-ZeroRepo) +""" + +import ast +import json +import logging +import os +import random +import re +from pathlib import Path, PurePosixPath +from typing import Dict, List, Optional, Tuple, Union, Any + +from .paths import SKELETON_FILE, FEATURE_TREE_FILE, FEATURE_SPEC_FILE +from collections import defaultdict +logger = logging.getLogger(__name__) + + +# ============================================================================ +# Repository Info Functions +# ============================================================================ + +def get_repo_info_from_files() -> Tuple[str, str]: + """Load repository info from available files. + + Tries skeleton.json first, then feature_tree.json as backup. 
+ + Returns: + Tuple of (repo_name, repo_info) + """ + repo_name = "project" + repo_info = "" + + # Try skeleton.json first + if SKELETON_FILE.exists(): + try: + with open(SKELETON_FILE, "r", encoding="utf-8") as f: + data = json.load(f) + repo_name = data.get("repository_name", repo_name) + repo_info = data.get("repository_purpose", "") + except Exception: + pass + + # Also check feature_tree.json for backup + if FEATURE_TREE_FILE.exists(): + try: + with open(FEATURE_TREE_FILE, "r", encoding="utf-8") as f: + data = json.load(f) + if not repo_name or repo_name == "project": + repo_name = data.get("repository_name", repo_name) + if not repo_info: + repo_info = data.get("repository_purpose", "") + except Exception: + pass + + return repo_name, repo_info + + +def get_project_background_context( + feature_spec_path=None, +) -> str: + """Load project background and technology context from feature_spec.json. + + Reads ``background_and_overview`` and ``non_functional_requirements`` + from *feature_spec_path* (defaults to ``FEATURE_SPEC_FILE``). + + The returned string is suitable for direct injection into LLM prompts. + Returns an empty string when the file does not exist or contains no + background entries โ€” callers need no special-casing. + + Args: + feature_spec_path: Optional override for the feature_spec.json location. + + Returns: + A formatted multi-line string summarising the project background, or "". + """ + path = Path(feature_spec_path) if feature_spec_path else FEATURE_SPEC_FILE + if not path.exists(): + return "" + + try: + with open(path, "r", encoding="utf-8") as f: + data = json.load(f) + except Exception: + return "" + + parts: List[str] = [] + + # Background & overview (contains technology stack, architecture, etc.) 
def get_leaf_name(item) -> str:
    """Return the feature name of a leaf node.

    Accepts both the legacy plain-string form and the newer dict form
    ({"name": ..., "description": ...}).

    Args:
        item: Leaf node item (str or dict).

    Returns:
        The feature name as a string.
    """
    return item.get("name", "") if isinstance(item, dict) else str(item)


def get_leaf_description(item) -> str:
    """Return the description of a leaf node.

    Args:
        item: Leaf node item (str or dict).

    Returns:
        The description string; "" when the item carries none.
    """
    if not isinstance(item, dict):
        return ""
    return item.get("description", "")
def get_all_leaf_paths(tree: Dict[str, Any], prefix: str = "") -> List[str]:
    """Collect the full path of every leaf node in the feature tree.

    Leaves are list entries (plain strings or dicts carrying "name"),
    or empty containers reached with a non-empty prefix.

    Args:
        tree: Feature tree dictionary.
        prefix: Path accumulated so far ("" at the root).

    Returns:
        List of "a/b/c"-style paths to all leaves, in traversal order.
    """
    def joined(base: str, part: str) -> str:
        return f"{base}/{part}" if base else part

    collected: List[str] = []
    if isinstance(tree, dict):
        if tree:
            for key, subtree in tree.items():
                collected.extend(get_all_leaf_paths(subtree, joined(prefix, key)))
        elif prefix:
            # An empty dict is itself a leaf (unless we are at the root).
            collected.append(prefix)
    elif isinstance(tree, list):
        if tree:
            for entry in tree:
                name = entry.get("name", "") if isinstance(entry, dict) else str(entry)
                collected.append(joined(prefix, name))
        elif prefix:
            collected.append(prefix)
    elif prefix:
        # Scalar node: the accumulated prefix is the leaf path.
        collected.append(prefix)
    return collected
def print_unicode_table(headers: List[str], rows: List[List[Any]], title: str = ""):
    """Render rows as a box-drawn table on stdout.

    Nothing is printed when *rows* is empty. Cells beyond the header
    count are dropped; all cells are str()-converted for display.

    Args:
        headers: Column header labels.
        rows: Table rows, each a list of cell values.
        title: Optional title printed above the table.
    """
    if not rows:
        return

    # Column widths: widest of header and all cells, plus two padding chars.
    widths = [len(str(h)) for h in headers]
    for row in rows:
        for col, cell in enumerate(row):
            if col < len(widths):
                widths[col] = max(widths[col], len(str(cell)))
    widths = [w + 2 for w in widths]

    def rule(left: str, mid: str, right: str) -> str:
        # A horizontal border line built from box-drawing characters.
        return " " + left + mid.join("─" * w for w in widths) + right

    def rendered(cells) -> str:
        out = " │"
        for col, cell in enumerate(cells):
            if col < len(widths):
                out += f" {str(cell).ljust(widths[col] - 1)}│"
        return out

    if title:
        print(f"\n {title}")
    print(rule("┌", "┬", "┐"))
    print(rendered(headers))
    print(rule("├", "┼", "┤"))
    for i, row in enumerate(rows):
        print(rendered(row))
        # Separator between data rows, but not after the last one.
        if i < len(rows) - 1:
            print(rule("├", "┼", "┤"))
    print(rule("└", "┴", "┘"))
============================================================================ + +def get_skeleton_tree_string(skeleton: Dict[str, Any], max_depth: int = 3) -> str: + """Generate a tree string representation of the skeleton. + + Args: + skeleton: Skeleton dictionary with nested structure + max_depth: Maximum depth to traverse + + Returns: + Tree-formatted string representation (limited to 50 lines) + """ + lines = [] + + def traverse(node: Dict[str, Any], prefix: str = "", depth: int = 0): + if depth > max_depth: + return + + name = node.get("name", "") + node_type = node.get("type", "") + + if node_type == "directory": + lines.append(f"{prefix}{name}/") + children = node.get("children", []) + for i, child in enumerate(children): + is_last = i == len(children) - 1 + connector = "โ””โ”€โ”€ " if is_last else "โ”œโ”€โ”€ " + child_prefix = prefix + (" " if is_last else "โ”‚ ") + lines.append(f"{prefix}{connector}{child.get('name', '')}") + if child.get("type") == "directory": + traverse(child, child_prefix, depth + 1) + else: + lines.append(f"{prefix}{name}") + + root = skeleton.get("root", skeleton) + traverse(root) + + return "\n".join(lines[:50]) # Limit output + + +def extract_functional_areas_from_skeleton(skeleton: Dict[str, Any]) -> List[str]: + """Extract functional area names from skeleton by analyzing feature paths. 
+ + Args: + skeleton: Skeleton dictionary with file nodes containing feature_paths + + Returns: + Sorted list of unique functional area names + """ + components = set() + + def traverse(node: Dict[str, Any]): + if node.get("type") == "file": + feature_paths = node.get("feature_paths", []) + for fp in feature_paths: + # Component is the first part of the feature path + if "/" in fp: + component = fp.split("/")[0] + components.add(component) + else: + components.add(fp) + elif node.get("type") == "directory": + for child in node.get("children", []): + traverse(child) + + root = skeleton.get("root", skeleton) + traverse(root) + + return sorted(list(components)) + + +def format_functional_graph_overview(skeleton: Dict[str, Any]) -> str: + """Extract a hierarchical functional graph overview from the skeleton. + + Groups feature_paths by component (level 1) and shows unique + sub-feature categories (level 2) under each component, formatted + as an indented tree. + + Example output:: + + Functional Graph Overview: + Expression Processing + โ”œโ”€ output + โ”œโ”€ parsing + โ”œโ”€ representation + โ””โ”€ validation + + Runtime Environment + โ”œโ”€ configuration + โ”œโ”€ constants + โ””โ”€ persistence + + Args: + skeleton: Skeleton dictionary with file nodes containing feature_paths + + Returns: + Formatted tree string + """ + tree: Dict[str, set] = defaultdict(set) + + def traverse(node: Dict[str, Any]): + if node.get("type") == "file": + for fp in node.get("feature_paths", []): + parts = fp.split("/") + component = parts[0] + if len(parts) >= 2: + tree[component].add(parts[1]) + else: + tree[component] # ensure key exists + elif node.get("type") == "directory": + for child in node.get("children", []): + traverse(child) + + root = skeleton.get("root", skeleton) + traverse(root) + + if not tree: + return "(no functional areas found)" + + lines = [] + for component in sorted(tree): + lines.append(component) + subs = sorted(tree[component]) + for i, sub in enumerate(subs): + 
def extract_component_directories(skeleton: Dict[str, Any]) -> Dict[str, str]:
    """Extract component to directory mapping from skeleton.

    Args:
        skeleton: Skeleton dictionary with file nodes

    Returns:
        Dict mapping component names to their directory paths
    """
    component_dirs = {}

    def traverse(node: Dict[str, Any]) -> None:
        if node.get("type") == "file":
            component = node.get("component", "")
            if component:
                path = node.get("path", "")
                # Get the directory containing this file; first file seen
                # for a component wins.
                if "/" in path:
                    dir_path = "/".join(path.split("/")[:-1])
                    if component not in component_dirs:
                        component_dirs[component] = dir_path
        elif node.get("type") == "directory":
            for child in node.get("children", []):
                traverse(child)

    root = skeleton.get("root", skeleton)
    traverse(root)

    return component_dirs


# ============================================================================
# Code Validation Functions
# ============================================================================

def validate_python_syntax(code: str) -> Tuple[bool, str]:
    """Validate Python code syntax using AST parser.

    Args:
        code: Python source code string

    Returns:
        Tuple of (is_valid, error_message)
            - is_valid: True if code parses successfully
            - error_message: Empty string on success, error details on failure
    """
    try:
        ast.parse(code)
        return True, ""
    except SyntaxError as e:
        return False, f"Line {e.lineno}, column {e.offset}: {e.msg}"


# ============================================================================
# Prompt Formatting Functions
# ============================================================================

def format_data_flow_edges(data_flow: list) -> str:
    """Format data flow edges for display in prompts.

    Args:
        data_flow: List of edge dicts with source, target, data_type keys

    Returns:
        Formatted string representation of data flow edges
    """
    if not data_flow:
        return "No data flow defined."

    lines = []
    for edge in data_flow:
        source = edge.get("source", "")
        target = edge.get("target", "")
        data_type = edge.get("data_type", "")
        lines.append(f" {source} → {target}: {data_type}")
    return "\n".join(lines)


def format_base_classes(base_classes: list) -> str:
    """Format base classes for context display in prompts.

    Args:
        base_classes: List of base class dicts with file_path and code keys

    Returns:
        Formatted markdown string with code blocks
    """
    if not base_classes:
        return "No base classes available."

    lines = []
    for bc in base_classes:
        if isinstance(bc, dict):
            file_path = bc.get("file_path", "unknown")
            code = bc.get("code", "")
            lines.append(f"### {file_path}\n```python\n{code}\n```\n")

    return "\n".join(lines)


def format_data_structures(data_structures: list) -> str:
    """Format data flow data structures for context display in prompts.

    Args:
        data_structures: List of data structure dicts with code, subtree,
            and data_flow_types keys

    Returns:
        Formatted markdown string with code blocks
    """
    if not data_structures:
        return "No data flow data structures available."

    lines = []
    for ds in data_structures:
        if isinstance(ds, dict):
            subtree = ds.get("subtree", "unknown")
            code = ds.get("code", "")
            df_types = ds.get("data_flow_types", [])
            types_str = ", ".join(df_types) if df_types else "(unspecified)"
            file_path = ds.get("file_path", "")
            header = f"### Subtree: {subtree}"
            if file_path:
                header += f" | File: {file_path}"
            lines.append(f"{header}\nCovers data flow types: {types_str}\n```python\n{code}\n```\n")

    return "\n".join(lines)


def format_base_classes_and_data_structures(base_classes: list, data_structures: list) -> str:
    """Format both base classes and data structures for context display in prompts.

    Args:
        base_classes: List of base class dicts
        data_structures: List of data structure dicts

    Returns:
        Formatted markdown string with code blocks for both sections
    """
    parts = []

    bc_str = format_base_classes(base_classes)
    if base_classes:
        parts.append("## Base Classes\n" + bc_str)

    ds_str = format_data_structures(data_structures)
    if data_structures:
        parts.append("## Data Flow Data Structures\n" + ds_str)

    if not parts:
        return "No base classes or data structures available."

    return "\n\n".join(parts)


# ============================================================================
# Path Normalization Functions
# (Ported from RPG-ZeroRepo/zerorepo/utils/file.py and
#  RPG-ZeroRepo/zerorepo/rpg_gen/base/rpg/util.py)
# ============================================================================

def normalize_path(path: Union[str, Path]) -> str:
    """Normalize a node id into a relative POSIX-style format.

    Form: rel/posix/path[:qualname.with.dots]

    Rules:
        - Compatible with Windows/Linux
        - Resolve redundant path components like ".." and "."
        - Remove leading "./" prefix for consistency
        - Treat the part after ":" as a symbol qualified name,
          split by '.', and filter empty segments

    Source: RPG-ZeroRepo/zerorepo/utils/file.py (normalize_path)
    """
    # posixpath is stdlib; imported locally because this module's top-level
    # import block is outside this edit.
    import posixpath

    s = str(path).strip()
    if ":" in s:
        left, right = s.split(":", 1)
    else:
        left, right = s, None

    # BUGFIX: PurePosixPath does NOT collapse "." / ".." components, even
    # though the documented contract promises it.  Run the result through
    # posixpath.normpath so that "a/../b" -> "b" and "./a" -> "a".
    norm = posixpath.normpath(PurePosixPath(str(left).strip()).as_posix())
    norm = norm.removeprefix("./").removeprefix("/")
    if norm == "" or norm == ".":
        base = "."
    else:
        base = norm

    if right is not None:
        segs = [seg.strip() for seg in right.strip().strip(".").split(".") if seg.strip()]
        if segs:
            return f"{base}:{'.'.join(segs)}"
    return base
# ============================================================================
# File Filtering Functions
# (Ported from RPG-ZeroRepo/zerorepo/utils/repo.py)
# ============================================================================

def is_test_file(nid: str) -> bool:
    """Check whether a node id belongs to a test file.

    Splits the file path portion by ' ', '_', and '/' and checks if any
    segment starts with 'test'.

    Source: RPG-ZeroRepo/zerorepo/utils/repo.py (is_test_file)
    """
    file_path = nid.split(":")[0]
    word_list = re.split(r" |_|/", file_path.lower())
    return any(word.startswith("test") for word in word_list)


def merge_intervals(intervals: List[Tuple[int, int]]) -> List[Tuple[int, int]]:
    """Merge overlapping inclusive intervals.

    Given a list of (start, end) tuples where both endpoints are inclusive,
    merge overlapping or adjacent intervals and return the merged result
    sorted by start position.

    Args:
        intervals: List of (start, end) tuples, both inclusive.

    Returns:
        Merged list of (start, end) tuples.

    Source: RPG-ZeroRepo/zerorepo/utils/repo.py (merge_intervals)
    """
    if not intervals:
        return []

    sorted_intervals = sorted(intervals, key=lambda iv: iv[0])
    merged = [sorted_intervals[0]]

    for current in sorted_intervals[1:]:
        last = merged[-1]
        if current[0] <= last[1]:
            merged[-1] = (last[0], max(last[1], current[1]))
        else:
            merged.append(current)

    return merged


def filter_excluded_files(valid_files: List[str], excluded_files: List[str]) -> List[str]:
    """Filter out files that match any path in *excluded_files*.

    *excluded_files* may contain files or directories:
        - If it is a file: remove on exact match.
        - If it is a directory: remove all files under that directory.

    Args:
        valid_files: All valid file paths (typically .py files in the repo).
        excluded_files: List of file or directory paths to exclude.

    Returns:
        The filtered list of valid_files.

    Source: RPG-ZeroRepo/zerorepo/utils/repo.py (filter_excluded_files)
    """
    norm_excluded = [normalize_path(p) for p in excluded_files if p.strip()]
    filtered = []

    for vf in valid_files:
        norm_vf = normalize_path(vf)
        excluded = False
        for excl in norm_excluded:
            # Exact match, or the excluded entry is a directory prefix.
            if norm_vf == excl or norm_vf.startswith(excl + "/"):
                excluded = True
                break
        if not excluded:
            filtered.append(vf)

    return filtered


# ============================================================================
# LLM Output Parsing Functions
# (Ported from RPG-ZeroRepo/zerorepo/utils/api.py)
# ============================================================================

def parse_solution_output(output: str) -> str:
    """Extract the content inside ``<solution>...</solution>`` tags.

    If the tags are not present the full (stripped) output is returned.

    Args:
        output: Raw LLM output string.

    Returns:
        Extracted solution text, stripped of leading/trailing whitespace.

    Source: RPG-ZeroRepo/zerorepo/utils/api.py (parse_solution_output)
    """
    # BUGFIX: the ported code called output.split("", 1), which raises
    # ValueError("empty separator") on every call — the tag literals were
    # evidently stripped in transit.  NOTE(review): upstream ZeroRepo uses
    # <solution> tags; confirm the exact tag name against
    # zerorepo/utils/api.py.
    output = output.split("<solution>", 1)[-1]
    output = output.split("</solution>", 1)[0]
    return output.strip()
def parse_code_blocks(output: str, type: str = "general") -> List[str]:
    """Parse markdown fenced code blocks from a string.

    Args:
        output: The text containing code blocks.
        type: The language type to match.
            - ``"general"``: matches any fenced block regardless of language.
            - ``"python"``, ``"javascript"``, etc.: matches only that language.

    Returns:
        A list of extracted code block contents, each stripped of
        leading/trailing whitespace.

    Source: RPG-ZeroRepo/zerorepo/utils/api.py (parse_code_blocks)
    """
    # NOTE(review): the parameter name ``type`` shadows the builtin; kept
    # as-is because callers may pass it by keyword.
    if type == "general":
        pattern = r"```(?:\n)?(.*?)```"
    else:
        pattern = rf"```{type}\s+(.*?)```"

    matches = re.findall(pattern, output, re.DOTALL)
    return [m.strip() for m in matches]


# ============================================================================
# Code Skeleton Extraction Functions
# (Ported from RPG-ZeroRepo/zerorepo/utils/compress.py)
# ============================================================================

def get_skeleton(
    raw_code: str,
    keep_constant: bool = True,
    keep_indent: bool = False,
    compress_assign: bool = False,
    keep_docstring: bool = False,
    keep_imports: bool = False,
    total_lines: int = 100,
    prefix_lines: int = 50,
    suffix_lines: int = 50,
    line_number_mode: str = "none",
) -> str:
    """Generate a structural skeleton version of Python source code.

    Uses ``libcst`` to strip function bodies (replaced with ``...``) while
    keeping class/function signatures, optional constants, docstrings, and
    import statements. Very long module-level assignments can be compressed
    to keep only their head and tail.

    Args:
        raw_code: Python source code to compress.
        keep_constant: Keep short module-level constant assignments.
        keep_indent: Preserve indentation when omitting function bodies.
        compress_assign: Fold very long module-level assignments.
        keep_docstring: Keep module/class/function docstrings.
        keep_imports: Keep ``import`` / ``from ... import ...`` statements.
        total_lines: Threshold (in lines) above which an assignment is folded.
        prefix_lines: Number of head lines to keep when folding.
        suffix_lines: Number of tail lines to keep when folding.
        line_number_mode: ``"none"`` | ``"original"`` | ``"sequential"``.

    Returns:
        The skeleton code string.

    Source: RPG-ZeroRepo/zerorepo/utils/compress.py (get_skeleton)
    """
    try:
        import libcst as cst
        import libcst.matchers as m
    except ImportError:
        # Degrade gracefully: without libcst we cannot build a skeleton,
        # so the caller gets the raw code unchanged.
        logger.warning(
            "libcst is not installed; get_skeleton() will return raw code. "
            "Install with: pip install libcst"
        )
        return raw_code

    # --- internal transformer (inline to avoid top-level libcst import) ---
    # Sentinel statement inserted in place of each function body; it is
    # rewritten to "..." at the end (see the keep_indent branch below).
    replacement_string = '"__FUNC_BODY_REPLACEMENT_STRING__"'

    class _CompressTransformer(cst.CSTTransformer):
        """Replace function bodies with ``...`` while preserving structure."""

        def __init__(self):
            pass

        def _is_import_stmt(self, stmt: cst.CSTNode) -> bool:
            # True for a simple statement line containing at least one
            # Import / ImportFrom.
            if not m.matches(stmt, m.SimpleStatementLine()):
                return False
            return any(
                m.matches(s, m.Import()) or m.matches(s, m.ImportFrom())
                for s in getattr(stmt, "body", [])
            )

        def leave_Module(self, original_node, updated_node):
            # Keep class/function defs and, depending on flags, module-level
            # assignments, imports, and the module docstring (i == 0).
            new_body = []
            for i, stmt in enumerate(updated_node.body):
                if m.matches(stmt, m.ClassDef()) or m.matches(stmt, m.FunctionDef()):
                    new_body.append(stmt)
                elif (
                    keep_constant
                    and m.matches(stmt, m.SimpleStatementLine())
                    and m.matches(stmt.body[0], m.Assign())
                ):
                    new_body.append(stmt)
                elif keep_imports and self._is_import_stmt(stmt):
                    new_body.append(stmt)
                elif (
                    keep_docstring
                    and i == 0
                    and m.matches(stmt, m.SimpleStatementLine())
                    and m.matches(stmt.body[0], m.Expr())
                    and m.matches(stmt.body[0].value, m.SimpleString())
                ):
                    new_body.append(stmt)
            return updated_node.with_changes(body=new_body)

        def leave_ClassDef(self, original_node, updated_node):
            # Drop the class docstring unless keep_docstring; keep all other
            # class-body statements.
            new_body = []
            for i, stmt in enumerate(updated_node.body.body):
                if (
                    i == 0
                    and keep_docstring
                    and m.matches(stmt, m.SimpleStatementLine())
                    and m.matches(stmt.body[0], m.Expr())
                    and m.matches(stmt.body[0].value, m.SimpleString())
                ):
                    new_body.append(stmt)
                elif not (
                    m.matches(stmt, m.SimpleStatementLine())
                    and m.matches(stmt.body[0], m.Expr())
                    and m.matches(stmt.body[0].value, m.SimpleString())
                ):
                    new_body.append(stmt)
            return updated_node.with_changes(body=cst.IndentedBlock(body=new_body))

        def leave_FunctionDef(self, original_node, updated_node):
            # Replace the function body with the sentinel, optionally keeping
            # the docstring and any imports that appeared inside the body.
            docstring_stmt = None
            import_stmts: List = []

            for i, stmt in enumerate(updated_node.body.body):
                if (
                    i == 0
                    and keep_docstring
                    and m.matches(stmt, m.SimpleStatementLine())
                    and m.matches(stmt.body[0], m.Expr())
                    and m.matches(stmt.body[0].value, m.SimpleString())
                ):
                    docstring_stmt = stmt
                else:
                    if keep_imports and self._is_import_stmt(stmt):
                        import_stmts.append(stmt)

            rep_expr = cst.Expr(value=cst.SimpleString(value=replacement_string))
            rep_stmt = cst.SimpleStatementLine(body=[rep_expr])

            if keep_indent:
                body = []
                if docstring_stmt:
                    body.append(docstring_stmt)
                body.extend(import_stmts)
                body.append(rep_stmt)
                return updated_node.with_changes(body=cst.IndentedBlock(body=body))

            new_body = list(import_stmts) + [rep_stmt]
            return updated_node.with_changes(body=cst.IndentedBlock(tuple(new_body)))

    # --- internal helpers for assignment compression ---
    class _GlobalVariableVisitor(cst.CSTVisitor):
        METADATA_DEPENDENCIES = (cst.metadata.PositionProvider,)

        def __init__(self):
            # Each entry: [node, start_position, end_position]
            self.assigns: list = []

        def leave_Assign(self, original_node):
            start_pos = self.get_metadata(cst.metadata.PositionProvider, original_node).start
            end_pos = self.get_metadata(cst.metadata.PositionProvider, original_node).end
            self.assigns.append([original_node, start_pos, end_pos])

    def _remove_lines(raw: str, remove_intervals):
        # Drop lines inside any (start, end) interval, emitting a single
        # "..." marker at each interval's start line.
        new_code = ""
        for i, line in enumerate(raw.splitlines(), start=1):
            if not any(s <= i <= e for s, e in remove_intervals):
                new_code += line + "\n"
            if any(s == i for s, _ in remove_intervals):
                new_code += "...\n"
        return new_code

    def _compress_assign_stmts(raw: str) -> str:
        # Fold module-level assignments longer than total_lines, keeping
        # prefix_lines of head and suffix_lines of tail.
        try:
            tree = cst.parse_module(raw)
        except Exception:
            return raw
        wrapper = cst.metadata.MetadataWrapper(tree)
        visitor = _GlobalVariableVisitor()
        wrapper.visit(visitor)
        intervals = []
        for _, start, end in visitor.assigns:
            if end.line - start.line > total_lines:
                intervals.append((start.line + prefix_lines, end.line - suffix_lines))
        return _remove_lines(raw, intervals)

    def _add_original_line_numbers(raw: str, skel: str) -> str:
        # Align skeleton lines back to the original via difflib and prefix
        # each with its original line number; elided runs become "a..b | ...".
        import difflib as _difflib

        orig = raw.splitlines()
        skel_lines = skel.splitlines()
        sm = _difflib.SequenceMatcher(None, orig, skel_lines, autojunk=False)
        width = len(str(len(orig)))
        out: List[str] = []
        prev_orig_end = 0

        def _gap(start_idx, end_idx):
            if end_idx <= start_idx:
                return
            left = str(start_idx + 1).rjust(width)
            right = str(end_idx).rjust(width)
            out.append(f"{left}..{right} | ...")

        for i_orig, j_skel, n in sm.get_matching_blocks():
            _gap(prev_orig_end, i_orig)
            for k in range(n):
                raw_ln = i_orig + k + 1
                line = skel_lines[j_skel + k]
                out.append(f"{str(raw_ln).rjust(width)} | {line}")
            prev_orig_end = i_orig + n

        _gap(prev_orig_end, len(orig))
        return "\n".join(out)

    # --- main logic ---
    try:
        tree = cst.parse_module(raw_code)
    except Exception:
        # Unparseable source: skip transformation, keep raw code.
        code = raw_code
    else:
        transformer = _CompressTransformer()
        modified_tree = tree.visit(transformer)
        code = modified_tree.code

    if compress_assign:
        code = _compress_assign_stmts(code)

    # Rewrite the sentinel into a literal "..." body.
    if keep_indent:
        code = code.replace(replacement_string + "\n", "...\n")
        code = code.replace(replacement_string, "...\n")
    else:
        pattern_re = f"\\n[ \\t]*{replacement_string}"
        code = re.sub(pattern_re, "\n...", code)

    if line_number_mode == "original":
        return _add_original_line_numbers(raw_code, code)
    elif line_number_mode == "sequential":
        lines = code.splitlines()
        width = len(str(len(lines)))
        return "\n".join(f"{str(i).rjust(width)} | {ln}" for i, ln in enumerate(lines, 1))

    return code


# ============================================================================
# Parsed Feature Tree Functions
# (Ported from RPG-ZeroRepo/zerorepo/utils/tree.py)
# ============================================================================

def transfer_parsed_tree(
    input_tree: Dict,
) -> Tuple[Dict[str, List[str]], Dict[str, List[str]]]:
    """Transform a parsed feature tree into summary and reverse-index mappings.

    Returns:
        A tuple of:
            - format_tree: ``{ file_summary: [features...] }``
            - feature_to_files: ``{ feature: [file_paths...] }``

    Merges all nested function/class-level descriptions into the file-level
    node and automatically deduplicates feature text.

    Source: RPG-ZeroRepo/zerorepo/utils/tree.py (transfer_parsed_tree)
    """

    def _collect_texts(value: Union[str, List, Dict, None]) -> List[str]:
        # Flatten an arbitrarily nested str/list/dict structure into a flat
        # list of strings; anything else is stringified.
        if value is None:
            return []
        if isinstance(value, str):
            return [value]
        if isinstance(value, list):
            result: List[str] = []
            for v in value:
                result.extend(_collect_texts(v))
            return result
        if isinstance(value, dict):
            result = []
            for v in value.values():
                result.extend(_collect_texts(v))
            return result
        return [str(value)]

    format_tree: Dict[str, List[str]] = {}
    feature_to_files: Dict[str, List[str]] = {}

    for file_path, file_tree in input_tree.items():
        file_summary = file_tree.get(
            "_file_summary_",
            os.path.basename(file_path).replace(".py", ""),
        )

        all_texts: List[str] = []
        for key, value in file_tree.items():
            # Sidecar keys: ``_file_summary_`` is the file's verbal summary
            # (already used as the format-tree key above); ``_feature_descriptions_``
            # stores the LLM-generated descriptions and must NOT be collected
            # as features (otherwise descriptions leak into functional-area
            # planner prompts).
            if key in ("_file_summary_", "_feature_descriptions_"):
                continue
            all_texts.extend(_collect_texts(value))

        deduped_texts = sorted(set(all_texts))
        format_tree[file_summary] = deduped_texts

        for feature in deduped_texts:
            feature_to_files.setdefault(feature, []).append(file_path)

    return format_tree, feature_to_files
def format_parsed_tree(
    input_tree: Dict,
    omit_full_leaf_nodes: bool = False,
    max_features: int = 2,
) -> str:
    """Format a parsed feature tree into a condensed, human-readable JSON string.

    Reuses :func:`transfer_parsed_tree` to build the base mapping, then
    optionally truncates long feature lists for readability.

    Args:
        input_tree: The parsed feature tree (``{ file_path: file_tree }``).
        omit_full_leaf_nodes: If True, truncate feature lists longer than 2.
        max_features: Number of features to sample when truncating.

    Returns:
        A compact JSON string.

    Source: RPG-ZeroRepo/zerorepo/utils/tree.py (format_parsed_tree)
    """
    fmt_tree, _ = transfer_parsed_tree(input_tree)

    for key, features in fmt_tree.items():
        # NOTE(review): the truncation threshold is the literal 2, not
        # ``max_features`` — this matches the docstring but looks like the
        # parameter was meant to drive it; confirm against upstream ZeroRepo.
        if omit_full_leaf_nodes and len(features) > 2:
            sampled = random.sample(features, min(max_features, len(features)))
            fmt_tree[key] = sampled + ["..."]

    return json.dumps(fmt_tree, ensure_ascii=False, separators=(",", ":"))


def iterative_by_folder(parsed_tree: Dict) -> Dict[str, List[str]]:
    """Group file paths in a parsed tree by their parent folder.

    Args:
        parsed_tree: A dict whose keys are file paths.

    Returns:
        ``{ folder_path: [file_paths...] }``. Root-level files are
        grouped under ``"(root)"``.

    Source: RPG-ZeroRepo/zerorepo/utils/tree.py (iterative_by_folder)
    """
    file_paths = list(parsed_tree.keys())
    grouped: Dict[str, List[str]] = {}

    for p in file_paths:
        p_norm = p.rstrip("/")
        parent_dir = os.path.dirname(p_norm)
        folder = parent_dir if parent_dir else "(root)"
        grouped.setdefault(folder, []).append(p)

    return grouped


# ============================================================================
# AST Node Range Helpers
# (Ported from RPG-ZeroRepo/zerorepo/rpg_gen/base/rpg/util.py)
# ============================================================================

def _indent_of_line(lines: list, lineno: int) -> int:
    """Return the indentation width (in columns, tab=8) of a 1-based line."""
    if lineno <= 0 or lineno > len(lines):
        return 0
    s = lines[lineno - 1]
    return len(s.expandtabs(8)) - len(s.lstrip().expandtabs(8))


def _is_blank_or_comment(lines: list, lineno: int) -> bool:
    """Return True if the 1-based line is blank or a comment."""
    # Out-of-range lines are treated as blank so expansion stops there.
    if lineno <= 0 or lineno > len(lines):
        return True
    s = lines[lineno - 1].strip()
    return not s or s.startswith("#")


def _first_body_lineno(node: ast.AST) -> Optional[int]:
    """Return the line number of the first statement in *node*.body, or None."""
    body = getattr(node, "body", None)
    if not body:
        return None
    return getattr(body[0], "lineno", None)


def _node_start_with_decorators(node: ast.AST) -> int:
    """Return the start line including any decorators."""
    decos = getattr(node, "decorator_list", None)
    if decos:
        # Decorators always precede the def/class line; fall back to
        # node.lineno if the min is somehow falsy.
        return min(getattr(d, "lineno", node.lineno) for d in decos) or node.lineno
    return node.lineno


def _node_end_by_walk(node: ast.AST, fallback_start: int) -> int:
    """Walk *node* to find the maximum end_lineno / lineno."""
    max_line = fallback_start
    for n in ast.walk(node):
        ln = getattr(n, "end_lineno", None) or getattr(n, "lineno", None)
        if isinstance(ln, int):
            max_line = max(max_line, ln)
    return max_line


def _expand_block_end_strict(
    lines: list, end_inclusive: int, base_indent: Optional[int]
) -> int:
    """Expand *end_inclusive* downward while lines have >= base_indent."""
    i = end_inclusive
    n = len(lines)
    if base_indent is None:
        return i
    j = i + 1
    while j <= n:
        # Stop at the first blank/comment line or the first line that
        # dedents below the block's body indentation.
        if _is_blank_or_comment(lines, j):
            break
        ind = _indent_of_line(lines, j)
        if ind >= base_indent:
            i = j
            j += 1
            continue
        break
    return i


def get_node_range_robust(node: ast.AST, source: str) -> Tuple[int, int, int, int]:
    """Return the line range of an AST node including decorators.

    Returns:
        ``(start_inclusive, header_end_inclusive, body_end_inclusive, end_exclusive)``

        - The start includes decorators.
        - The end stops strictly at the last effective statement
          (it does not consume trailing whitespace/comments).

    Source: RPG-ZeroRepo/zerorepo/rpg_gen/base/rpg/util.py (get_node_range_robust)
    """
    lines = source.splitlines()
    start_inclusive = _node_start_with_decorators(node)
    header_end_inclusive = getattr(node, "lineno", start_inclusive)

    body_end_inclusive = getattr(node, "end_lineno", None)
    if not isinstance(body_end_inclusive, int):
        # Older ASTs may lack end_lineno; derive it by walking children.
        body_end_inclusive = _node_end_by_walk(node, header_end_inclusive)

    first_body_ln = _first_body_lineno(node)
    base_indent = (
        _indent_of_line(lines, first_body_ln) if isinstance(first_body_ln, int) else None
    )

    expanded_end_inclusive = _expand_block_end_strict(lines, body_end_inclusive, base_indent)
    end_exclusive = expanded_end_inclusive + 1
    return start_inclusive, header_end_inclusive, body_end_inclusive, end_exclusive
def extract_source_by_lines(
    source: str, start_inclusive: int, end_inclusive: int
) -> str:
    """Extract lines [start_inclusive, end_inclusive] (1-based, inclusive) from *source*.

    Preserves original blank lines, comments, indentation, and newlines.

    Source: RPG-ZeroRepo/zerorepo/rpg_gen/base/rpg/util.py (extract_source_by_lines)
    """
    # NOTE(review): the final .strip() removes leading/trailing whitespace,
    # which contradicts the "preserves ... newlines" claim at the edges —
    # confirm this is the intended contract before relying on exact
    # round-tripping of the extracted span.
    if start_inclusive is None or end_inclusive is None:
        return ""
    lines = source.splitlines(keepends=True)
    n = len(lines)
    s = max(1, start_inclusive)
    e = min(n, end_inclusive)
    if s > e:
        return ""
    return "".join(lines[s - 1 : e]).strip()


# ============================================================================
# Token Counting and Truncation Functions
# (Ported from RPG-ZeroRepo/zerorepo/utils/api.py)
# ============================================================================

def calculate_tokens(text: str, model: str = "gpt-4o") -> int:
    """Calculate the number of tokens in the text using tiktoken.

    Args:
        text: The text to count tokens for.
        model: The tiktoken model to use for encoding.

    Returns:
        Number of tokens.

    Source: RPG-ZeroRepo/zerorepo/utils/api.py (calculate_tokens)
    """
    try:
        import tiktoken
    except ImportError:
        logger.warning(
            "tiktoken is not installed; calculate_tokens() will estimate. "
            "Install with: pip install tiktoken"
        )
        # Rough estimate: 1 token per 4 characters
        return len(text) // 4

    model_to_encoding = {
        "gpt-4": "cl100k_base",
        "gpt-4o": "cl100k_base",
        "gpt-3.5-turbo": "cl100k_base",
        "text-davinci-003": "p50k_base",
        "code-davinci-002": "p50k_base",
    }

    # Unknown models fall back to cl100k_base.
    encoding_name = model_to_encoding.get(model, "cl100k_base")
    enc = tiktoken.get_encoding(encoding_name)

    # Remove special tokens before encoding so encode() does not reject them.
    import re as _re
    specials = enc.special_tokens_set
    pattern = _re.compile("|".join(_re.escape(s) for s in specials))
    cleaned_text = pattern.sub("", text)

    tokens = enc.encode(cleaned_text, disallowed_special=())
    return len(tokens)


def truncate_by_token(
    text: str,
    max_tokens: int = 50000,
    model: str = "gpt-4o",
) -> str:
    """Truncate text by token count, keeping head and tail.

    If the token count does not exceed *max_tokens*, returns the text as-is.
    Otherwise keeps head and tail tokens and removes a middle segment.

    Args:
        text: The text to truncate.
        max_tokens: Maximum allowed token count.
        model: The tiktoken model to use for encoding.

    Returns:
        The (possibly truncated) text.

    Source: RPG-ZeroRepo/zerorepo/utils/api.py (truncate_by_token)
    """
    try:
        import tiktoken
    except ImportError:
        logger.warning(
            "tiktoken is not installed; truncate_by_token() will return raw text. "
            "Install with: pip install tiktoken"
        )
        return text

    model_to_encoding = {
        "gpt-4": "cl100k_base",
        "gpt-4o": "cl100k_base",
        "gpt-3.5-turbo": "cl100k_base",
        "text-davinci-003": "p50k_base",
        "code-davinci-002": "p50k_base",
    }

    encoding_name = model_to_encoding.get(model, "cl100k_base")
    enc = tiktoken.get_encoding(encoding_name)

    tokens = enc.encode(text)
    total = len(tokens)

    if total <= max_tokens:
        return text

    # Split the budget between head and tail; head gets the odd token.
    keep = max_tokens
    head_keep = keep // 2 + keep % 2
    tail_keep = keep // 2

    if keep >= 2:
        head_keep = max(1, head_keep)
        tail_keep = max(1, tail_keep)

    removed = total - (head_keep + tail_keep)
    if removed <= 0:
        return text

    head_tokens = tokens[:head_keep]
    tail_tokens = tokens[-tail_keep:] if tail_keep > 0 else []

    head_str = enc.decode(head_tokens)
    tail_str = enc.decode(tail_tokens)

    marker = (
        f"\n\n... [output truncated: {removed} tokens omitted in the middle] ...\n\n"
    )

    return head_str + marker + tail_str


# ============================================================================
# Tree Mutation Functions
# (Ported from RPG-ZeroRepo/zerorepo/utils/tree.py for M7 RPG Encoding)
# ============================================================================


def convert_leaves_to_list(tree):
    """Recursively convert empty list leaves into empty dicts.

    Source: RPG-ZeroRepo ``zerorepo/utils/tree.py`` (convert_leaves_to_list)
    """
    if isinstance(tree, dict):
        return {k: convert_leaves_to_list(v) for k, v in tree.items()}
    elif isinstance(tree, list):
        # Only EMPTY lists become dicts; non-empty lists pass through.
        if not tree:
            return {}
        return tree
    else:
        return tree


def _collapse_leaf_dicts(node):
    """Collapse dicts where all values are empty lists into a list of keys.

    Source: RPG-ZeroRepo ``zerorepo/utils/tree.py`` (_collapse_leaf_dicts)
    """
    if isinstance(node, dict):
        if not node:
            return {}
        collapsed = {k: _collapse_leaf_dicts(v) for k, v in node.items()}
        if all(isinstance(v, list) and len(v) == 0 for v in collapsed.values()):
            return list(collapsed.keys())
        return collapsed
    elif isinstance(node, list):
        return [_collapse_leaf_dicts(v) for v in node]
    else:
        return node
def _split_path_by_delimiters(path: str, delimiters) -> List[str]:
    """Split a path string by one or more delimiters.

    Source: RPG-ZeroRepo ``zerorepo/utils/tree.py`` (split_path)
    """
    if isinstance(delimiters, str):
        delimiters = [delimiters]
    pattern = "|".join(re.escape(d) for d in delimiters)
    parts = [p.strip() for p in re.split(pattern, path) if p.strip()]
    return parts


def _insert_path(tree: dict, path: str, delimiters="/") -> None:
    """Insert a path into a tree structure, supporting multiple delimiters.

    Intermediate segments become dicts; the final segment becomes an entry
    in a leaf list.  Existing list/scalar nodes are upgraded to dicts when
    the path needs to descend through them.  Key matching is
    case-insensitive.

    Source: RPG-ZeroRepo ``zerorepo/utils/tree.py`` (insert_path)
    """
    if isinstance(delimiters, str):
        parts = [p.strip() for p in path.split(delimiters) if p.strip()]
    else:
        parts = _split_path_by_delimiters(path, delimiters)

    parent, key_in_parent = None, None
    node = tree
    i = 0

    while i < len(parts):
        part = parts[i]
        last = (i == len(parts) - 1)

        if isinstance(node, dict):
            # Case-insensitive lookup of an existing key.
            mk = next((k for k in node if k.lower() == part.lower()), None)

            if last:
                if mk is None:
                    node[part] = []
                break
            else:
                if mk is None:
                    node[part] = {}
                    mk = part
                elif isinstance(node[mk], list):
                    # Upgrade a leaf list into a dict of empty-list leaves
                    # so we can descend into it.
                    node[mk] = {x: [] for x in node[mk]}
                elif not isinstance(node[mk], dict):
                    node[mk] = {}
                parent, key_in_parent = node, mk
                node = node[mk]
                i += 1
                continue

        elif isinstance(node, list):
            if last:
                if part.lower() not in (x.lower() for x in node):
                    node.append(part)
                break
            else:
                # Need to descend through a list: upgrade it in the parent.
                upgraded = {x: [] for x in node}
                parent[key_in_parent] = upgraded
                node = upgraded
                continue
        else:
            # Scalar node on the descent path: replace with an empty dict.
            upgraded = {}
            parent[key_in_parent] = upgraded
            node = upgraded
            continue


def apply_changes(
    tree: dict,
    changes,
    *,
    delimiters="/",
    inplace: bool = True,
    auto_collapse: bool = True,
) -> dict:
    """Batch-insert paths into a tree and optionally normalise leaves.

    Source: RPG-ZeroRepo ``zerorepo/utils/tree.py`` (apply_changes)
    """
    import copy

    target = tree if inplace else copy.deepcopy(tree)
    if isinstance(changes, str):
        changes = [changes]
    for p in changes:
        _insert_path(target, p, delimiters)
    if auto_collapse:
        collapsed = _collapse_leaf_dicts(target)
        if inplace:
            # NOTE(review): if the top-level collapse yields a LIST (all
            # root values were empty lists), tree.update(collapsed) will
            # raise — confirm root trees can never fully collapse.
            tree.clear()
            tree.update(collapsed)
            return tree
        else:
            return collapsed
    return target


def get_rpg_info(
    rpg_tree: List[Dict],
    omit_leaf_nodes: bool = True,
    sample_size: int = 2,
    indent: Optional[int] = None,
) -> str:
    """Get a summarised string representation of an RPG tree structure.

    Source: RPG-ZeroRepo ``zerorepo/utils/tree.py`` (get_rpg_info)
    """

    def _prune(node):
        # Leaf lists: optionally sample down to sample_size entries + "...".
        if isinstance(node, list):
            if not omit_leaf_nodes:
                return node
            if sample_size <= 0:
                return {}
            if len(node) > sample_size:
                return random.sample(node, sample_size) + ["..."]
            return node

        if isinstance(node, dict):
            if not node:
                return {}

            out: Dict[str, Any] = {}
            leaf_keys: List[str] = []

            for k, v in node.items():
                pv = _prune(v)
                # Children that pruned to empty dicts are folded into a
                # compact "_" key (or returned directly if nothing else).
                if isinstance(pv, dict) and not pv:
                    leaf_keys.append(k)
                else:
                    out[k] = pv

            if not out and leaf_keys:
                return leaf_keys

            if leaf_keys:
                out["_"] = leaf_keys

            return out

        return node

    rpg_info: Dict[str, Any] = {}
    for sub_tree in rpg_tree:
        name = sub_tree.get("name")
        tree = sub_tree.get("refactored_subtree", {})
        rpg_info[name] = _prune(tree)

    if indent is None:
        return json.dumps(rpg_info, ensure_ascii=False, separators=(",", ":"))
    return json.dumps(rpg_info, ensure_ascii=False, indent=indent)


def exclude_files(files: List[str]) -> List[str]:
    """Filter out common non-essential files from a file list.

    Returns a list of paths that should be excluded (test files, docs, etc.).

    Source: RPG-ZeroRepo ``zerorepo/utils/repo.py`` (exclude_files)
    """
    excluded: List[str] = []
    exclude_prefixes = (
        "test/", "tests/", "doc/", "docs/",
        "example/", "examples/", "demo/", "demos/",
        "bench/", "benchmarks/",
    )
    exclude_patterns = ("__pycache__", ".egg-info", "node_modules")

    for f in files:
        # Normalise Windows separators before prefix/pattern matching.
        f_lower = f.lower().replace("\\", "/")
        if any(f_lower.startswith(p) for p in exclude_prefixes):
            excluded.append(f)
        elif any(pat in f_lower for pat in exclude_patterns):
            excluded.append(f)
        elif is_test_file(f):
            excluded.append(f)
    return excluded


# ============================================================================
# Text Normalization Functions
# (Ported from RPG-ZeroRepo/zerorepo/utils/repo.py)
# ============================================================================

def normalize_text(text: str) -> str:
    """Normalize text for matching: strip extensions, split camelCase, replace separators with spaces, and lowercase.

    Source: RPG-ZeroRepo/zerorepo/utils/repo.py (normalize_text)
    """
    if not text:
        return ""

    # Strip file extension
    text = re.sub(r"\.[a-zA-Z0-9]+$", "", text)
    # Split camelCase
    text = re.sub(r"(?<=[a-z])(?=[A-Z])", " ", text)
    # Replace path/code separators with spaces
    text = re.sub(r"[/_.\-:]+", " ", text)
    # Collapse whitespace
    text = re.sub(r"\s+", " ", text)
    return text.strip().lower()


def wrap_code_snippet(code_snippet: str, start_line: int, end_line: int) -> str:
    """Wrap a code snippet with line numbers in a fenced code block.

    Args:
        code_snippet: The raw source code string.
        start_line: The 1-based starting line number.
        end_line: The 1-based ending line number.

    Returns:
        A Markdown fenced code block with line-numbered content.

    Source: RPG-ZeroRepo/zerorepo/utils/repo.py (wrap_code_snippet)
    """
    lines = code_snippet.split("\n")
    max_line_number = start_line + len(lines) - 1

    # NOTE(review): assert is stripped under `python -O`; if this
    # consistency check matters in production, raise ValueError instead.
    if not (start_line == end_line == 1):  # which is a file
        assert max_line_number == end_line

    number_width = len(str(max_line_number))
    return (
        "```\n"
        + "\n".join(
            f"{str(i + start_line).rjust(number_width)} | {line}"
            for i, line in enumerate(lines)
        )
        + "\n```"
    )
diff --git a/RPG-Kit/scripts/design_base_classes.py b/RPG-Kit/scripts/design_base_classes.py
new file mode 100644
index 0000000..37f1d4a
--- /dev/null
+++ b/RPG-Kit/scripts/design_base_classes.py
@@ -0,0 +1,555 @@
+#!/usr/bin/env python3
+"""Design Base Classes Script - Implementation Level Step 3.
+
+Function: Design shared base classes and data structures for the repository
+- Reads skeleton.json and data_flow.json for context
+- Designs functional base classes (behavioral abstractions)
+- Designs global data structures (shared data formats)
+- Validates Python code syntax
+
+Input:
+    - .rpgkit/skeleton.json (file structure)
+    - .rpgkit/data_flow.json (data flow between components)
+Output: .rpgkit/base_classes.json (base class definitions with code)
+"""
+
+import json
+import logging
+import argparse
+from pathlib import Path
+from typing import Dict, Any, Optional
+
+# Import trajectory module
+from common.trajectory import Trajectory, load_or_create_trajectory
+
+# Import common utils
+from common import (
+    get_skeleton_tree_string,
+    extract_functional_areas_from_skeleton,
+    format_functional_graph_overview,
+    print_unicode_table,
+    get_repo_info_from_files,
+)
+
+# Import the BaseClassAgent
+from func_design.base_class_agent import (
+    BaseClassAgent,
+    extract_class_names
+)
+from rpg import (
+    RPG, Node, NodeType, EdgeType, NodeMetaData, strip_uuid8, uuid8,
+    class_node_path,
+)
+
+# Import centralized paths
+from common.paths import (
+    SKELETON_FILE as INPUT_SKELETON,
+    DATA_FLOW_FILE as INPUT_DATA_FLOW,
BASE_CLASSES_FILE as OUTPUT_FILE, + REPO_RPG_FILE +) +from common import get_project_background_context + + +def load_data_flow() -> Dict[str, Any]: + """Load data flow configuration if available.""" + if INPUT_DATA_FLOW.exists(): + try: + with open(INPUT_DATA_FLOW, "r", encoding="utf-8") as f: + return json.load(f) + except Exception: + pass + return {} + + +# ============================================================================ +# RPG Update Function +# ============================================================================ + +def update_rpg_with_base_classes(base_classes_data: Dict[str, Any], rpg_path: Path): + """Update RPG with newly designed base classes. + + Creates File nodes and Class nodes for base classes and mounts them based on scope: + - "global": directly under repo_node (L0) + - "": directly under the specified L1 subtree node + + NOTE: Does NOT create intermediate directory nodes. File nodes are mounted + directly to the scope parent regardless of file path depth. 
+ + Args: + base_classes_data: Result dict containing base_classes list (with scope field) + rpg_path: Path to the repo_rpg.json file + """ + if not rpg_path.exists(): + logging.warning(f"RPG file not found: {rpg_path}") + return + + try: + rpg = RPG.load_json(str(rpg_path)) + except Exception as e: + logging.error(f"Failed to load RPG: {e}") + return + + # Cleanup old data first + rpg.remove_nodes_by_generator("design_base_classes") + + base_classes = base_classes_data.get("base_classes", []) + if not base_classes: + rpg.save_json(str(rpg_path)) # Save to persist cleanup + return + + added_nodes = 0 + added_edges = 0 + skipped_nodes = 0 + skipped_edges = 0 + + # Build L1 subtree name -> node mapping for scope resolution + subtree_nodes = {} + for node in rpg.nodes.values(): + if node.level == 1 and node.id != rpg.repo_node.id: + subtree_nodes[node.name] = node + # Also add lowercase version for case-insensitive matching + subtree_nodes[node.name.lower()] = node + + # Index existing file nodes by normalized path + file_nodes = {} + for node_id, node in rpg.nodes.items(): + if node.meta and node.meta.type_name == NodeType.FILE and node.meta.path: + p = str(Path(node.meta.path)) + file_nodes[p] = node + + for bc_file in base_classes: + file_path = bc_file.get("file_path") + code = bc_file.get("code", "") + scope = bc_file.get("scope") + + if not file_path or not code or not scope: + logging.warning("Skipping base class: missing file_path, code, or scope") + continue + + # Determine parent node based on scope + if scope == "global" or scope.lower() == "global": + scope_parent = rpg.repo_node + logging.info(f"Base class file '{file_path}' scope: global (L0)") + else: + # Find the L1 subtree node by name - must be exact match + scope_parent = subtree_nodes.get(scope) + if not scope_parent: + # Get unique subtree names (avoid duplicates from lowercase mapping) + unique_subtrees = sorted(node.name for node in subtree_nodes.values()) + error_msg = ( + f"ERROR: Scope 
'{scope}' for base class '{file_path}' does not match any L1 subtree node. " + f"Available L1 subtrees: {unique_subtrees}" + ) + logging.error(error_msg) + raise ValueError(error_msg) + else: + logging.info(f"Base class file '{file_path}' scope: {scope} (L1)") + + norm_file_path = str(Path(file_path)) + + # Build a composite key that includes scope to distinguish files with same path + # but different scopes (e.g., base.py under different subtrees) + scope_file_key = f"{scope}::{norm_file_path}" + file_node = file_nodes.get(scope_file_key) + + # If file node does not exist, create it + if not file_node: + file_name = Path(file_path).name + file_id_prefix = f"file_{file_name.replace('.', '_')}" + + # Check if file node with same signature exists UNDER THIS SCOPE PARENT + existing_file = rpg.find_node_by_signature(file_name, file_id_prefix, scope_parent.id) + if existing_file: + file_node = existing_file + file_nodes[scope_file_key] = file_node + else: + file_id = f"{file_id_prefix}_{uuid8()}" + file_node = Node( + id=file_id, + name=file_name, + node_type="feature_group", # Base class files are feature_group level + level=None, # Will be set by add_edge based on parent + meta=NodeMetaData( + type_name=NodeType.FILE, + path=file_path, + description=f"Base Class Definition File (scope: {scope})", + generator="design_base_classes" + ) + ) + rpg.add_node(file_node) + + # Mount file node directly to scope_parent (no intermediate directories) + rpg.add_edge(scope_parent.id, file_node.id, EdgeType.CONTAINS) + file_nodes[scope_file_key] = file_node + added_nodes += 1 + + # Extract classes from code + class_names = extract_class_names(code) + + for class_name in class_names: + # Check if class node with same signature already exists under this file + class_id_prefix = f"class_{class_name}" + existing_class = rpg.find_node_by_signature(class_name, class_id_prefix, file_node.id) + if existing_class: + skipped_nodes += 1 + logging.info(f"Class node already exists: 
{class_name}") + continue + + # Create Class Node with canonical RPG path format + class_id = f"{class_id_prefix}_{uuid8()}" + class_path = class_node_path(file_path, class_name) + + class_node = Node( + id=class_id, + name=class_name, + node_type="feature", # Base classes are feature level + level=None, # Will be set by add_edge based on parent + meta=NodeMetaData( + type_name=NodeType.CLASS, + path=class_path, # Precise path: file::class + description=f"Base Class: {class_name} (scope: {scope})", + content=code, + generator="design_base_classes" + ) + ) + rpg.add_node(class_node) + added_nodes += 1 + + # Check if edge with same signature exists + edge_src_prefix = strip_uuid8(file_node.id) + edge_dst_prefix = class_id_prefix + existing_edge = rpg.find_edge_by_signature(edge_src_prefix, edge_dst_prefix, EdgeType.CONTAINS_BASE_CLASS) + if existing_edge: + skipped_edges += 1 + logging.info(f"Edge already exists: {file_node.name} -> {class_name}") + continue + + # Add CONTAINS_BASE_CLASS edge + rpg.add_edge( + src=file_node.id, + dst=class_node.id, + relation=EdgeType.CONTAINS_BASE_CLASS, + meta=NodeMetaData( + description=f"Defines Base Class (scope: {scope})", + generator="design_base_classes" + ) + ) + added_edges += 1 + + if added_nodes > 0 or added_edges > 0: + rpg.save_json(str(rpg_path)) + print(f"[OK] RPG updated: Added {added_nodes} nodes, {added_edges} edges. Skipped {skipped_nodes} nodes, {skipped_edges} edges.") + else: + rpg.save_json(str(rpg_path)) # Save to persist cleanup + print(f"No new base classes added to RPG. 
Skipped {skipped_nodes} existing nodes.") + + +# ============================================================================ +# Base Class Designer +# ============================================================================ + +class BaseClassDesigner: + """Design base classes using BaseClassAgent.""" + + def __init__( + self, + max_iterations: int = 5, + trajectory: Optional[Trajectory] = None + ): + self.max_iterations = max_iterations + self.trajectory = trajectory + self.logger = logging.getLogger(__name__) + self._current_step_id: Optional[int] = None + + def build( + self, + skeleton: Dict[str, Any], + data_flow: Dict[str, Any] + ) -> Dict[str, Any]: + """Design base classes from skeleton and data flow context. + + Args: + skeleton: The skeleton.json data + data_flow: The data_flow.json data + + Returns: + Dict containing base_classes, class_names, etc. + """ + # Get repository info + repo_name, repo_info = get_repo_info_from_files() + + # Get project background / technology context + project_background = get_project_background_context() + + # Extract functional areas from skeleton + functional_areas = extract_functional_areas_from_skeleton(skeleton) + + # Get hierarchical functional areas overview + functional_areas_overview = format_functional_graph_overview(skeleton) + + # Get skeleton tree for context + skeleton_tree = get_skeleton_tree_string(skeleton, max_depth=3) + + # Get data flow edges + data_flow_edges = data_flow.get("data_flow", []) + + print("\n" + "=" * 70) + print("BASE CLASS DESIGN") + print("=" * 70) + print(f"Repository: {repo_name}") + print(f"Functional Areas: {len(functional_areas)}") + print(f"Data Flow Edges: {len(data_flow_edges)}") + print("=" * 70) + + # Record step start + if self.trajectory: + step = self.trajectory.add_step( + "design_base_classes", + "Design shared base classes and data structures" + ) + self._current_step_id = step.step_id + self.trajectory.start_step(step.step_id) + + # Initialize agent and run + agent = 
BaseClassAgent( + max_iterations=self.max_iterations, + logger=self.logger, + trajectory=self.trajectory, + step_id=self._current_step_id + ) + + result = agent.design_base_classes( + repo_name=repo_name, + repo_info=repo_info, + data_flow=data_flow_edges, + skeleton_tree=skeleton_tree, + functional_areas=functional_areas, + functional_areas_overview=functional_areas_overview, + project_background=project_background, + ) + + # Update trajectory + if self.trajectory and self._current_step_id: + if result.get("success"): + self.trajectory.complete_step( + self._current_step_id, + {"class_count": len(result.get("base_classes", []))} + ) + else: + self.trajectory.fail_step( + self._current_step_id, + result.get("error", "Unknown error") + ) + + return result + + def print_summary(self, result: Dict[str, Any]) -> None: + """Print summary of base class design.""" + print("\n" + "=" * 60) + print("BASE CLASS DESIGN SUMMARY") + print("=" * 60) + + base_classes = result.get("base_classes", []) + class_names = result.get("class_names", []) + data_structures = result.get("data_structures", []) + ds_class_names = result.get("data_structure_names", []) + + print(f"\nBase Class Files: {len(base_classes)}") + print(f"Total Base Classes: {len(class_names)}") + print(f"Data Structure Entries: {len(data_structures)}") + print(f"Total Data Structures: {len(ds_class_names)}") + + if base_classes: + rows = [] + for bc in base_classes: + file_path = bc.get("file_path", "")[:40] + code = bc.get("code", "") + classes = extract_class_names(code) + class_str = ", ".join(classes[:3]) + if len(classes) > 3: + class_str += f" (+{len(classes) - 3})" + rows.append([file_path, class_str]) + + print_unicode_table( + headers=["File Path", "Classes"], + rows=rows, + title="Base Class Definitions" + ) + + if data_structures: + rows = [] + for ds in data_structures: + subtree = ds.get("subtree", "")[:30] + code = ds.get("code", "") + classes = extract_class_names(code) + class_str = ", 
".join(classes[:3]) + if len(classes) > 3: + class_str += f" (+{len(classes) - 3})" + df_types = ", ".join(ds.get("data_flow_types", [])[:3]) + rows.append([subtree, class_str, df_types]) + + print_unicode_table( + headers=["Subtree", "Data Structures", "Covers Data Flow Types"], + rows=rows, + title="Data Flow Data Structure Stubs" + ) + + uncovered = result.get("uncovered_data_flow_types", []) + if uncovered: + print(f"\n[WARNING] Uncovered data flow types: {', '.join(uncovered)}") + + if result.get("note"): + print(f"\nโ„น Note: {result['note']}") + if result.get("error"): + print(f"\n[WARNING] Error: {result['error']}") + + print("=" * 60) + + +# ============================================================================ +# Main Entry Point +# ============================================================================ + +def main(): + """Main entry point.""" + parser = argparse.ArgumentParser( + description="Design shared base classes and data structures" + ) + parser.add_argument( + "--skeleton", "-s", + type=str, + default=str(INPUT_SKELETON), + help=f"Skeleton input file (default: {INPUT_SKELETON})" + ) + parser.add_argument( + "--data-flow", "-d", + type=str, + default=str(INPUT_DATA_FLOW), + help=f"Data flow input file (default: {INPUT_DATA_FLOW})" + ) + parser.add_argument( + "--output", "-o", + type=str, + default=str(OUTPUT_FILE), + help=f"Output file (default: {OUTPUT_FILE})" + ) + parser.add_argument( + "--max-iterations", "-m", + type=int, + default=5, + help="Max iterations for valid design (default: 5)" + ) + parser.add_argument( + "--verbose", "-v", + action="store_true", + help="Enable verbose logging" + ) + parser.add_argument( + "--no-trajectory", + action="store_true", + help="Disable trajectory recording" + ) + + args = parser.parse_args() + + # Setup logging + log_level = logging.DEBUG if args.verbose else logging.INFO + logging.basicConfig( + level=log_level, + format="%(asctime)s - %(levelname)s - %(message)s" + ) + logger = 
logging.getLogger(__name__) + + # Load skeleton + skeleton_path = Path(args.skeleton) + if not skeleton_path.exists(): + logger.error(f"Skeleton file not found: {skeleton_path}") + print(f"ERROR: Skeleton file not found: {skeleton_path}") + print("Please run /rpgkit.build_skeleton first.") + return 1 + + with open(skeleton_path, "r", encoding="utf-8") as f: + skeleton = json.load(f) + + # Load data flow (optional, but recommended) + data_flow_path = Path(args.data_flow) + data_flow = {} + if data_flow_path.exists(): + try: + with open(data_flow_path, "r", encoding="utf-8") as f: + data_flow = json.load(f) + except Exception as e: + logger.warning(f"Could not load data flow: {e}") + else: + logger.warning(f"Data flow file not found: {data_flow_path}") + print(f"[WARNING] Warning: Data flow file not found: {data_flow_path}") + print(" Run /rpgkit.build_data_flow first for better results.") + + # Initialize trajectory + trajectory = None + if not args.no_trajectory: + trajectory = load_or_create_trajectory("design_base_classes") + + if trajectory.is_resumable(): + print(f"\n[WARNING] Found in-progress execution from {trajectory.started_at}") + print(f" Resume point: {trajectory.resume_point.step_name}") + print(" (Use --no-trajectory to start fresh)") + + trajectory.start(metadata={ + "skeleton_file": str(skeleton_path), + "data_flow_file": str(data_flow_path), + "output_file": str(args.output), + "max_iterations": args.max_iterations + }) + + try: + # Design base classes + designer = BaseClassDesigner( + max_iterations=args.max_iterations, + trajectory=trajectory + ) + + result = designer.build(skeleton, data_flow) + + # Save output + output_path = Path(args.output) + output_path.parent.mkdir(parents=True, exist_ok=True) + + with open(output_path, "w", encoding="utf-8") as f: + json.dump(result, f, indent=2, ensure_ascii=False) + + logger.info(f"[OK] Base classes saved to: {output_path}") + designer.print_summary(result) + print(f"\n[OK] Base classes saved to: 
{output_path}") + + # Update RPG with base classes + if result.get("success", True): + update_rpg_with_base_classes(result, REPO_RPG_FILE) + + if not result.get("success", True) and "error" in result: + if trajectory: + trajectory.fail(result["error"]) + return 1 + + # Mark trajectory as complete + if trajectory: + trajectory.complete(metadata={ + "base_class_files": len(result.get("base_classes", [])), + "class_names": result.get("class_names", []), + "data_structure_files": len(result.get("data_structures", [])), + "data_structure_names": result.get("data_structure_names", []), + }) + print(f"[OK] Trajectory saved to: {trajectory.trajectory_file}") + + return 0 + + except Exception as e: + logger.error(f"Design failed: {e}") + if trajectory: + trajectory.fail(str(e)) + raise + + +if __name__ == "__main__": + exit(main()) diff --git a/RPG-Kit/scripts/design_interfaces.py b/RPG-Kit/scripts/design_interfaces.py new file mode 100644 index 0000000..5de9201 --- /dev/null +++ b/RPG-Kit/scripts/design_interfaces.py @@ -0,0 +1,1248 @@ +#!/usr/bin/env python3 +"""Design Interfaces Script - Implementation Level Step 4. 
+ +Function: Design function/class interfaces for each file in the repository +- Reads skeleton.json, data_flow.json, and base_classes.json for context +- For each subtree (in data flow order), plans file implementation order +- For each file, designs interfaces with signatures, docstrings, and feature mappings +- Validates Python syntax and docstring presence +- Collects fine-grained dependencies (inheritance, invocation, type references) +- Updates repo_rpg.json with dependency edges + +Input: + - .rpgkit/skeleton.json (file structure with feature assignments) + - .rpgkit/data_flow.json (data flow with subtree order) + - .rpgkit/base_classes.json (base classes for context) +Output: + - .rpgkit/interfaces.json (interfaces organized by subtree and file, with enhanced_data_flow) + - .rpgkit/repo_rpg.json (updated with fine-grained dependency edges) +""" + +import json +import logging +import argparse +from pathlib import Path +from typing import Dict, Any, List, Optional + +# Import trajectory module +from common.trajectory import Trajectory, load_or_create_trajectory +from common.llm_client import LLMClient + +# Import the InterfaceOrchestrator and DependencyCollector +from func_design.interface_agent import InterfaceOrchestrator, DependencyCollector + +# Import Global Interface Reviewer +from func_design.interface_review import ( + InterfaceReviewer, + print_review_summary, +) + +# Import unified InterfacesStore +from func_design.interfaces_store import InterfacesStore + +# Import RPG models for updating repo_rpg.json +from rpg import RPG, Node, NodeType, Edge, EdgeType, NodeMetaData, strip_uuid8 + +# Import centralized paths +from common.paths import ( + SKELETON_FILE as INPUT_SKELETON, + DATA_FLOW_FILE as INPUT_DATA_FLOW, + BASE_CLASSES_FILE as INPUT_BASE_CLASSES, + INTERFACES_FILE as OUTPUT_FILE, + REPO_RPG_FILE, +) +from common import print_unicode_table, get_repo_info_from_files +import ast +from common import get_project_background_context +from 
func_design.interface_review import review_orphan_units + + +def count_total_files(skeleton: Dict[str, Any]) -> int: + """Count total files in skeleton.""" + count = 0 + + def traverse(node): + nonlocal count + if node.get("type") == "file": + if node.get("feature_paths"): # Only count files with features + count += 1 + elif node.get("type") == "directory": + for child in node.get("children", []): + traverse(child) + + root = skeleton.get("root", skeleton) + traverse(root) + return count + + +def count_total_features(skeleton: Dict[str, Any]) -> int: + """Count total features in skeleton.""" + features = set() + + def traverse(node): + if node.get("type") == "file": + for fp in node.get("feature_paths", []): + features.add(fp) + elif node.get("type") == "directory": + for child in node.get("children", []): + traverse(child) + + root = skeleton.get("root", skeleton) + traverse(root) + return len(features) + + +def extract_known_classes_and_types(base_classes: Dict[str, Any]) -> tuple: + """Extract known base class names and type names from base_classes.json. 
+ + Returns: + Tuple of (known_base_classes: Set[str], known_types: Set[str]) + """ + known_base_classes = set() + known_types = set() + + base_classes_list = base_classes.get("base_classes", []) + + for bc in base_classes_list: + code = bc.get("code", "") + if not code: + continue + + try: + tree = ast.parse(code) + for node in ast.walk(tree): + if isinstance(node, ast.ClassDef): + known_base_classes.add(node.name) + # Classes can also be used as types + known_types.add(node.name) + except SyntaxError: + continue + + # Also add class_names if provided + for name in base_classes.get("class_names", []): + known_base_classes.add(name) + known_types.add(name) + + # Also process data_structures - these are known types (not base classes) + data_structures_list = base_classes.get("data_structures", []) + + for ds in data_structures_list: + code = ds.get("code", "") + if code: + try: + tree = ast.parse(code) + for node in ast.walk(tree): + if isinstance(node, ast.ClassDef): + known_types.add(node.name) + except SyntaxError: + continue + + # Also add data_flow_types names as known types + for dt_name in ds.get("data_flow_types", []): + known_types.add(dt_name) + + # Add data_structure_names if provided + for name in base_classes.get("data_structure_names", []): + known_types.add(name) + + return known_base_classes, known_types + + +# ============================================================================ +# RPG Update Function +# ============================================================================ + +def update_rpg_with_interfaces( + interfaces_data: Dict[str, Any], + base_classes: Dict[str, Any], + rpg_path: Path +): + """.. deprecated:: This standalone function is NOT called at runtime. + + The actual RPG update is done by ``InterfacesStore.update_rpg()`` + (in func_design/interfaces_store.py, called at line ~899). + Use ``rpg.service.RPGService`` for new code. + + Update RPG with interface design results. + + This function does NOT create new nodes. 
Instead, it: + 1. Updates existing feature nodes' meta.path with implementation location + 2. Adds SAME_UNIT edges when multiple features share the same implementation unit + 3. Adds fine-grained dependency edges (INHERITS, INVOKES, REFERENCES) + + Each feature maps to at most one implementation unit (class/function/method). + + Args: + interfaces_data: Result dict containing subtrees with interfaces + base_classes: Base classes data (for context) + rpg_path: Path to the repo_rpg.json file + """ + if not rpg_path.exists(): + logging.warning(f"RPG file not found: {rpg_path}") + return + + try: + rpg = RPG.load_json(str(rpg_path)) + except Exception as e: + logging.error(f"Failed to load RPG: {e}") + return + + # Remove old edges by generator + rpg.remove_edges_by_generator("design_interfaces") + + updated_features = 0 + added_same_unit_edges = 0 + added_dependency_edges = 0 + skipped = 0 + + # Build feature name -> node mapping for quick lookup + feature_nodes: Dict[str, Node] = {} + for node in rpg.nodes.values(): + # Feature nodes are typically at node_type "feature" or leaf nodes + if node.node_type == "feature" or node.level == rpg.MAX_FEATURE_LEVEL: + feature_nodes[node.name] = node + # Also index by feature_path + feature_path = node.feature_path() + if feature_path: + feature_nodes[feature_path] = node + + # Track unit -> list of feature nodes mapping for SAME_UNIT edges + unit_to_features: Dict[str, List[Node]] = {} + + # Process interfaces from subtrees + subtrees = interfaces_data.get("subtrees", interfaces_data.get("components", {})) + + for subtree_name, subtree_data in subtrees.items(): + # Support both "interfaces" and "files" format + file_interfaces = subtree_data.get("interfaces", subtree_data.get("files", {})) + + for file_path, file_data in file_interfaces.items(): + # Get units_to_features mapping: {unit_name: [feature_paths]} + units_to_features = file_data.get("units_to_features", {}) + + for unit_name, feature_list in 
units_to_features.items(): + if not isinstance(feature_list, list): + continue + + # Build the implementation path for this unit + # Format: file_path::unit_name (e.g., "src/parser.py::Parser::parse") + impl_path = f"{file_path}::{unit_name}" + + # Track features that share this unit + features_for_unit = [] + + for feature_path in feature_list: + # Find the feature node + feature_node = feature_nodes.get(feature_path) + if not feature_node: + # Try finding by name (last part of path) + feature_name = feature_path.split("/")[-1] if "/" in feature_path else feature_path + feature_node = feature_nodes.get(feature_name) + + if not feature_node: + logging.warning(f"Feature node not found: {feature_path}") + skipped += 1 + continue + + # Update the feature node's meta.path with implementation location + if feature_node.meta is None: + feature_node.meta = NodeMetaData() + + feature_node.meta.path = impl_path + # Note: Do NOT modify generator - it should reflect the step that first created the node + + # Infer type_name from unit_name prefix ("class Foo" or "function bar") + if unit_name.startswith("class "): + feature_node.meta.type_name = NodeType.CLASS + elif unit_name.startswith("function "): + feature_node.meta.type_name = NodeType.FUNCTION + elif "::" in impl_path: + # Fallback: infer from path structure + parts = impl_path.split("::") + if len(parts) >= 3: + feature_node.meta.type_name = NodeType.METHOD + + updated_features += 1 + features_for_unit.append(feature_node) + logging.debug(f"Updated feature '{feature_path}' with path: {impl_path}") + + # Record for SAME_UNIT edge creation + if impl_path not in unit_to_features: + unit_to_features[impl_path] = [] + unit_to_features[impl_path].extend(features_for_unit) + + # Add SAME_UNIT edges for features sharing the same implementation unit + for impl_path, feature_list in unit_to_features.items(): + if len(feature_list) < 2: + continue + + # Create edges between all pairs (only one direction to avoid duplicates) + for 
i in range(len(feature_list)): + for j in range(i + 1, len(feature_list)): + node_a = feature_list[i] + node_b = feature_list[j] + + # Check if edge already exists + existing = rpg.find_edge_by_signature( + strip_uuid8(node_a.id), + strip_uuid8(node_b.id), + EdgeType.SAME_UNIT + ) + if existing: + continue + + # Also check reverse direction + existing_rev = rpg.find_edge_by_signature( + strip_uuid8(node_b.id), + strip_uuid8(node_a.id), + EdgeType.SAME_UNIT + ) + if existing_rev: + continue + + edge = Edge( + src=node_a.id, + dst=node_b.id, + relation=EdgeType.SAME_UNIT, + meta=NodeMetaData( + description=f"Share implementation: {impl_path}", + generator="design_interfaces" + ) + ) + rpg.edges.append(edge) + added_same_unit_edges += 1 + logging.debug(f"Added SAME_UNIT edge: {node_a.name} <-> {node_b.name}") + + # Process enhanced_data_flow for dependency edges (INHERITS, INVOKES, REFERENCES) + enhanced_data_flow = interfaces_data.get("enhanced_data_flow", {}) + + if enhanced_data_flow: + # Process inheritance edges + for edge_data in enhanced_data_flow.get("inheritance_edges", []): + child = edge_data.get("child", "") + parent = edge_data.get("parent", "") + + if not child or not parent: + continue + + # Find nodes by name + child_node = _find_node_by_name(rpg, child) + parent_node = _find_node_by_name(rpg, parent) + + if child_node and parent_node: + existing = rpg.find_edge_by_signature( + strip_uuid8(child_node.id), + strip_uuid8(parent_node.id), + EdgeType.INHERITS + ) + if not existing: + edge = Edge( + src=child_node.id, + dst=parent_node.id, + relation=EdgeType.INHERITS, + meta=NodeMetaData( + description=f"{child} inherits from {parent}", + generator="design_interfaces" + ) + ) + rpg.edges.append(edge) + added_dependency_edges += 1 + + # Process invocation edges + for edge_data in enhanced_data_flow.get("invocation_edges", []): + caller = edge_data.get("caller", "") + callee = edge_data.get("callee", "") + + if not caller or not callee: + continue + + 
caller_node = _find_node_by_name(rpg, caller) + callee_node = _find_node_by_name(rpg, callee) + + if caller_node and callee_node: + existing = rpg.find_edge_by_signature( + strip_uuid8(caller_node.id), + strip_uuid8(callee_node.id), + EdgeType.INVOKES + ) + if not existing: + edge = Edge( + src=caller_node.id, + dst=callee_node.id, + relation=EdgeType.INVOKES, + meta=NodeMetaData( + description=f"{caller} invokes {callee}", + generator="design_interfaces" + ) + ) + rpg.edges.append(edge) + added_dependency_edges += 1 + + # Process reference edges + for edge_data in enhanced_data_flow.get("reference_edges", []): + unit = edge_data.get("unit", "") + ref_type = edge_data.get("referenced_type", "") + + if not unit or not ref_type: + continue + + unit_node = _find_node_by_name(rpg, unit) + type_node = _find_node_by_name(rpg, ref_type) + + if unit_node and type_node: + existing = rpg.find_edge_by_signature( + strip_uuid8(unit_node.id), + strip_uuid8(type_node.id), + EdgeType.REFERENCES + ) + if not existing: + edge = Edge( + src=unit_node.id, + dst=type_node.id, + relation=EdgeType.REFERENCES, + meta=NodeMetaData( + description=f"{unit} references type {ref_type}", + generator="design_interfaces" + ) + ) + rpg.edges.append(edge) + added_dependency_edges += 1 + + # Process global review: mark entry points on RPG nodes + global_review = interfaces_data.get("global_review", {}) + entry_points = global_review.get("entry_points", []) + marked_entry_points = 0 + + if entry_points: + for ep in entry_points: + ep_unit = ep.get("unit_name", "") + ep_file = ep.get("file_path", "") + ep_rationale = ep.get("rationale", "") + + if not ep_unit: + continue + + # Find the node by unit name + ep_node = _find_node_by_name(rpg, ep_unit) + + if not ep_node: + # Try matching by file_path::unit_name in meta.path + expected_path = f"{ep_file}::{ep_unit}" if ep_file else "" + if expected_path: + for node in rpg.nodes.values(): + if node.meta and node.meta.path == expected_path: + ep_node = node 
+ break + + if ep_node: + if ep_node.meta is None: + ep_node.meta = NodeMetaData() + # Append entry_point marker to description + ep_marker = f"[ENTRY_POINT] {ep_rationale}".strip() + if ep_node.meta.description: + if "[ENTRY_POINT]" not in ep_node.meta.description: + ep_node.meta.description += f" | {ep_marker}" + else: + ep_node.meta.description = ep_marker + marked_entry_points += 1 + logging.debug(f"Marked entry point: {ep_unit} in {ep_file}") + else: + logging.debug(f"Entry point node not found in RPG: {ep_unit} ({ep_file})") + + # Save RPG + rpg.save_json(str(rpg_path)) + + total_changes = updated_features + added_same_unit_edges + added_dependency_edges + marked_entry_points + if total_changes > 0: + print(f"[OK] RPG updated: {updated_features} features updated, " + f"{added_same_unit_edges} SAME_UNIT edges, " + f"{added_dependency_edges} dependency edges, " + f"{marked_entry_points} entry points marked. Skipped: {skipped}") + else: + print(f"No interface updates applied. Skipped: {skipped}") + + +def _find_node_by_name(rpg: RPG, name: str) -> Optional[Node]: + """Find a node by name (class, function, or feature name). + + .. deprecated:: + Use ``rpg.service.RPGService.find_node_by_unit_name()`` instead. + + Searches by: + 1. Exact node.name match + 2. meta.path match (e.g., "src/file.py::class Foo" matches "class Foo") + 3. 
Qualified name suffix (e.g., "ClassName.method" -> "method") + """ + # Try exact name match first + for node in rpg.nodes.values(): + if node.name == name: + return node + + # Try matching by meta.path (e.g., "src/file.py::class ClassName") + for node in rpg.nodes.values(): + if node.meta and node.meta.path: + path_str = node.meta.path if isinstance(node.meta.path, str) else "" + # Extract unit part from path like "src/file.py::class Foo" + if "::" in path_str: + unit_part = path_str.split("::", 1)[-1] + # Match "class Foo" == "class Foo" or "function bar" == "function bar" + if unit_part == name: + return node + # Also try matching the bare name (e.g., "Foo" from "class Foo") + if " " in unit_part: + bare_name = unit_part.split(" ", 1)[-1] + if bare_name == name: + return node + + # Try matching by the last part of a qualified name (e.g., "ClassName.method" -> "method") + if "." in name: + short_name = name.rsplit(".", 1)[-1] + for node in rpg.nodes.values(): + if node.name == short_name: + return node + + return None + + +def prune_orphan_features_from_rpg( + surviving_feature_paths: set, + rpg_path: Path, +) -> Dict[str, Any]: + """.. deprecated:: This standalone function is NOT called at runtime. + + The actual pruning is done by ``InterfacesStore._prune_rpg_orphan_features()``. + Use ``rpg.service.RPGService.prune_orphan_features()`` for new code. + + Remove features from repo_rpg.json that have **no surviving interface unit**. + + After interface pruning, ``surviving_feature_paths`` contains the set of + feature paths that still map to at least one interface unit. Any RPG + feature node whose ``feature_path()`` is NOT in this set is removed, + along with any edges referencing the removed nodes. + + Empty parent nodes (feature_group / category / subcategory) whose children + have all been removed are also pruned to keep the tree clean. + + Args: + surviving_feature_paths: Feature path strings that still have at least + one implementing interface unit. 
def prune_orphan_features_from_rpg(
    surviving_feature_paths: set,
    rpg_path: Path,
) -> Dict[str, Any]:
    """.. deprecated:: This standalone function is NOT called at runtime.

    The actual pruning is done by ``InterfacesStore._prune_rpg_orphan_features()``.
    Use ``rpg.service.RPGService.prune_orphan_features()`` for new code.

    Remove features from repo_rpg.json that have **no surviving interface unit**.

    After interface pruning, ``surviving_feature_paths`` contains the set of
    feature paths that still map to at least one interface unit. Any RPG
    leaf feature node whose ``feature_path()`` (or, as a fallback, whose
    ``name``) is NOT in this set is removed, along with any edges referencing
    the removed nodes.

    Empty parent nodes (feature_group / category / subcategory /
    functional_area) whose children have all been removed are also pruned
    to keep the tree clean.

    Args:
        surviving_feature_paths: Feature path strings that still have at least
            one implementing interface unit.
        rpg_path: Path to the ``repo_rpg.json`` file.

    Returns:
        Summary dict with pruned_node_count, pruned_node_names,
        pruned_edge_count. A zeroed summary is returned when the RPG file is
        missing, unloadable, the surviving set is empty, or nothing qualifies
        for removal.
    """
    empty = {"pruned_node_count": 0, "pruned_node_names": [], "pruned_edge_count": 0}

    if not rpg_path.exists():
        logging.warning(f"RPG file not found for orphan pruning: {rpg_path}")
        return empty

    try:
        rpg = RPG.load_json(str(rpg_path))
    except Exception as e:
        logging.error(f"Failed to load RPG for orphan pruning: {e}")
        return empty

    if not surviving_feature_paths:
        # An empty surviving set would classify EVERY feature as an orphan;
        # bail out instead of wiping the whole tree.
        logging.info("No surviving features at all - skipping RPG pruning to avoid wiping entire tree")
        return empty

    # ---- 1. Identify feature nodes to remove ----
    # Only leaf-level feature nodes (node_type "feature" AND level ==
    # MAX_FEATURE_LEVEL) correspond to actual implementation units, so only
    # those are candidates for removal.
    # BUGFIX: the previous guard used `and` (skip only when *neither*
    # feature-typed *nor* leaf-level), which also considered non-leaf feature
    # nodes and non-feature leaf nodes for removal, contradicting the
    # documented leaf-only intent. `str(node.node_type)` is used for the
    # comparison for consistency with the parent-pruning pass below
    # (node_type may be an enum — TODO confirm against the Node model).
    nodes_to_remove: Dict[str, Node] = {}

    for node in rpg.nodes.values():
        if str(node.node_type) != "feature" or node.level != rpg.MAX_FEATURE_LEVEL:
            continue
        fp = node.feature_path()
        if fp and fp in surviving_feature_paths:
            continue  # this feature survives
        if node.name in surviving_feature_paths:
            continue  # fallback match by name
        # This feature has no surviving interface unit -> mark for removal
        nodes_to_remove[node.id] = node

    if not nodes_to_remove:
        logging.info("All RPG feature nodes have surviving interface units - nothing to prune")
        return empty

    removed_ids = set(nodes_to_remove.keys())
    pruned_names = [n.name for n in nodes_to_remove.values()]

    # ---- 2. Remove the feature nodes (dict + tree) ----
    for nid in removed_ids:
        node = rpg.nodes.pop(nid, None)
        if node:
            parent = node.parent()
            if parent:
                parent.remove_child(node)

    # ---- 3. Prune empty parent nodes (bottom-up) ----
    # After removing feature leaves, some parent nodes (feature_group,
    # category, subcategory, functional_area) may have zero remaining
    # children. Remove those iteratively until a fixed point is reached.
    pruned_parents = 0
    parent_types = {"feature_group", "category", "subcategory", "functional_area"}
    changed = True
    while changed:
        changed = False
        to_remove_parents = []
        for nid, node in rpg.nodes.items():
            if str(node.node_type) not in parent_types:
                continue
            if not node.children():
                to_remove_parents.append(nid)
        for nid in to_remove_parents:
            removed_ids.add(nid)
            node = rpg.nodes.pop(nid, None)
            if node:
                parent = node.parent()
                if parent:
                    parent.remove_child(node)
                pruned_parents += 1
                changed = True

    # ---- 4. Remove edges referencing removed nodes ----
    edges_before = len(rpg.edges)
    rpg.edges = [
        e for e in rpg.edges
        if e.src not in removed_ids and e.dst not in removed_ids
    ]
    pruned_edge_count = edges_before - len(rpg.edges)

    # ---- 5. Save ----
    rpg.save_json(str(rpg_path))

    feat_count = len(nodes_to_remove)
    logging.info(
        f"Pruned {feat_count} orphan feature nodes, {pruned_parents} empty parent nodes, "
        f"{pruned_edge_count} edges from RPG"
    )
    print(
        f"[OK] RPG pruned: removed {feat_count} orphan feature nodes"
        + (f", {pruned_parents} empty parent nodes" if pruned_parents else "")
        + f", {pruned_edge_count} edges"
        + f" (surviving: {len(rpg.nodes)} nodes, {len(rpg.edges)} edges)"
    )

    return {
        "pruned_node_count": feat_count + pruned_parents,
        "pruned_node_names": pruned_names,
        "pruned_edge_count": pruned_edge_count,
    }


# ============================================================================
# Interface Designer
# ============================================================================

class InterfaceDesigner:
    """Design interfaces using InterfaceOrchestrator."""

    def __init__(
        self,
        max_file_iterations: int = 10,
        max_planning_retries: int = 3,
        trajectory: Optional[Trajectory] = None,
        output_path: Optional[str] = None
    ):
        self.max_file_iterations = max_file_iterations
        self.max_planning_retries = max_planning_retries
self.trajectory = trajectory + self.output_path = output_path + self.logger = logging.getLogger(__name__) + self._current_step_id: Optional[int] = None + self.llm: Optional[LLMClient] = None # Created lazily when step_id is known + + def build( + self, + skeleton: Dict[str, Any], + data_flow: Dict[str, Any], + base_classes: Dict[str, Any] + ) -> Dict[str, Any]: + """Design interfaces for all files in the skeleton. + + Args: + skeleton: The skeleton.json data + data_flow: The data_flow.json data + base_classes: The base_classes.json data + + Returns: + Dict containing all interfaces organized by subtree + """ + # Get repository info + repo_name, repo_info = get_repo_info_from_files() + + # Enrich repo_info with project background / technology context + # so it flows through to all interface design prompts. + project_background = get_project_background_context() + if project_background and project_background.strip(): + enriched_repo_info = ( + f"{repo_info}\n\n{project_background}\n" + "When the project specifies a concrete technology stack, design interfaces " + "that are idiomatic for those technologies (e.g., Flask route patterns, " + "SQLAlchemy model methods, etc.)." + ) + else: + enriched_repo_info = repo_info + + # Get statistics + total_files = count_total_files(skeleton) + total_features = count_total_features(skeleton) + subtree_order = data_flow.get("subtree_order", []) + + print("\n" + "=" * 70) + print("INTERFACE DESIGN") + print("=" * 70) + print(f"Repository: {repo_name}") + print(f"Total Files: {total_files}") + print(f"Total Features: {total_features}") + print(f"Subtrees: {len(subtree_order)}") + if subtree_order: + print("Processing Order:") + for i, st in enumerate(subtree_order, 1): + print(f" {i}. 
{st}") + print("=" * 70) + + # Record step start + if self.trajectory: + step = self.trajectory.add_step( + "design_interfaces", + f"Design interfaces for {total_files} files" + ) + self._current_step_id = step.step_id + self.trajectory.start_step(step.step_id) + + # Create LLMClient with trajectory support + self.llm = LLMClient(trajectory=self.trajectory, step_id=self._current_step_id) + + # Get base classes list + base_classes_list = base_classes.get("base_classes", []) + data_structures_list = base_classes.get("data_structures", []) + + # Extract known classes and types for dependency analysis + known_base_classes, known_types = extract_known_classes_and_types(base_classes) + + # Initialize dependency collector + dependency_collector = DependencyCollector( + known_base_classes=known_base_classes, + known_types=known_types + ) + + # Store original data flow edges + dependency_collector.set_original_edges(data_flow.get("data_flow", [])) + + # Initialize orchestrator and run + orchestrator = InterfaceOrchestrator( + max_file_iterations=self.max_file_iterations, + max_planning_retries=self.max_planning_retries, + logger=self.logger, + trajectory=self.trajectory, + step_id=self._current_step_id, + output_path=self.output_path + ) + + result = orchestrator.design_all_interfaces( + skeleton=skeleton, + data_flow=data_flow, + base_classes=base_classes_list, + repo_info=enriched_repo_info, + dependency_collector=dependency_collector, + data_structures=data_structures_list + ) + + # ===================================================================== + # Phase 1.5: Post-process invocation edges (normalise + resolve) + # ===================================================================== + global_registry = result.get("_global_registry") + if global_registry: + dependency_collector.post_process_edges(global_registry) + self.logger.info( + f"Post-processed invocation edges: {dependency_collector.get_summary()}" + ) + + # Add enhanced data flow to result + 
result["enhanced_data_flow"] = dependency_collector.to_dict() + + # Log dependency summary + dep_summary = dependency_collector.get_summary() + self.logger.info(f"Collected dependencies: {dep_summary}") + + # ===================================================================== + # Phase 2: Global Interface Review (entry points + wiring + auto-fix) + # ===================================================================== + global_registry = result.pop("_global_registry", None) + import_warnings = result.pop("_import_warnings", []) + + if global_registry and result.get("success"): + self.logger.info("Starting global interface review phase...") + print("\n" + "=" * 70) + print("GLOBAL INTERFACE REVIEW") + print("=" * 70) + + reviewer = InterfaceReviewer( + trajectory=self.trajectory, + step_id=self._current_step_id, + ) + + review_result = reviewer.review_and_fix( + interfaces_data=result, + enhanced_data_flow=result["enhanced_data_flow"], + global_registry=global_registry, + import_warnings=import_warnings, + data_flow_edges=data_flow.get("data_flow", []), + dependency_collector=dependency_collector, + max_fix_iterations=2, + ) + + # Update enhanced_data_flow in result (may have been modified by fixes) + result["enhanced_data_flow"] = dependency_collector.to_dict() + + # Store review results in interfaces output + result["global_review"] = { + "entry_points": review_result.get("final_entry_points", []), + "feature_orphans_count": len(review_result.get("final_feature_orphans", [])), + "iterations_run": review_result.get("iterations_run", 0), + "passed": review_result.get("passed", False), + } + + # Store import warnings summary + if import_warnings: + result["import_warnings_count"] = len(import_warnings) + + # Print review summary + print_review_summary(review_result) + + # ================================================================= + # Phase 3: Create InterfacesStore and prune orphans + # ================================================================= + # 
Create unified store from current result + store = InterfacesStore.from_legacy_format( + interfaces_data=result, + enhanced_data_flow=result["enhanced_data_flow"], + global_review={ + "entry_points": review_result.get("final_entry_points", []), + }, + ) + + # ================================================================= + # Phase 3b: Review and prune orphan units + # ================================================================= + # First, find orphan units + orphan_keys = store.find_orphan_units() + prune_summary = None # Initialize to None + + if orphan_keys: + print(f"\nFound {len(orphan_keys)} orphan interface units (no call edges)") + + # Get details for review + orphan_details = store.get_orphan_unit_details(orphan_keys) + + # Review orphans using LLM (grouped by subtree) + print(" Reviewing orphan units with LLM (by subtree)...") + + orphan_review_result = review_orphan_units( + orphan_details=orphan_details, + repo_info=repo_info, + subtree_interfaces=result.get("subtrees", {}), + llm_client=self.llm, + ) + + # Apply completed edges first (before pruning, so retained units get connected) + if orphan_review_result.completed_edges: + all_edges = orphan_review_result.get_all_edges() + edges_added = store.add_edges(all_edges) + if edges_added: + print(f" [OK] Added {edges_added} missing edges (design completion)") + + # Report retained units + if orphan_review_result.keys_to_retain: + print(f" [OK] Retaining {len(orphan_review_result.keys_to_retain)} units (deemed necessary)") + for key in orphan_review_result.keys_to_retain[:5]: + print(f" - {key}") + if len(orphan_review_result.keys_to_retain) > 5: + print(f" ... 
and {len(orphan_review_result.keys_to_retain) - 5} more") + + # Prune only the units that LLM confirmed should be pruned + if orphan_review_result.keys_to_prune: + prune_summary = store.prune_units(orphan_review_result.keys_to_prune) + + pruned_count = len(prune_summary.pruned_units) + pruned_file_count = len(prune_summary.pruned_files) + orphan_feat_count = len(prune_summary.orphan_features) + print( + f"\n[OK] Pruned {pruned_count} orphan interface units" + + (f", {pruned_file_count} empty files removed" if pruned_file_count else "") + + (f", {orphan_feat_count} features orphaned" if orphan_feat_count else "") + ) + + # Record pruning info in global_review + result["global_review"]["pruned_units_count"] = pruned_count + result["global_review"]["pruned_files_count"] = pruned_file_count + result["global_review"]["orphan_features"] = prune_summary.get_orphan_features_list() + result["global_review"]["retained_orphans_count"] = len(orphan_review_result.keys_to_retain) + result["global_review"]["pruned_units"] = [ + f"{unit.file_path}::{unit.name}" for unit in prune_summary.pruned_units + ] + else: + print("\n[OK] All orphan units retained after review") + result["global_review"]["retained_orphans_count"] = len(orphan_review_result.keys_to_retain) + + # ================================================================= + # Phase 4: Update result from store and update RPG + # ================================================================= + # Update result with store's current state (reflects pruning) + store_export = store.to_interfaces_json() + result["subtrees"] = store_export["subtrees"] + result["enhanced_data_flow"] = store_export["enhanced_data_flow"] + result["implemented_subtrees"] = store_export["implemented_subtrees"] + + # Store surviving feature paths for potential later use + if prune_summary: + result["_surviving_feature_paths"] = prune_summary.surviving_feature_paths + + # Update RPG using the store + rpg_summary = store.update_rpg(REPO_RPG_FILE) + + # 
Record RPG pruning in global_review + if rpg_summary.pruned_feature_nodes > 0: + result["global_review"]["rpg_pruned_nodes"] = ( + rpg_summary.pruned_feature_nodes + rpg_summary.pruned_parent_nodes + ) + + # Update dependency summary + dep_summary = store.get_stats() + self.logger.info(f"Final store stats: {dep_summary}") + else: + if not global_registry: + self.logger.info("GlobalInterfaceRegistry not available, skipping global review") + + # Update trajectory + if self.trajectory and self._current_step_id: + if result.get("success"): + # Count successful files + success_count = 0 + total_count = 0 + for subtree_data in result.get("subtrees", {}).values(): + # Support both "interfaces" (reference format) and "files" (old format) + file_container = subtree_data.get("interfaces", subtree_data.get("files", {})) + for file_data in file_container.values(): + total_count += 1 + # Check if units exist (success indicator) + if file_data.get("units"): + success_count += 1 + + self.trajectory.complete_step( + self._current_step_id, + {"success_files": success_count, "total_files": total_count} + ) + else: + self.trajectory.fail_step( + self._current_step_id, + result.get("error", "Unknown error") + ) + + return result + + def print_summary(self, result: Dict[str, Any]) -> None: + """Print summary of interface design.""" + print("\n" + "=" * 60) + print("INTERFACE DESIGN SUMMARY") + print("=" * 60) + + subtrees = result.get("subtrees", {}) + subtree_order = result.get("subtree_order", []) + + print(f"\nSubtrees Processed: {len(subtrees)}") + + # Summary per subtree + total_files = 0 + total_success = 0 + total_interfaces = 0 + + rows = [] + for subtree_name in subtree_order: + subtree_data = subtrees.get(subtree_name, {}) + # Support both "interfaces" (reference format) and "files" (old format) + file_container = subtree_data.get("interfaces", subtree_data.get("files", {})) + + file_count = len(file_container) + success_count = sum(1 for f in file_container.values() if 
f.get("units")) + interface_count = sum(len(f.get("units", [])) for f in file_container.values()) + + total_files += file_count + total_success += success_count + total_interfaces += interface_count + + status = "[OK]" if success_count == file_count else f"[WARNING] {success_count}/{file_count}" + rows.append([subtree_name[:25], file_count, interface_count, status]) + + if rows: + print_unicode_table( + headers=["Subtree", "Files", "Interfaces", "Status"], + rows=rows, + title="Per-Subtree Summary" + ) + + print(f"\nTotal Files: {total_files}") + print(f"Successful: {total_success}") + print(f"Total Interfaces: {total_interfaces}") + + if total_files > 0: + success_rate = (total_success / total_files) * 100 + print(f"Success Rate: {success_rate:.1f}%") + + # List any failures + failed_files = [] + for subtree_name, subtree_data in subtrees.items(): + # Support both "interfaces" (reference format) and "files" (old format) + file_container = subtree_data.get("interfaces", subtree_data.get("files", {})) + for file_path, file_data in file_container.items(): + if not file_data.get("units"): + failed_files.append(file_path) + + if failed_files: + print(f"\n[WARNING] Failed Files ({len(failed_files)}):") + for f in failed_files[:10]: + print(f" - {f}") + if len(failed_files) > 10: + print(f" ... 
and {len(failed_files) - 10} more") + + # Print dependency summary + enhanced_data_flow = result.get("enhanced_data_flow", {}) + if enhanced_data_flow: + inheritance_count = len(enhanced_data_flow.get("inheritance_edges", [])) + invocation_count = len(enhanced_data_flow.get("invocation_edges", [])) + reference_count = len(enhanced_data_flow.get("reference_edges", [])) + + if inheritance_count or invocation_count or reference_count: + print("\nCollected Dependencies:") + print(f" - Inheritance edges: {inheritance_count}") + print(f" - Invocation edges: {invocation_count}") + print(f" - Reference edges: {reference_count}") + + # Show cross-file vs same-file breakdown + cross_file = sum( + 1 for e in enhanced_data_flow.get("invocation_edges", []) + if e.get("caller_file") != e.get("callee_file") and e.get("callee_file") + ) + no_callee = sum( + 1 for e in enhanced_data_flow.get("invocation_edges", []) + if not e.get("callee_file") + ) + print(f" - Cross-file invocations: {cross_file}") + if no_callee: + print(f" - Unresolved callee_file: {no_callee}") + + # Print global review summary + global_review = result.get("global_review", {}) + if global_review: + print("\nGlobal Review:") + print(f" - Passed: {'[OK]' if global_review.get('passed') else '[FAIL]'}") + print(f" - Entry points: {len(global_review.get('entry_points', []))}") + orphans = global_review.get("feature_orphans_count", 0) + if orphans: + print(f" - Orphan features: {orphans}") + if result.get("import_warnings_count"): + print(f" - Import cross-validation warnings: {result['import_warnings_count']}") + + # Pruning info + pruned_units_count = global_review.get("pruned_units_count", 0) + pruned_files_count = global_review.get("pruned_files_count", 0) + if pruned_units_count: + print(f" - Pruned orphan units: {pruned_units_count}") + if pruned_files_count: + print(f" - Pruned empty files: {pruned_files_count}") + orphan_features = global_review.get("orphan_features", []) + if orphan_features: + print(f" - 
Orphan features (pruned from RPG): {len(orphan_features)}") + for of in orphan_features[:5]: + print(f" - {of['feature_path']} ({of['unit_key']})") + if len(orphan_features) > 5: + print(f" ... and {len(orphan_features) - 5} more") + rpg_pruned = global_review.get("rpg_pruned_nodes", 0) + if rpg_pruned: + print(f" - RPG nodes pruned: {rpg_pruned}") + + print("=" * 60) + + +# ============================================================================ +# Main Entry Point +# ============================================================================ + +def main(): + """Main entry point.""" + parser = argparse.ArgumentParser( + description="Design function/class interfaces for repository files" + ) + parser.add_argument( + "--skeleton", "-s", + type=str, + default=str(INPUT_SKELETON), + help=f"Skeleton input file (default: {INPUT_SKELETON})" + ) + parser.add_argument( + "--data-flow", "-d", + type=str, + default=str(INPUT_DATA_FLOW), + help=f"Data flow input file (default: {INPUT_DATA_FLOW})" + ) + parser.add_argument( + "--base-classes", "-b", + type=str, + default=str(INPUT_BASE_CLASSES), + help=f"Base classes input file (default: {INPUT_BASE_CLASSES})" + ) + parser.add_argument( + "--output", "-o", + type=str, + default=str(OUTPUT_FILE), + help=f"Output file (default: {OUTPUT_FILE})" + ) + parser.add_argument( + "--max-file-iterations", "-m", + type=int, + default=10, + help="Max iterations per file (default: 10)" + ) + parser.add_argument( + "--verbose", "-v", + action="store_true", + help="Enable verbose logging" + ) + parser.add_argument( + "--no-trajectory", + action="store_true", + help="Disable trajectory recording" + ) + + args = parser.parse_args() + + # Setup logging + log_level = logging.DEBUG if args.verbose else logging.INFO + logging.basicConfig( + level=log_level, + format="%(asctime)s - %(levelname)s - %(message)s" + ) + logger = logging.getLogger(__name__) + + # Load skeleton + skeleton_path = Path(args.skeleton) + if not skeleton_path.exists(): + 
logger.error(f"Skeleton file not found: {skeleton_path}") + print(f"ERROR: Skeleton file not found: {skeleton_path}") + print("Please run /rpgkit.build_skeleton first.") + return 1 + + with open(skeleton_path, "r", encoding="utf-8") as f: + skeleton = json.load(f) + + # Load data flow + data_flow_path = Path(args.data_flow) + data_flow = {} + if data_flow_path.exists(): + try: + with open(data_flow_path, "r", encoding="utf-8") as f: + data_flow = json.load(f) + except Exception as e: + logger.warning(f"Could not load data flow: {e}") + else: + logger.warning(f"Data flow file not found: {data_flow_path}") + print(f"[WARNING] Warning: Data flow file not found: {data_flow_path}") + print(" Run /rpgkit.build_data_flow first for better results.") + + # Load base classes + base_classes_path = Path(args.base_classes) + base_classes = {} + if base_classes_path.exists(): + try: + with open(base_classes_path, "r", encoding="utf-8") as f: + base_classes = json.load(f) + except Exception as e: + logger.warning(f"Could not load base classes: {e}") + else: + logger.warning(f"Base classes file not found: {base_classes_path}") + print(f"[WARNING] Warning: Base classes file not found: {base_classes_path}") + print(" Run /rpgkit.design_base_classes first for better results.") + + # Initialize trajectory + trajectory = None + if not args.no_trajectory: + trajectory = load_or_create_trajectory("design_interfaces") + + if trajectory.is_resumable(): + print(f"\n[WARNING] Found in-progress execution from {trajectory.started_at}") + print(f" Resume point: {trajectory.resume_point.step_name}") + print(" (Use --no-trajectory to start fresh)") + + trajectory.start(metadata={ + "skeleton_file": str(skeleton_path), + "data_flow_file": str(data_flow_path), + "base_classes_file": str(base_classes_path), + "output_file": str(args.output), + "max_file_iterations": args.max_file_iterations + }) + + try: + # Design interfaces + designer = InterfaceDesigner( + 
max_file_iterations=args.max_file_iterations, + trajectory=trajectory, + output_path=str(args.output) + ) + + result = designer.build(skeleton, data_flow, base_classes) + + # Extract internal keys before JSON serialisation + result.pop("_surviving_feature_paths", None) + + # Save output (interfaces.json) + output_path = Path(args.output) + output_path.parent.mkdir(parents=True, exist_ok=True) + + with open(output_path, "w", encoding="utf-8") as f: + json.dump(result, f, indent=2, ensure_ascii=False) + + logger.info(f"[OK] Interfaces saved to: {output_path}") + designer.print_summary(result) + print(f"\n[OK] Interfaces saved to: {output_path}") + + # RPG update is now handled inside InterfaceDesigner.build() via InterfacesStore + + if not result.get("success", True) and "error" in result: + if trajectory: + trajectory.fail(result["error"]) + return 1 + + # Mark trajectory as complete + if trajectory: + subtrees = result.get("subtrees", {}) + total_files = sum( + len(st.get("files", {})) + for st in subtrees.values() + ) + # Include dependency summary in metadata + enhanced_data_flow = result.get("enhanced_data_flow", {}) + trajectory.complete(metadata={ + "subtrees": len(subtrees), + "total_files": total_files, + "inheritance_edges": len(enhanced_data_flow.get("inheritance_edges", [])), + "invocation_edges": len(enhanced_data_flow.get("invocation_edges", [])), + "reference_edges": len(enhanced_data_flow.get("reference_edges", [])) + }) + print(f"[OK] Trajectory saved to: {trajectory.trajectory_file}") + + return 0 + + except Exception as e: + logger.error(f"Design failed: {e}") + if trajectory: + trajectory.fail(str(e)) + raise + + +if __name__ == "__main__": + exit(main()) diff --git a/RPG-Kit/scripts/feature/__init__.py b/RPG-Kit/scripts/feature/__init__.py new file mode 100644 index 0000000..462e4c4 --- /dev/null +++ b/RPG-Kit/scripts/feature/__init__.py @@ -0,0 +1,36 @@ +#!/usr/bin/env python3 +"""Feature Module for RPG-Kit. 
+ +This module provides prompt templates for feature tree operations: +- Feature build (expansion and review) +- Feature edit (planning and review) +- Feature refactor (planning and organization) +""" + +from .prompts import ( + # Feature Build Prompts + PROMPT_TEMPLATE_BUILD_REVIEW, + PROMPT_TEMPLATE_BUILD_FEATURE, + PROMPT_TEMPLATE_BUILD_DIRECTED, + PROMPT_TEMPLATE_SUGGEST_DIRECTIONS, + # Feature Edit Prompts + PROMPT_TEMPLATE_EDIT_PLAN, + PROMPT_TEMPLATE_EDIT_REVIEW, + # Feature Refactor Prompts + PROMPT_TEMPLATE_SUBTREE_PLANNING, + PROMPT_TEMPLATE_FEATURE_ORGANIZATION, +) + +__all__ = [ + # Feature Build + "PROMPT_TEMPLATE_BUILD_REVIEW", + "PROMPT_TEMPLATE_BUILD_FEATURE", + "PROMPT_TEMPLATE_BUILD_DIRECTED", + "PROMPT_TEMPLATE_SUGGEST_DIRECTIONS", + # Feature Edit + "PROMPT_TEMPLATE_EDIT_PLAN", + "PROMPT_TEMPLATE_EDIT_REVIEW", + # Feature Refactor + "PROMPT_TEMPLATE_SUBTREE_PLANNING", + "PROMPT_TEMPLATE_FEATURE_ORGANIZATION", +] diff --git a/RPG-Kit/scripts/feature/prompts.py b/RPG-Kit/scripts/feature/prompts.py new file mode 100644 index 0000000..22a573d --- /dev/null +++ b/RPG-Kit/scripts/feature/prompts.py @@ -0,0 +1,1212 @@ +#!/usr/bin/env python3 +"""Prompt Templates for Feature Tree Operations. + +Contains all prompt templates used in feature tree workflows: +- Feature build prompts (expansion and review) +- Feature edit prompts (planning and review) +- Feature refactor prompts (subtree planning and organization) +""" + +# ============================================================================ +# Feature Build Prompts +# ============================================================================ + +PROMPT_TEMPLATE_BUILD_REVIEW = r""" +## Instruction +You are a Feature Coverage Review Assistant. Your task is to analyze whether ALL described capabilities from the repository_specification have been properly converted into feature paths in the feature tree. 
+ +Review coverage across ALL specification sections: +- **functional_requirements**: Every leaf-level requirement must have a corresponding feature path +- **background_and_overview**: Page structures, routes, data models, and technology integrations described here are implementable features +- **non_functional_requirements**: Security measures, performance constraints, etc. should have concrete feature paths + +Perform a **semantic matching** analysis - do not require exact text matches, but verify that the **intent and functionality** of each described item is represented in the feature tree. + +Your primary responsibility is to ensure completeness โ€” the feature tree should cover WHAT the system does across all spec sections. + +## Review Process +1. Extract distinct capabilities from ALL sections of the repository_specification (functional_requirements, background_and_overview, non_functional_requirements) +2. For each capability, semantically match it against existing feature paths +3. Identify any requirements that are NOT adequately covered +4. For uncovered requirements, generate new feature paths that would cover them +5. Calculate: coverage_percentage = (covered_requirements / total_requirements) * 100 +6. 
**Validate leaf nodes against the Minimum Implementable Unit (MIU) principle** + +## IMPORTANT: Coverage Consistency Rule +- If this is a follow-up review (previous_review_result is provided), you MUST maintain consistency: + - Any requirement/capability that was previously marked as COVERED should remain covered (unless the feature tree was modified to remove relevant paths) + - Coverage percentage should NOT decrease unless paths were removed + - Only focus on the previously identified missing functionalities + - The new coverage should be: previous_coverage + (newly_covered / total_requirements * 100) + +## MINIMUM IMPLEMENTABLE UNIT (MIU) PRINCIPLE +**This validation applies ONLY to LEAF NODES (the final segment of each path), NOT to intermediate nodes or top-level categories.** + +Each leaf node MUST be a **Minimum Implementable Unit** โ€” independently implementable, testable, and deliverable. + +**MIU Criteria:** +1. **Single Action**: One verb + one object; no "and"/"or" +2. **Testable**: Clear input โ†’ output or observable state change +3. **Atomic**: One function/method scope; assignable as one dev task +4. 
**Behavior-focused**: Describes WHAT the system does, not internal execution steps + +**[WARNING] CONSERVATIVE DELETION PRINCIPLE (VERY IMPORTANT):** +- **ONLY flag nodes that are CLEARLY and OBVIOUSLY invalid** - no borderline cases +- **When in doubt, KEEP the node** - do not delete +- **Domain-specific terms are usually VALID** - do not flag them as implementation details +- **Prefer suggesting improvements over deletion** - if a node is marginally acceptable, keep it + +**CATEGORY-SPECIFIC GUIDANCE (BE LENIENT):** +- Mathematical/statistical operations ARE VALID (e.g., "calculate percentage", "compute average", "aggregate counts", "sum values") +- Algorithm-related operations ARE VALID (e.g., "sort by priority", "topological ordering", "detect cycles", "traverse graph") +- Data structure operations ARE VALID (e.g., "index lookup", "cache retrieval", "queue insertion", "hash mapping") +- Transformation operations ARE VALID (e.g., "parse markers", "normalize paths", "extract values", "filter items") +- Domain-specific operations ARE VALID - respect the repository's domain terminology + +**Only flag as INVALID if ALL of these are true:** +1. The node describes a pure internal loop/recursion step (e.g., "iterate items", "recurse children") +2. The node has NO observable outcome or testable result +3. The node is NOT a domain-specific operation +4. 
You are 100% confident it should be removed + +**IMPORTANT DISTINCTIONS:** +- Top-level categories are organizational and should NOT be flagged as MIU violations +- Intermediate path segments are organizational and should NOT be validated against MIU +- Only the FINAL segment (leaf node) of each path is subject to MIU validation + + +When reviewing, if any leaf nodes violate MIU: +- **Only add to `invalid_leaf_nodes` if you are 100% certain** it is invalid +- Provide `suggested_replacements` that are valid MIUs +- **If unsure, do NOT add to invalid_leaf_nodes** - leave it alone + +## FEATURE PATH NAMING STANDARD (for suggested paths) +- TOP-LEVEL CATEGORIES: Use categories that best fit the repository's domain (e.g., workflow, functionality, api, service, module, component, feature, etc.) +- CHARACTERS: lowercase letters aโ€“z, digits 0โ€“9; single spaces allowed inside segments +- SEPARATOR: forward slash "/" ONLY +- PATH DEPTH: 4โ€“7 segments +- VALIDATION: First segment is the top-level category, followed by 3-6 hierarchical segments + +## DUPLICATE LEAF NODE RESOLUTION +If duplicate leaf names are detected in the current feature tree, you MUST resolve them by renaming. + +**Rules:** +- For each set of duplicate leaf names, keep ONE path unchanged +- Rename the other duplicate(s) to make leaf names unique across the entire tree +- Use more specific or contextual names that reflect the path's location + +**Output format for renames:** +``` +"duplicate_leaf_renames": [ + "old/full/path/duplicate_name -> more_specific_name", + "another/path/duplicate_name -> contextual_name" +] +``` + +## Output Format +Your response MUST contain exactly one block and exactly one block. +**IMPORTANT: You MUST ALWAYS output the block, even if there are no changes to suggest.** + + +1. List all requirements/capabilities found in the repository_specification +2. For each, explain which feature paths cover it (or note if uncovered) +3. 
Validate leaf nodes against MIU and WHAT-only rules +4. Check for duplicate leaf names and plan renames if needed +5. Provide your coverage calculation methodology + + + +{{ + "coverage_percentage": 85.5, + "has_gaps": true, + "missing_functionalities": [ + "Filter Books by Publication Date - date range filtering not covered" + ], + "suggested_paths": [ + {{"path": "workflow/date filtering/ui controls/range selection", "description": "Allow users to select a date range for filtering books"}} + ], + "invalid_leaf_nodes": [ + "workflow/user/manage account - too broad" + ], + "suggested_replacements": [ + {{"path": "workflow/user/account/update email address", "description": "Allow users to change their email address"}} + ], + "duplicate_leaf_renames": [ + "path/to/duplicate -> unique_name" + ] +}} + + +**When everything is perfect (100% coverage, no MIU violations, no duplicates), output:** + +{{ + "coverage_percentage": 100.0, + "has_gaps": false, + "missing_functionalities": [], + "suggested_paths": [], + "invalid_leaf_nodes": [], + "suggested_replacements": [], + "duplicate_leaf_renames": [] +}} + + +## Inputs + +### Previous Review Result (if any): +{previous_review} + +### Duplicate Leaf Names Detected: +{duplicate_leaves_info} + +### Current Feature Tree: +{current_tree} + +### Repository Information: +{repo_info} + +""" + +PROMPT_TEMPLATE_BUILD_FEATURE = r""" +## Instruction +You are a GitHub Repository Feature Tree Builder. Your task is to convert the repository specification into a comprehensive, structured feature tree. 
Extract features from **all sections** of the specification: + +- **functional_requirements**: The primary source โ€” convert every described capability into feature paths +- **background_and_overview**: Extract implementable features from architecture descriptions, page structures, data models, technology integrations, and routing definitions +- **non_functional_requirements**: Convert security measures, performance constraints, and other cross-cutting concerns into concrete feature paths + +**GUIDING PRINCIPLE: Comprehensive Specification Coverage** +- Cover ALL sections of the specification, not just functional_requirements +- If the spec describes pages, routes, or UI structure (even in background sections), create features for them +- If the spec defines data models, create features for the model layer +- If the spec lists security measures, create features for each measure +- Each feature should trace back to the specification where possible +- When the spec implies but does not fully detail a capability (e.g., mentions a page but not its sub-features), you may reasonably expand it into concrete implementable features +- Use domain knowledge to fill in standard supporting features that the spec assumes but does not enumerate (e.g., data models, form handling, error responses) + +## LEAF NODE GRANULARITY (MIU Principle) +Each leaf node (final path segment) MUST be a **Minimum Implementable Unit** - independently implementable, testable, and deliverable. + +**MIU Criteria:** +1. **Single Action**: One verb + one object; no "and"/"or" +2. **Testable**: Clear input โ†’ output or observable state change +3. **Atomic**: One function/method scope; assignable as one dev task +4. 
**Behavior-focused**: Describes WHAT the system does, not pure control flow + +**[WARNING] AVOID THESE PATTERNS (will be flagged in review):** +- Pure loop/iteration steps: "iterate items", "loop through elements", "traverse nodes" +- Pure recursion steps: "recurse children", "recurse subtree" +- Internal state only: "set flag", "increment counter", "mark visited" +- No observable outcome: operations with no return value or side effect + +**[OK] VALID leaf node patterns:** +- Returns a value: "calculate X", "compute Y", "get Z" +- Produces output: "generate X", "create Y", "format Z" +- Performs action with result: "detect X", "validate Y", "resolve Z" +- Transforms data: "parse X", "convert Y", "normalize Z" +- Stores/retrieves: "cache X", "lookup Y", "index Z" + +**[OK] VALID Examples by Category:** + +`computation operation` (mathematical/statistical operations): +- `calculate coverage percentage` โ†’ returns a number +- `compute execution duration` โ†’ returns time value +- `aggregate test counts` โ†’ returns summary statistics +- `measure memory usage` โ†’ returns memory metrics + +`algorithm` (algorithm-related operations): +- `sort tests by priority` โ†’ returns ordered list +- `detect circular dependencies` โ†’ returns cycle info +- `resolve fixture ordering` โ†’ returns ordered fixtures +- `match keyword expression` โ†’ returns matching items + +`data structures` (data structure operations): +- `index test by nodeid` โ†’ enables lookup +- `cache fixture result` โ†’ stores for reuse +- `enqueue test item` โ†’ adds to queue +- `lookup parent node` โ†’ retrieves parent + +`data processing` (transformation operations): +- `parse marker expression` โ†’ extracts marker info +- `normalize file path` โ†’ standardizes path format +- `extract parameter values` โ†’ retrieves param data +- `format error message` โ†’ produces readable output + +**[FAIL] INVALID Examples (will be deleted in review):** +- `iterate test items` โ†’ pure loop step, no outcome +- `recurse 
into children` โ†’ pure recursion step +- `visit graph node` โ†’ traversal step only +- `increment failure count` โ†’ internal state only + +## LEAF NODE UNIQUENESS REQUIREMENT +**Do NOT generate paths whose leaf node name already exists in the Current Feature Tree.** +If a leaf name is taken, use a more specific or different name. + +## FEATURE PATH NAMING AND SELECTION STANDARD +- TOP-LEVEL CATEGORIES: Use categories that best fit the repository's domain and existing tree structure + - Analyze the current feature tree and repository context to determine appropriate categories + - Common examples: workflow, functionality, api, service, module, component, feature, core, util, etc. +- CHARACTERS: lowercase letters aโ€“z, digits 0โ€“9; single spaces allowed inside segments +- PATH AS NODE SEQUENCE: + - Each feature path MUST be a sequence of nodes, with each node as one segment. + - Segments MUST be separated **only** by "/" (forward slash). + - No other separators are allowed. +- SEPARATOR: forward slash "/" ONLY + - No leading or trailing "/" + - No double slashes + - No spaces around "/" +- DISALLOWED: underscores, hyphens, commas, parentheses, dots, emojis, special symbols +- SEGMENT LENGTH: 1โ€“5 words (prefer 2โ€“4) +- PATH DEPTH: 4โ€“7 segments (minimum: category / subsystem / feature group / specific feature) +- GROUPING: cluster related leaves under shared parents; avoid scattering similar concepts +- NORMALIZATION (apply before validation): + 1) trim leading/trailing spaces + 2) collapse multiple spaces into one + 3) convert underscores and hyphens to spaces + 4) remove non-essential noise phrases + 5) enforce exactly one "/" between segments + 6) remove leading/trailing "/" + 7) convert to lowercase +- HARD VALIDATION (post-normalization): + - Path must have 4-7 segments (3-6 slashes) + - Each segment: 1-5 words, lowercase alphanumeric with single spaces + - REGEX for segment format: ^[a-z0-9]+(?: [a-z0-9]+){{0,4}}$ +- SELECTION POLICY: + - Prefer 
extending existing branches that map to spec requirements before creating new trunks + - Only create new top-level categories when no existing category fits a spec requirement +- INTERMEDIATE SEGMENTS MUST BE SELF-DESCRIBING: + - Each non-leaf segment is the only carrier of that level's meaning (intermediate nodes do NOT have a description field). Choose names that a reader can understand without surrounding context. + - Source intent / Resulting segment: + "user-facing display surfaces" โ†’ "article display" + "front-end pages" โ†’ "frontend pages" + "authentication routes" โ†’ "auth routes" + - A segment name reads correctly when read alone, in any path it appears in. + +## Task +Convert the repository specification into a comprehensive feature tree that covers everything needed for a working implementation. + +## Specification Mapping Guidelines +1. **Read ALL sections of the specification** (functional_requirements, background_and_overview, non_functional_requirements) and extract every described capability +2. **For each described capability**, create concrete feature paths needed to implement it +3. **Reasonably expand implied features** โ€” if the spec describes a page, also create features for the forms, navigation, and data handling that page requires +4. **Include standard supporting features** that any working implementation would need (e.g., data model definitions, input validation, error handling at system boundaries) +5. **Do NOT copy generic examples** โ€” design the hierarchy based on the actual repository's domain +6. **Maintain consistency** with any existing tree structure (if current_tree is not empty) + +### Path Format Reference (structure only, NOT content to copy) +``` +{{top_level_category}}/{{subsystem}}/{{feature_group}}/{{specific_action}} +``` +- 4-7 segments deep +- Each segment: 1-5 lowercase words +- Separator: "/" only + +## Output Format +Your response MUST contain exactly one block and exactly one block. 
+**IMPORTANT: You MUST ALWAYS output the block with valid JSON, even if you have no new paths to add.** + + +Describe your analysis approach: +- List requirements from ALL spec sections: functional_requirements, background_and_overview, and non_functional_requirements +- For each new feature path, reference which spec section and item it comes from +- Which spec items still need to be converted into features +- How you ensured no duplicate leaf names with existing tree + + + +{{ + "add_new_feature_paths": [ + {{"path": "path/to/feature1", "description": "Brief 1-2 sentence description of what this feature does"}}, + {{"path": "path/to/feature2", "description": "Brief 1-2 sentence description of what this feature does"}} + ], + "is_complete": false, + "completion_reason": "" +}} + + +**When the feature tree adequately covers all functional requirements from the specification, output:** + +{{ + "add_new_feature_paths": [], + "is_complete": true, + "completion_reason": "All functional requirements from the specification are adequately covered by the feature tree." +}} + + +Constraints for add_new_feature_paths: +- Each item MUST be a JSON object with "path" and "description" keys. +- The "path" value MUST be a single feature path string composed of multiple nodes separated by "/" (e.g., "functionality/memory management/pooling/adaptive scheduling"). +- The "description" value MUST be a concise 1-2 sentence explanation of what this feature does. +- Do NOT return nested structures or any other separators inside paths. +- All paths MUST follow the naming and validation rules above. +- Leaf node names must NOT duplicate existing ones in Current Feature Tree. + +Notes: +- The block MUST contain valid JSON - this is mandatory. +- No additional comments or text outside the two blocks. 
+ +## Completion Judgment +- Set `is_complete: true` when ALL sections of the specification have been covered: + - Every functional_requirement has corresponding feature paths + - Every page/route/UI element from background_and_overview has feature paths + - Every data model from background_and_overview has feature paths + - Every security/non-functional measure has feature paths +- Avoid generating features that are completely unrelated to the repository's domain +- Ask yourself: "Is there anything described in ANY section of the spec, or reasonably implied by it, that doesn't have a feature path yet?" + +## Output Expectations +- Generate feature paths covering ALL spec sections (no fixed minimum or maximum) +- Every feature path uses "/" to separate nodes/segments +- Depth between 4โ€“7 segments +- Grouped siblings with shared prefixes +- No duplicate leaf names with Current Feature Tree + +## Inputs + +### Current Feature Tree: +{current_tree} + +### Repository Information: +{repo_info} +""" + +PROMPT_TEMPLATE_BUILD_EXPAND = r""" +## Instruction +You are a GitHub Repository Feature Expansion Assistant. The current feature tree already covers all explicitly described requirements from the repository specification. Your task is to expand the feature tree **beyond the existing specification** by adding features that are **genuinely necessary and reasonable** for a complete, production-quality implementation. 
+ +**CRITICAL PRINCIPLE: Beyond-Spec Expansion Only** +- The existing tree already covers the spec โ€” do NOT re-implement or duplicate existing features +- Only add features that the spec does NOT describe but that are **practically necessary** +- Each feature must serve a clear, concrete purpose โ€” explain why the repository would be incomplete without it +- Do NOT add features that are merely speculative, decorative, or "nice to have" +- Fewer, well-justified features are always better than many loosely related ones + +## LEAF NODE GRANULARITY (MIU Principle) +Each leaf node (final path segment) MUST be a **Minimum Implementable Unit** - independently implementable, testable, and deliverable. + +**MIU Criteria:** +1. **Single Action**: One verb + one object; no "and"/"or" +2. **Testable**: Clear input โ†’ output or observable state change +3. **Atomic**: One function/method scope; assignable as one dev task +4. **Behavior-focused**: Describes WHAT the system does, not pure control flow + +**[WARNING] AVOID THESE PATTERNS (will be flagged in review):** +- Pure loop/iteration steps: "iterate items", "loop through elements", "traverse nodes" +- Pure recursion steps: "recurse children", "recurse subtree" +- Internal state only: "set flag", "increment counter", "mark visited" +- No observable outcome: operations with no return value or side effect + +**[OK] VALID leaf node patterns:** +- Returns a value: "calculate X", "compute Y", "get Z" +- Produces output: "generate X", "create Y", "format Z" +- Performs action with result: "detect X", "validate Y", "resolve Z" +- Transforms data: "parse X", "convert Y", "normalize Z" +- Stores/retrieves: "cache X", "lookup Y", "index Z" + +## LEAF NODE UNIQUENESS REQUIREMENT +**Do NOT generate paths whose leaf node name already exists in the Current Feature Tree.** +If a leaf name is taken, use a more specific or different name. 
+ +## FEATURE PATH NAMING AND SELECTION STANDARD +- TOP-LEVEL CATEGORIES: Use categories that best fit the repository's domain and existing tree structure + - Analyze the current feature tree and repository context to determine appropriate categories + - Common examples: workflow, functionality, api, service, module, component, feature, core, util, etc. +- CHARACTERS: lowercase letters aโ€“z, digits 0โ€“9; single spaces allowed inside segments +- PATH AS NODE SEQUENCE: + - Each feature path MUST be a sequence of nodes, with each node as one segment. + - Segments MUST be separated **only** by "/" (forward slash). + - No other separators are allowed. +- SEPARATOR: forward slash "/" ONLY + - No leading or trailing "/" + - No double slashes + - No spaces around "/" +- DISALLOWED: underscores, hyphens, commas, parentheses, dots, emojis, special symbols +- SEGMENT LENGTH: 1โ€“5 words (prefer 2โ€“4) +- PATH DEPTH: 4โ€“7 segments (minimum: category / subsystem / feature group / specific feature) +- GROUPING: cluster related leaves under shared parents; avoid scattering similar concepts +- NORMALIZATION (apply before validation): + 1) trim leading/trailing spaces + 2) collapse multiple spaces into one + 3) convert underscores and hyphens to spaces + 4) remove non-essential noise phrases + 5) enforce exactly one "/" between segments + 6) remove leading/trailing "/" + 7) convert to lowercase +- HARD VALIDATION (post-normalization): + - Path must have 4-7 segments (3-6 slashes) + - Each segment: 1-5 words, lowercase alphanumeric with single spaces + - REGEX for segment format: ^[a-z0-9]+(?: [a-z0-9]+){{0,4}}$ +- SELECTION POLICY: + - Prefer extending existing branches before creating new trunks + - Only create new top-level categories when no existing category fits +- INTERMEDIATE SEGMENTS MUST BE SELF-DESCRIBING: + - Each non-leaf segment is the only carrier of that level's meaning (intermediate nodes do NOT have a description field). 
Choose names that a reader can understand without surrounding context. + - Source intent / Resulting segment: + "user-facing display surfaces" โ†’ "article display" + "front-end pages" โ†’ "frontend pages" + "authentication routes" โ†’ "auth routes" + - A segment name reads correctly when read alone, in any path it appears in. + +## Task +Analyze the current feature tree and repository specification to identify functional areas that are NOT covered by the spec but are **practically necessary** for a production-quality implementation. Add features for these areas. + +## Analysis Approach +1. **Review the spec-based tree** โ€” understand what is already covered +2. **Identify practical gaps** โ€” what functionality would a real implementation need that the spec doesn't mention? +3. **Prioritize by necessity** โ€” focus on features the repository genuinely cannot work without +4. **Consider common patterns** for this type of repository โ€” error handling, edge cases, data validation at system boundaries, performance-critical operations +5. **Maintain consistency** with the existing tree structure + +### Path Format Reference (structure only, NOT content to copy) +``` +{{top_level_category}}/{{subsystem}}/{{feature_group}}/{{specific_action}} +``` +- 4-7 segments deep +- Each segment: 1-5 lowercase words +- Separator: "/" only + +## Output Format +Your response MUST contain exactly one block and exactly one block. 
+**IMPORTANT: You MUST ALWAYS output the block with valid JSON, even if you have no new paths to add.** + + +Describe your analysis approach: +- What functionality is already covered by the spec-based tree +- What practical gaps exist that a real implementation would need +- For each new feature, explain why the repository would be noticeably incomplete without it +- How you ensured no duplicate leaf names with existing tree +- Confirm you are NOT duplicating spec-covered features + + + +{{ + "add_new_feature_paths": [ + {{"path": "path/to/feature1", "description": "Brief 1-2 sentence description of what this feature does"}}, + {{"path": "path/to/feature2", "description": "Brief 1-2 sentence description of what this feature does"}} + ], + "is_complete": false, + "completion_reason": "" +}} + + +**When all genuinely necessary beyond-spec features have been added:** + +{{ + "add_new_feature_paths": [], + "is_complete": true, + "completion_reason": "All genuinely necessary features beyond the specification have been added." +}} + + +Constraints for add_new_feature_paths: +- Each item MUST be a JSON object with "path" and "description" keys. +- The "path" value MUST be a single feature path string composed of multiple nodes separated by "/" (e.g., "functionality/memory management/pooling/adaptive scheduling"). +- The "description" value MUST be a concise 1-2 sentence explanation of what this feature does. +- Do NOT return nested structures or any other separators inside paths. +- All paths MUST follow the naming and validation rules above. +- Leaf node names must NOT duplicate existing ones in Current Feature Tree. + +Notes: +- The block MUST contain valid JSON - this is mandatory. +- No additional comments or text outside the two blocks. 
+ +## Completion Judgment +- Set `is_complete: true` when all **genuinely necessary** beyond-spec features have been added +- Do NOT over-expand: if the tree is already comprehensive enough for production use, stop immediately +- Only add features the repository genuinely needs โ€” not speculative ones +- Ask yourself: "Would this repository be noticeably incomplete or broken without this feature?" โ€” if no, do not add it + +## Output Expectations +- Generate only features that are genuinely necessary beyond the spec (no fixed minimum or maximum) +- Every feature path uses "/" to separate nodes/segments +- Depth between 4โ€“7 segments +- Grouped siblings with shared prefixes +- No duplicate leaf names with Current Feature Tree + +## Inputs + +### Current Feature Tree: +{current_tree} + +### Repository Information: +{repo_info} +""" + +PROMPT_TEMPLATE_BUILD_DIRECTED = r""" +## Instruction +You are a GitHub Repository Feature Expansion Assistant. Your task is to expand the feature tree **beyond the existing specification** in a specific direction chosen by the user. The current tree already covers the spec requirements โ€” your job is to add features that the spec does NOT describe but that are **genuinely necessary and reasonable** for a complete, production-quality implementation. 
+ +IMPORTANT CONSTRAINTS: +- Only add features that are **genuinely necessary** for the repository to work well in practice +- Each feature must serve a clear, concrete purpose โ€” you must be able to explain why the repository would be incomplete without it +- Do NOT add features that are merely speculative, decorative, or "nice to have" +- Do NOT duplicate or overlap with features already in the tree (those already cover the spec) +- Fewer, well-justified features are always better than many loosely related ones + +## Expansion Direction +{direction} + +## LEAF NODE GRANULARITY (MIU Principle) +Each leaf node (final path segment) MUST be a **Minimum Implementable Unit** - independently implementable, testable, and deliverable. + +**MIU Criteria:** +1. **Single Action**: One verb + one object; no "and"/"or" +2. **Testable**: Clear input โ†’ output or observable state change +3. **Atomic**: One function/method scope; assignable as one dev task +4. **Behavior-focused**: Describes WHAT the system does, not pure control flow + +## LEAF NODE UNIQUENESS REQUIREMENT +**Do NOT generate paths whose leaf node name already exists in the Current Feature Tree.** +If a leaf name is taken, use a more specific or different name. 
+ +## FEATURE PATH NAMING AND SELECTION STANDARD +- TOP-LEVEL CATEGORIES: Use categories that best fit the repository's domain and existing tree structure +- CHARACTERS: lowercase letters a–z, digits 0–9; single spaces allowed inside segments +- SEPARATOR: forward slash "/" ONLY +- PATH DEPTH: 4–7 segments +- GROUPING: cluster related leaves under shared parents; avoid scattering similar concepts +- NORMALIZATION: trim spaces, collapse multiple spaces, convert underscores/hyphens to spaces, lowercase +- HARD VALIDATION: + - Path must have 4-7 segments (3-6 slashes) + - Each segment: 1-5 words, lowercase alphanumeric with single spaces + - REGEX for segment format: ^[a-z0-9]+(?: [a-z0-9]+){{0,4}}$ +- INTERMEDIATE SEGMENTS MUST BE SELF-DESCRIBING: + - Each non-leaf segment is the only carrier of that level's meaning (intermediate nodes do NOT have a description field). Choose names that a reader can understand without surrounding context. + - Source intent / Resulting segment: + "user-facing display surfaces" → "article display" + "front-end pages" → "frontend pages" + "authentication routes" → "auth routes" + - A segment name reads correctly when read alone, in any path it appears in. + +## Completion Judgment +- Set `is_complete: true` when the expansion direction has been **sufficiently covered** with all genuinely necessary features +- Do NOT over-expand: if the direction is well-covered, stop immediately +- Only add features that the repository genuinely needs — not speculative ones +- Ask yourself: "Would this repository be noticeably incomplete or broken without this feature?" — if no, do not add it + +## Output Format +Your response MUST contain exactly one block and exactly one block. 
+ + +Describe your analysis: +- What the expansion direction requires that is NOT already in the spec-based tree +- For each feature, explain concretely why the repository would be incomplete without it +- Which areas of the direction still need coverage +- Why you consider the expansion complete or incomplete + + + +{{ + "add_new_feature_paths": [ + {{"path": "path/to/feature1", "description": "Brief 1-2 sentence description of what this feature does"}}, + {{"path": "path/to/feature2", "description": "Brief 1-2 sentence description of what this feature does"}} + ], + "is_complete": false, + "completion_reason": "" +}} + + +**When the direction is sufficiently expanded:** + +{{ + "add_new_feature_paths": [], + "is_complete": true, + "completion_reason": "The expansion direction has been fully covered with all necessary features." +}} + + +Constraints for add_new_feature_paths: +- Each item MUST be a JSON object with "path" and "description" keys. +- The "path" value MUST be a single feature path string with "/" separators. +- The "description" value MUST be a concise 1-2 sentence explanation of what this feature does. +- All paths MUST follow the naming and validation rules above. +- Leaf node names must NOT duplicate existing ones in Current Feature Tree. + +## Inputs + +### Current Feature Tree: +{current_tree} + +### Repository Information: +{repo_info} +""" + +PROMPT_TEMPLATE_SUGGEST_DIRECTIONS = r""" +## Instruction +You are a Feature Tree Analysis Assistant. The current feature tree already covers all explicitly described requirements from the specification. Your task is to suggest **4 to 6 expansion directions** for features that go **beyond the spec** but are **genuinely necessary** for a complete, production-quality repository. 
+ +Each direction should represent a coherent functional area or capability that is: +- **NOT already covered** by the existing feature tree (which covers the spec) +- **Genuinely necessary** for the repository to work well in practice +- Concrete enough to guide feature expansion +- Something the repository would be noticeably incomplete without +- NOT speculative, generic, or merely "nice to have" + +Do NOT suggest directions that duplicate what the spec already covers. + +## Analysis Process +1. Understand the repository's purpose and what the spec-based tree already covers +2. **Review the expansion history** (previously generated directions and user selections) to understand what has already been explored and what the user considered important +3. Identify functional areas that the spec does not describe but that are **practically necessary** +4. Focus on areas where the repository would be incomplete or impractical without them +5. Rank directions by how essential they are to a working, production-quality repository + +## IMPORTANT: Expansion History Awareness +- **Previously selected directions** indicate areas the user found important โ€” use them to understand the user's priorities and suggest complementary directions +- **Previously generated but NOT selected directions** may still be relevant โ€” you may suggest them again if they remain genuinely necessary, but consider whether the user intentionally skipped them +- **Do NOT suggest directions that have already been expanded** (i.e., previously selected and expanded into the feature tree) +- Use the history to generate **progressively more refined and contextually relevant** suggestions + +## Output Format +Your response MUST contain exactly one block and exactly one block. + + +Analyze the current tree structure, review expansion history, and identify underrepresented or missing functional areas. 
+ + + +{{ + "directions": [ + {{ + "name": "Short direction name", + "description": "2-3 sentence description of what this direction covers", + "rationale": "Why this direction is important for the repository" + }} + ] +}} + + +## Inputs + +### Current Feature Tree: +{current_tree} + +### Repository Information: +{repo_info} + +### Expansion History: +{expansion_history} +""" + + +# ============================================================================ +# Feature Edit Prompts +# ============================================================================ + +PROMPT_TEMPLATE_EDIT_PLAN = """You are an expert software architect. Your task is to create a precise edit plan for modifying a feature tree. + +## User Edit Instructions + +``` +{edit_instruction} +``` + +## Repository Information + +- **Repository Name**: {repository_name} +- **Repository Purpose**: {repository_purpose} + +## Available Components + +The feature tree is organized into the following components. Each component has a `refactored_subtree` containing feature paths. + +{components_summary} + +## Your Task + +Analyze the user's instructions and create a detailed edit plan that specifies EXACTLY which paths to add/remove from which components. + +### Supported Operation Types + +1. **ADD** - Add new features to a component + - For new top-level feature: `paths_to_add: ["new_category/new_feature"]` + - For adding under existing path: `paths_to_add: ["existing_category/existing_subcategory/new_feature"]` + - Example: Add "support parquet format" under "IO Operations/file formats": + ```json + {{ + "component_name": "IO & Serialization", + "operation_type": "ADD", + "paths_to_remove": [], + "paths_to_add": ["IO Operations/file formats/support parquet format"], + "reason": "Add parquet format support" + }} + ``` + +2. 
**DELETE** - Remove features from a component + - `paths_to_remove: ["path/to/feature"]` removes the leaf + - `paths_to_remove: ["path/to/category"]` removes entire category with all children + - Example: + ```json + {{ + "component_name": "Data Structures", + "operation_type": "DELETE", + "paths_to_remove": ["deprecated/old_feature"], + "paths_to_add": [], + "reason": "Remove deprecated feature" + }} + ``` + +3. **MOVE** - Move features between components + - Requires TWO separate ComponentOperation entries: + - First: DELETE from source component + - Second: ADD to target component + - Example: Move "rolling window" from Component A to Component B: + ```json + [ + {{ + "component_name": "Component A", + "operation_type": "DELETE", + "paths_to_remove": ["windowing/rolling window"], + "paths_to_add": [], + "reason": "Move rolling window to Component B" + }}, + {{ + "component_name": "Component B", + "operation_type": "ADD", + "paths_to_remove": [], + "paths_to_add": ["windowing/rolling window"], + "reason": "Receive rolling window from Component A" + }} + ] + ``` + +4. **RENAME** - Rename a feature (same component) + - DELETE old path + ADD new path in ONE ComponentOperation + - Example: Rename "feature A" to "feature B": + ```json + {{ + "component_name": "X", + "operation_type": "MODIFY", + "paths_to_remove": ["category/feature A"], + "paths_to_add": ["category/feature B"], + "reason": "Rename feature A to feature B" + }} + ``` + +5. 
**EXTEND** - Expand an existing leaf into a category with children + - DELETE the original leaf, ADD new sub-features + - Example: Expand "data validation" into multiple specific validators: + ```json + {{ + "component_name": "Data Processing", + "operation_type": "MODIFY", + "paths_to_remove": ["validation/data validation"], + "paths_to_add": [ + "validation/data validation/type checking", + "validation/data validation/range validation", + "validation/data validation/null handling" + ], + "reason": "Expand data validation into specific validators" + }} + ``` + +6. **MERGE** - Combine multiple features into one + - DELETE multiple old paths, ADD one consolidated path + - Example: Merge similar features: + ```json + {{ + "component_name": "Analytics", + "operation_type": "MODIFY", + "paths_to_remove": ["stats/mean calculation", "stats/average calculation"], + "paths_to_add": ["stats/mean and average calculation"], + "reason": "Merge duplicate statistics features" + }} + ``` + +7. **SPLIT** - Split one feature into multiple + - DELETE one path, ADD multiple new paths + - Example: Split a complex feature: + ```json + {{ + "component_name": "IO", + "operation_type": "MODIFY", + "paths_to_remove": ["file operations"], + "paths_to_add": ["file operations/read operations", "file operations/write operations"], + "reason": "Split file operations into read and write" + }} + ``` + +### Important Rules + +1. **Paths must be exact** - use the exact path format shown in the component summaries +2. **One operation per component** - combine all changes for each component into one operation +3. **Validate paths exist** - only DELETE paths that actually exist in the component +4. **Use consistent naming** - new paths should follow the existing naming conventions +5. 
**Cross-component moves** require separate DELETE and ADD operations + +### Path Format + +- Paths use "/" as delimiter +- Example: "dataframe/windowing/create rolling window" +- The path represents the hierarchy from root to leaf + +## Output Format + +Your response must contain exactly one block and exactly one block. + + +1. Understand what the user wants to do +2. Identify source and target components +3. List exact paths to remove and add +4. Verify the plan is consistent + + + +{{ + "summary": "Brief description of the edit plan", + "operations": [ + {{ + "component_name": "Component Name Here", + "operation_type": "DELETE|ADD|MODIFY", + "paths_to_remove": ["path/to/remove1", "path/to/remove2"], + "paths_to_add": ["path/to/add1", "path/to/add2"], + "reason": "Why this operation is needed" + }} + ], + "is_valid": true, + "validation_notes": "Any notes about the plan" +}} + +""" + + +PROMPT_TEMPLATE_EDIT_REVIEW = """You are an expert software architect reviewing the results of a feature tree edit operation. + +## Original User Instructions + +``` +{edit_instruction} +``` + +## Edit Plan That Was Generated + +Summary: {plan_summary} + +Operations planned: +{plan_operations} + +## Execution Results + +{execution_results} + +## State Before Edit + +{state_before} + +## State After Edit + +{state_after} + +## Duplicate Features Detected + +{duplicate_features} + +## Model Analysis Context (if available) + +{model_analysis} + +## Your Task + +Review whether the edit operation was executed correctly and achieved the user's intent. If there are issues or incomplete changes, generate fix operations. + +### Review Criteria + +1. **Plan Accuracy**: Did the plan correctly interpret the user's intent? +2. **Execution Accuracy**: Was the plan executed correctly? +3. **No Side Effects**: Are there any unintended changes? +4. **Completeness**: Were all requested changes made? +5. **Consistency**: Is the resulting tree structure consistent and logical? +6. 
**No Duplicates**: For MOVE operations, ensure features only exist in the TARGET component, not in both source and target. + +### If Issues Found + +If you find issues that need to be fixed, set `needs_fix` to `true` and provide `fix_operations`. + +## Output Format + +IMPORTANT: Output ONLY a valid JSON object inside tags. Do NOT include any text before or after the JSON. Keep string values on single lines without line breaks. + + +{{ + "thinking": "Brief analysis in one line", + "summary": "What was edited and the outcome in one line", + "execution_matches_plan": true, + "execution_matches_intent": true, + "issues_found": [], + "suggestions": [], + "overall_success": true, + "confidence_score": 0.95, + "needs_fix": false, + "fix_operations": [] +}} + + +If fixes are needed, use this format for fix_operations: +{{ + "needs_fix": true, + "fix_operations": [ + {{"component_name": "Name", "operation_type": "DELETE", "paths_to_remove": ["path"], "paths_to_add": [], "reason": "Why"}} + ] +}} + +RULES: +1. All string values must be on a single line (no newlines inside strings) +2. Use double quotes for all strings +3. Boolean values must be lowercase: true or false +4. Arrays can be empty: [] +5. No trailing commas +""" + + +# ============================================================================ +# Feature Refactor Prompts +# ============================================================================ + +PROMPT_TEMPLATE_SUBTREE_PLANNING = """You are an expert software architect specializing in feature tree organization and modular system design. + +Your task is to analyze feature trees and design logical subtree structures that represent coherent functional components. + +## Definition of Subtrees +In this task, a subtree does not refer to an arbitrary internal tree node. +1. A subtree represents a top-level functional area of the repository. +2. 
Each subtree should correspond to a distinct, self-contained domain of functionality that contributes to the overall system. +3. Subtrees must not overlap. + - No subtree may conceptually contain another subtree. + - No feature should reasonably belong to more than one subtree. +4. Think of subtrees as the primary architectural divisions of the entire repository. +5. They describe how the system is logically partitioned at the functional level, not how individual nodes are arranged in the feature tree. + +## Expertise +You are expected to rely on the following knowledge areas: + +- Software architecture and modular design principles +- Feature clustering and functional decomposition +- Domain-driven design concepts +- System organization best practices + +Guidelines for Subtree Planning + +## Functional Cohesion +- Group features that work together to achieve the same functional objective +- Consider data flow and dependencies when determining boundaries +- Separate concerns that serve clearly different purposes + +## Modularity Principles +- Determine the appropriate number of subtrees based on the actual complexity and domain structure of the feature tree +- Each subtree should have a specific and focused role in the system +- Minimize dependencies between different subtrees +- Maximize cohesion inside each subtree + +## Adaptive Subtree Count Guidelines +The number of subtrees should emerge from semantic analysis of the domain, NOT from feature count formulas. + +### Primary Principle: Domain-Driven Division +1. **Identify natural functional boundaries first** - What are the distinct responsibility areas in this system? +2. **Each subtree = one coherent domain** - A subtree should answer "what does this part of the system do?" with a clear, focused answer +3. **Let the domain dictate the count** - If the system naturally has 2 major areas, use 2. If it has 12, use 12. 
+ +### Quality Indicators (use these to validate your division, not to determine count): +- **Cohesion check**: Features within a subtree should be more related to each other than to features in other subtrees +- **Naming check**: If you struggle to name a subtree clearly, it may lack coherent purpose +- **Size balance check**: Subtrees with vastly different sizes (e.g., one has 50 features, another has 2) may indicate poor boundary placement +- **Dependency check**: Subtrees should have minimal cross-dependencies + +### Red Flags to Avoid: +- **Forced splits**: Creating subtrees just to reduce size, not because of semantic difference +- **Catch-all subtrees**: Names like "Utilities", "Misc", "Other" suggest poor domain analysis +- **Single-feature subtrees**: Unless it represents a truly distinct concern (e.g., "Authentication" might be small but distinct) +- **Overlapping responsibilities**: If a feature could reasonably belong to multiple subtrees, the boundaries are unclear + +### Reference Boundaries (soft guidelines, not rules): +- A subtree with fewer than 1-5 features: Consider if it should merge with a related subtree +- A subtree with more than 100-200 features: Consider if it should be split into sub-domains +- These are sanity checks, not targets + +## Naming Conventions +- Use clear and descriptive subtree names +- Names should reflect the primary function or domain +- Avoid vague, abstract or overly technical naming +- Consider clarity for readers unfamiliar with implementation details + +## Size Considerations +- Balance sizes so no subtree becomes disproportionately large +- Allow variation where complexity demands it +- Larger subtrees may later be subdivided internally +- Avoid trivial subtrees that serve no meaningful standalone purpose +- A subtree with only 1-2 features should be merged into a related subtree unless it represents a truly distinct concern + +## Output Requirements +Your output should provide: + +- A list of subtrees with names 
and functional purposes +- A concise explanation of organizational decisions +- An estimated number of features belonging to each subtree +- Coverage that accounts for the entire feature tree space + +## Output Format +Your response must contain exactly one block and exactly one block, with no other content outside these two blocks. + +Your internal reasoning and drafts. Treat this as architectural design notes. +Include: +1. Analysis of the feature tree's size and complexity +2. Identification of natural domain boundaries +3. Justification for the chosen number of subtrees + + +{ + "total_subtrees": "", + "subtree_plans": [ + { + "name": "", + "purpose": "", + "estimate_size": "" + } + ], + "reasoning": "" +} + +""" + +PROMPT_TEMPLATE_FEATURE_ORGANIZATION = """ +## CRITICAL: Path Format Requirement [WARNING] + +Every path in assigned_paths MUST have **2 to 8 segments** separated by "/" (i.e., 1-7 slashes). + +**Required Format:** `/.../` (minimum 2 segments, maximum 8 segments) + +**Format Guidelines:** +- Minimum: 2 segments (e.g., `/`) +- Recommended: 3-5 segments for balanced hierarchy +- Maximum: 8 segments for deeply nested structures +- Final segment: Leaf feature name (MUST match exactly from source tree) + +**Valid Format Examples:** +- 2 segments: `/` +- 3 segments: `//` +- 4 segments: `///` +- 5+ segments: Use when semantic grouping requires deeper hierarchy + +**Invalid Formats (DO NOT USE):** +- `` [FAIL] (only 1 segment, needs at least 2) +- `////////` [FAIL] (9 segments, exceeds maximum of 8) + +**Derive appropriate segment names from the repository's domain and subtree purposes.** + +--- + +## Instruction +You are acting as a senior software architecture engineer responsible for refactoring a complex five-level feature tree into a clean, modular, and semantically consistent architecture. 
+ +Your objective is to reorganize all functionality into well-defined modules that are: +- semantically meaningful, +- non-overlapping, +- internally coherent, +- aligned with the natural structure and intent of the system. + +## Subtree Definition +A Subtree represents a distinct functional area with a flexible hierarchical structure (2-8 levels deep): + +{ + "name": "", + "refactored_subtree": { + "": { + "": [ + "", + "" + ] + } + } +} + +Explanation: +- name: concise label summarizing the scope of this functional area. +- refactored_subtree: structured hierarchy with 2-8 segments: + - Minimum 2 segments: `/` + - Recommended 3-5 segments for balanced organization + - Maximum 8 segments for complex nested structures + - Final segment (Leaf): concrete features originating directly from the original feature tree. + +## Leaf Assignment Rules +All assignment actions operate only on leaf nodes of the remaining feature tree. + +1. Every value in assigned_paths must correspond to a leaf node currently present in the remaining feature tree. +2. Intermediate categories, internal nodes, or partially expanded paths must never be assigned directly. +3. Leaf labels must remain exactly as they appear in the source feature tree. +4. A leaf that has already been assigned in previous steps must not be reused. + +If a value does not exist as a leaf in the remaining feature tree, it must not appear in assigned_paths. + +## Path Refactoring Rules +The original feature tree structure is input only for meaning, not for target layout. + +1. Your job is to refactor paths, not to preserve them. +2. Middle-level categories may be regrouped or recombined where appropriate. +3. Leaf names must remain unchanged, but the path leading to them may change. +4. Simply copying the original full path for a leaf is considered a failure of refactoring. +5. **CRITICAL:** Each refactored path must have 2-8 segments, with the leaf name as the final segment. 
+ +## Path Composition + +Each `assigned_paths` entry has two distinct kinds of segments: + +1. Intermediate segments โ€” the new component hierarchy you design. + These follow the Naming Guidance below and do NOT need to mirror + the source tree's intermediate names. You are free to regroup + leaves under any meaningful hierarchy. + +2. Leaf segment (the final segment) โ€” exactly the leaf's `name` value + from the source tree, copied verbatim. The leaf name is the stable + identifier downstream stages use to locate the feature; never + modify, abbreviate, expand, or annotate it. + +Source leaf: + {"name": "user model definition", + "description": "User model with username, password_hash, ..."} + +Resulting path: + "user system/data model/user model definition" + ^^^^^^^^^^^^^^^^^^^^^^^^ + leaf segment = source leaf's `name` (verbatim) + +## Naming Guidance +When defining categories within a subtree: +- Prefer names that describe real functionality rather than abstract taxonomy labels. +- Avoid generic buckets such as misc, utilities, general, etc. +- Names should plausibly map to real modules, packages, or directories, while still conveying business or system meaning. + +## **Requirements** +- Each subtree represents a self-contained functional domain. +- Every valid leaf node appears exactly once across all subtrees. +- Collectively, all subtrees must cover the complete set of valid feature leaves. +- Leaf names must remain exactly as they appear in the source tree (no renaming). +- Each path in assigned_paths has 2-8 segments (leaf name as the final segment). + - The subtree_name field identifies which subtree receives these paths. + - The path string itself does NOT include the subtree name as a prefix. + +## Output Format +Your response must contain exactly one block and exactly one block, with no other content outside these sections. + +Explain your reasoning process: +1. How you evaluated grouping options and identified natural clusters +2. 
How you handled features that could belong to multiple subtrees +3. How you ensured balanced distribution across subtrees + +Self-check before submission (MANDATORY): +- [ ] Count "/" in each path: every path must have 1-7 slashes (2-8 segments) +- [ ] Verify each leaf_name exists in the remaining feature tree (no invented names) +- [ ] Confirm no leaf appears in more than one assigned_path +- [ ] Ensure intermediate segments form meaningful, non-generic hierarchies + + +{ + "assignments": [ + { + "subtree_name": "", + "assigned_paths": [ + "level1/level2/level3/leaf1", + "level1/level2/level3/leaf2" + ] + } + ] +} + +""" diff --git a/RPG-Kit/scripts/feature_build.py b/RPG-Kit/scripts/feature_build.py new file mode 100644 index 0000000..5782f75 --- /dev/null +++ b/RPG-Kit/scripts/feature_build.py @@ -0,0 +1,2310 @@ +#!/usr/bin/env python3 +"""Feature Tree Expansion Script Uses AI Assistant CLI tool instead of API calls.""" + +import copy +import json +import argparse +import sys +import logging +from datetime import datetime, timezone +from pathlib import Path +from typing import Dict, Any, List, Optional, Tuple +from pydantic import BaseModel, Field + +from feature.prompts import ( + PROMPT_TEMPLATE_BUILD_REVIEW, + PROMPT_TEMPLATE_BUILD_FEATURE, + PROMPT_TEMPLATE_BUILD_EXPAND, + PROMPT_TEMPLATE_BUILD_DIRECTED, + PROMPT_TEMPLATE_SUGGEST_DIRECTIONS, +) +from common.paths import ( + FEATURE_SPEC_FILE, + FEATURE_BUILD_FILE, +) +from common import print_unicode_table, get_all_leaf_paths, get_leaf_name, get_all_leaf_descriptions +from common.llm_client import LLMClient +from common.trajectory import load_or_create_trajectory + +# ======================== Configuration ======================== + +MAX_ITERATIONS = 20 # Hard safety cap for both Step 1 and Step 2 +MAX_CONSECUTIVE_FAILURES = 3 # Break after N consecutive empty/error responses + +logging.basicConfig( + level=logging.DEBUG, format="%(asctime)s - %(levelname)s - %(message)s" +) +logger = 
logger = logging.getLogger(__name__)


# ======================== Pydantic Models ========================


class AddPathsOutput(BaseModel):
    """Structured LLM output describing feature paths to add to the tree."""

    # New "category/.../leaf" paths; each entry carries 'path' and 'description'.
    add_new_feature_paths: List[Dict[str, str]] = Field(
        default_factory=list,
        description="List of new feature paths to add, each with 'path' and 'description'",
    )
    # True once the model judges the tree complete for the current objective.
    is_complete: bool = Field(
        default=False,
        description="Whether the feature tree is sufficiently complete for the current objective",
    )
    completion_reason: str = Field(
        default="",
        description="Reason for declaring completion",
    )


class ReviewOutput(BaseModel):
    """Structured LLM review result: coverage gaps plus corrective paths."""

    coverage_percentage: float = Field(
        description="Estimated percentage of FRD requirements covered by feature tree (0-100)",
        ge=0,
        le=100,
    )
    has_gaps: bool = Field(
        description="Whether there are uncovered functional requirements"
    )
    missing_functionalities: List[str] = Field(
        default_factory=list,
        description="List of functional requirements from FRD that are not yet covered",
    )
    suggested_paths: List[Dict[str, str]] = Field(
        default_factory=list,
        description="List of new feature paths to cover the missing functionalities, each with 'path' and 'description'",
    )
    invalid_leaf_nodes: List[str] = Field(
        default_factory=list,
        description="List of leaf nodes that violate the Minimum Implementable Unit principle",
    )
    suggested_replacements: List[Dict[str, str]] = Field(
        default_factory=list,
        description="List of replacement paths for invalid leaf nodes (split into proper MIU), each with 'path' and 'description'",
    )
    duplicate_leaf_renames: List[str] = Field(
        default_factory=list,
        description="List of rename operations for duplicate leaf names, format: 'old_full_path -> new_leaf_name'",
    )


class DirectionItem(BaseModel):
    """One suggested expansion direction."""

    name: str = Field(description="Short direction name")
    description: str = Field(
        description="2-3 sentence description of what this direction covers"
    )
    rationale: str = Field(
        description="Why this direction is important for the repository"
    )


class SuggestDirectionsOutput(BaseModel):
    """Structured LLM output listing candidate expansion directions."""

    directions: List[DirectionItem] = Field(
        default_factory=list,
        description="List of 4-6 expansion direction suggestions",
    )


# ======================== Utility Functions ========================


def load_json(path: Path) -> Dict[str, Any]:
    """Read and parse a JSON file; exit the process on any failure."""
    try:
        with path.open("r", encoding="utf-8") as f:
            return json.load(f)
    except Exception as e:
        logger.error(f"Failed to load JSON file: {path}, error: {e}")
        sys.exit(1)


def save_json(data: Dict[str, Any], path: Path):
    """Write data as pretty-printed JSON, creating parent dirs; exit on failure."""
    try:
        path.parent.mkdir(parents=True, exist_ok=True)
        with path.open("w", encoding="utf-8") as f:
            json.dump(data, f, indent=2, ensure_ascii=False)
        logger.info(f"Results saved to: {path}")
    except Exception as e:
        logger.error(f"Failed to save JSON file: {path}, error: {e}")
        sys.exit(1)


def build_repo_info(repo_data: Dict[str, Any]) -> str:
    """Build repository information string.

    Combines top-level repo metadata with spec fields. When the spec fields
    (background_and_overview, functional_requirements, ...) are absent at the
    top level, they are pulled out of the `repository_specification` payload,
    which may be a JSON string or a dict holding the full feature_spec.json
    content.
    """
    spec_keys = [
        "background_and_overview",
        "functional_requirements",
        "non_functional_requirements",
    ]
    merged = dict(repo_data)

    # Backfill missing spec fields from the embedded specification payload.
    if any(k not in merged for k in spec_keys):
        raw_spec = repo_data.get("repository_specification", "")
        if isinstance(raw_spec, str) and raw_spec.strip():
            try:
                parsed_spec = json.loads(raw_spec)
                for k in spec_keys:
                    if k not in merged and k in parsed_spec:
                        merged[k] = parsed_spec[k]
            except (json.JSONDecodeError, TypeError):
                pass
        elif isinstance(raw_spec, dict):
            for k in spec_keys:
                if k not in merged and k in raw_spec:
                    merged[k] = raw_spec[k]

    # Render the selected fields in a fixed, reader-friendly order.
    lines = []
    for key in [
        "repository_name",
        "repository_purpose",
        "background_and_overview",
        "non_functional_requirements",
        "functional_requirements",
    ]:
        if key in merged:
            value = merged[key]
            if isinstance(value, (list, dict)):
                value = json.dumps(value, indent=2, ensure_ascii=False)
            lines.append(f"{key.replace('_', ' ').capitalize()}: {value}")
    return "\n".join(lines).strip()


def find_duplicate_leaf_names(tree: Dict[str, Any]) -> Dict[str, List[str]]:
    """Find duplicate leaf node names in the feature tree.

    Args:
        tree: Feature tree dictionary

    Returns:
        Dictionary mapping each duplicated leaf name to all of its full paths
    """
    by_name: Dict[str, List[str]] = {}
    for full_path in get_all_leaf_paths(tree):
        by_name.setdefault(full_path.split("/")[-1], []).append(full_path)
    # Keep only names that occur under more than one path.
    return {name: paths for name, paths in by_name.items() if len(paths) > 1}
def format_duplicate_leaves_info(duplicates: Dict[str, List[str]]) -> str:
    """Format duplicate leaf information for the prompt.

    Args:
        duplicates: Dictionary from find_duplicate_leaf_names()

    Returns:
        Formatted string for the prompt
    """
    if not duplicates:
        return "No duplicate leaf nodes detected."

    lines = [
        f"Found {len(duplicates)} duplicate leaf name(s) that need to be renamed:\n"
    ]
    for leaf_name, paths in duplicates.items():
        lines.append(f'- Leaf name "{leaf_name}" appears {len(paths)} times:')
        for path in paths:
            lines.append(f"  - {path}")
    # Single trailing instruction, emitted once after all duplicates are listed.
    lines.append(
        "\nFor each duplicate, keep ONE path unchanged and rename the others to make leaf names unique."
    )
    return "\n".join(lines)


def convert_leaves_to_list(tree: Dict[str, Any]) -> Dict[str, Any]:
    """Convert leaf nodes to lists.

    Rules:
    - Empty dict {} becomes empty list []
    - Non-empty dict is recursively processed
    - List values have whitespace trimmed from string elements
    - Other values are wrapped in a list
    """
    result = {}
    for key, value in tree.items():
        clean_key = key.strip()
        if isinstance(value, dict):
            if len(value) == 0:
                # Empty dict should become empty list (leaf node with no children)
                result[clean_key] = []
            else:
                result[clean_key] = convert_leaves_to_list(value)
        elif isinstance(value, list):
            # Trim whitespace from string elements in list; preserve dict items
            cleaned = []
            for item in value:
                if isinstance(item, str):
                    cleaned.append(item.strip())
                elif isinstance(item, dict):
                    # Dict leaf node: strip the name field only, keep the rest as-is
                    stripped = dict(item)
                    if "name" in stripped and isinstance(stripped["name"], str):
                        stripped["name"] = stripped["name"].strip()
                    cleaned.append(stripped)
                else:
                    cleaned.append(item)
            result[clean_key] = cleaned
        else:
            # If value is string, trim and wrap in list
            result[clean_key] = [value.strip() if isinstance(value, str) else value]
    return result


def _leaf_label(item: Any) -> Any:
    """Return the comparable name of a leaf item (plain string, or a dict's 'name')."""
    if isinstance(item, dict):
        return item.get("name")
    return item


def apply_changes(tree: Dict[str, Any], paths: List[str]) -> Dict[str, Any]:
    """Apply path list to tree structure.

    For a path like "a/b/c/d/leaf", creates:
    {"a": {"b": {"c": {"d": ["leaf"]}}}}

    Leaves may be plain strings or {"name": ..., "description": ...} dicts
    (as produced elsewhere in this module); conversions and membership tests
    key on the leaf *name* so dict-shaped leaves neither crash the
    list-to-branch promotion nor get duplicated.

    Args:
        tree: Current feature tree (not mutated; a deep copy is returned)
        paths: "/"-delimited feature paths to insert

    Returns:
        New tree with all paths applied
    """
    new_tree = copy.deepcopy(tree)

    for path in paths:
        # Trim whitespace from each path segment
        parts = [part.strip() for part in path.split("/")]

        if len(parts) < 2:
            continue  # Skip invalid paths (a leaf needs at least one parent)

        # Navigate/create the tree structure up to the second-to-last element
        current = new_tree
        for part in parts[:-2]:
            if part not in current:
                current[part] = {}
            elif isinstance(current[part], list):
                # A former leaf list must become a branch: promote each leaf to
                # a key. Key on the leaf *name* -- dict leaves are unhashable
                # and would raise TypeError if used as keys directly.
                current[part] = {_leaf_label(item): {} for item in current[part]}
            elif not isinstance(current[part], dict):
                # Unexpected type, convert to dict
                current[part] = {}
            current = current[part]

        # Now handle the last two parts: parent and leaf
        parent_key = parts[-2]
        leaf = parts[-1]

        if parent_key not in current:
            # Create a new list with the leaf
            current[parent_key] = [leaf]
        elif isinstance(current[parent_key], list):
            # Append only if no existing leaf (string or dict) has this name.
            if leaf not in [_leaf_label(item) for item in current[parent_key]]:
                current[parent_key].append(leaf)
        elif isinstance(current[parent_key], dict):
            # Conflict: node is used both as a branch and as a leaf parent.
            # Keep it as a dict and add the leaf as a key with empty children.
            if leaf not in current[parent_key]:
                current[parent_key][leaf] = []
        else:
            # Unexpected type, replace with list
            current[parent_key] = [leaf]

    return new_tree
def remove_paths(tree: Dict[str, Any], paths: List[str]) -> Dict[str, Any]:
    """Remove paths from tree structure.

    For a path like "a/b/c/d/leaf", deletes the leaf; parents left empty by
    the deletion are pruned bottom-up. Entries in `paths` may carry a trailing
    explanation after " - " (e.g. from invalid_leaf_nodes); only the part
    before the marker is treated as the path.

    Args:
        tree: Current feature tree (not mutated; a deep copy is returned)
        paths: List of paths to remove (may contain explanations after ' - ')

    Returns:
        Updated tree with paths removed
    """
    result = copy.deepcopy(tree)

    for path in paths:
        # Keep only the path itself, dropping any " - explanation" suffix.
        clean = path.split(" - ")[0].strip()
        parts = [segment.strip() for segment in clean.split("/")]

        if len(parts) < 2:
            logger.warning(f"Invalid path to remove (too short): {path}")
            continue

        # Walk down to the node holding the leaf's parent, remembering the
        # chain of ancestors so empties can be pruned afterwards.
        current = result
        ancestors = []
        for part in parts[:-2]:
            if part in current and isinstance(current[part], dict):
                ancestors.append((current, part))
                current = current[part]
            else:
                logger.warning(f"Path not found in tree: {path} (missing: {part})")
                break
        else:
            parent_key, leaf = parts[-2], parts[-1]

            if parent_key not in current:
                logger.warning(f"Parent key not found: {parent_key} in path {path}")
                continue

            ancestors.append((current, parent_key))
            holder = current[parent_key]

            if isinstance(holder, list):
                # Leaf items may be strings or {"name": ...} dicts.
                position = next(
                    (i for i, item in enumerate(holder) if get_leaf_name(item) == leaf),
                    None,
                )
                if position is not None:
                    holder.pop(position)
                    logger.info(f"Removed leaf '{leaf}' from list at '{parent_key}'")
                else:
                    logger.warning(f"Leaf '{leaf}' not found in list at '{parent_key}'")
            elif isinstance(holder, dict):
                if leaf in holder:
                    del holder[leaf]
                    logger.info(f"Removed leaf '{leaf}' from dict at '{parent_key}'")
                else:
                    logger.warning(f"Leaf '{leaf}' not found in dict at '{parent_key}'")
            else:
                logger.warning(
                    f"Unexpected type at '{parent_key}': {type(current[parent_key])}"
                )
                continue

            # Prune now-empty containers bottom-up; stop at the first non-empty.
            for parent, key in reversed(ancestors):
                if key not in parent:
                    continue
                value = parent[key]
                if isinstance(value, (list, dict)) and not value:
                    del parent[key]
                    logger.debug(f"Cleaned up empty node: {key}")
                else:
                    break

    return result


def count_features_recursive(tree: Dict[str, Any]) -> int:
    """Return the number of leaf features (items inside lists) in the tree.

    Intermediate dict nodes are not counted, only list elements.
    """
    total = 0
    for value in tree.values():
        if isinstance(value, dict):
            total += count_features_recursive(value)
        elif isinstance(value, list):
            total += len(value)
    return total


def _with_description(item: Any, new_prefix: str, desc_map: Dict[str, str]) -> Any:
    """Return the leaf item, upgraded to {"name", "description"} when a description is known."""
    name = get_leaf_name(item)
    path = f"{new_prefix}/{name}" if new_prefix else name
    desc = desc_map.get(path, "")
    if not desc and isinstance(item, dict):
        # Preserve an existing description when no new one was supplied.
        desc = item.get("description", "")
    if desc:
        return {"name": name, "description": desc}
    return item


def attach_descriptions(tree: Dict[str, Any], desc_map: Dict[str, str], prefix: str = "") -> Dict[str, Any]:
    """Attach descriptions to leaf nodes in the tree.

    Traverses the tree and replaces string leaf items with
    {"name": ..., "description": ...} when a matching description exists
    in desc_map (keyed by full "/"-joined leaf path).

    Args:
        tree: Feature tree dictionary
        desc_map: Mapping of full leaf paths to descriptions
        prefix: Current path prefix (for recursion)

    Returns:
        Updated tree with descriptions attached to leaf nodes
    """
    result = {}
    for key, value in tree.items():
        new_prefix = f"{prefix}/{key}" if prefix else key
        if isinstance(value, dict):
            result[key] = attach_descriptions(value, desc_map, new_prefix)
        elif isinstance(value, list):
            result[key] = [_with_description(item, new_prefix, desc_map) for item in value]
        else:
            result[key] = value
    return result


def analyze_tree_statistics(tree: Dict[str, Any]) -> Dict[str, Dict[str, int]]:
    """Compute per-top-level-category feature counts.

    Args:
        tree: Feature tree

    Returns:
        Mapping of category name to {"total_features": count}
    """
    stats = {}
    for category, subtree in tree.items():
        if isinstance(subtree, dict):
            total = count_features_recursive(subtree)
        elif isinstance(subtree, list):
            total = len(subtree)
        else:
            total = 1
        stats[category] = {"total_features": total}
    return stats


def count_paths_by_category(paths: List[str]) -> Dict[str, int]:
    """Count feature paths grouped by their top-level category.

    Args:
        paths: List of "/"-delimited feature paths

    Returns:
        Mapping of top-level category to number of paths under it
    """
    counts: Dict[str, int] = {}
    for path in paths:
        parts = [part.strip() for part in path.split("/")]
        if parts:
            counts[parts[0]] = counts.get(parts[0], 0) + 1
    return counts
def count_paths_by_category(paths: List[str]) -> Dict[str, int]:
    """Count paths by their top-level category.

    The category is the first "/"-separated segment of each path,
    with surrounding whitespace stripped.

    Args:
        paths: List of feature paths

    Returns:
        Dictionary mapping category to count
    """
    counts: Dict[str, int] = {}
    for path in paths:
        # Only the first segment matters, so avoid splitting the whole path.
        # (str.split always yields at least one element, even for "".)
        category = path.split("/", 1)[0].strip()
        counts[category] = counts.get(category, 0) + 1
    return counts
def print_summary_tables(
    iteration_logs: List[Dict[str, Any]],
    final_tree: Dict[str, Any],
    review_logs: Optional[List[Dict[str, Any]]] = None,
    previous_tree: Optional[Dict[str, Any]] = None,
):
    """Print summary tables with Unicode borders.

    Output-only: writes iteration and per-category statistics tables to
    stdout and returns nothing.

    Args:
        iteration_logs: List of iteration logs
        final_tree: Final feature tree
        review_logs: List of review logs (optional; when given, a
            "Review Change" column is added to the category table)
        previous_tree: Initial feature tree before expansion (optional)
    """
    print("\n" + "=" * 70)
    print("FEATURE EXPANSION SUMMARY")
    print("=" * 70)

    # Iteration Summary Table
    iteration_rows = []
    total_paths = 0
    all_new_paths = []

    for log in iteration_logs:
        paths_count = log.get("paths_count", 0)
        total_paths += paths_count
        raw_paths = log.get("new_paths", [])
        # new_paths may be plain strings or {"path","description"} dicts.
        extracted, _ = _extract_paths_and_descs(raw_paths)
        all_new_paths.extend(extracted)
        iteration_rows.append([log["iteration"], paths_count])

    # Add total row
    iteration_rows.append(["Total", total_paths])

    print_unicode_table(
        headers=["Iteration", "New Feature Paths Added"], rows=iteration_rows, title=""
    )

    # Collect Review phase paths if available
    review_added_paths = []
    review_removed_paths = []
    if review_logs:
        for log in review_logs:
            if log.get("status") in ("paths_applied", "threshold_met"):
                # threshold_met only applies MIU fixes, not coverage gap paths
                if log.get("status") != "threshold_met":
                    paths, _ = _extract_paths_and_descs(log.get("suggested_paths", []))
                    review_added_paths.extend(paths)
                # Replacements are applied for both statuses.
                repl, _ = _extract_paths_and_descs(log.get("suggested_replacements", []))
                review_added_paths.extend(repl)
                review_removed_paths.extend(log.get("invalid_leaf_nodes", []))

    # Module Statistics Table
    # Count initial features (before expansion)
    initial_stats = analyze_tree_statistics(previous_tree) if previous_tree else {}

    # Count new added paths by category (expansion phase only)
    expansion_category_counts = count_paths_by_category(all_new_paths)

    # Count review added paths by category
    review_added_category_counts = count_paths_by_category(review_added_paths)

    # Count review removed paths by category (extract path part before explanation)
    # Removed-node entries look like "path - reason"; keep only the path part.
    review_removed_clean_paths = []
    for path in review_removed_paths:
        path_part = path.split(" - ")[0].strip()
        review_removed_clean_paths.append(path_part)
    review_removed_category_counts = count_paths_by_category(review_removed_clean_paths)

    # Count total features in final tree
    final_stats = analyze_tree_statistics(final_tree)

    # Get all categories (union across every source so no column is dropped)
    all_categories = sorted(
        set(
            list(initial_stats.keys())
            + list(expansion_category_counts.keys())
            + list(review_added_category_counts.keys())
            + list(review_removed_category_counts.keys())
            + list(final_stats.keys())
        )
    )

    module_rows = []
    total_initial = 0
    total_expansion_added = 0
    total_review_added = 0
    total_review_removed = 0
    total_features = 0

    for category in all_categories:
        initial_feat = initial_stats.get(category, {}).get("total_features", 0)
        expansion_added = expansion_category_counts.get(category, 0)
        review_added = review_added_category_counts.get(category, 0)
        review_removed = review_removed_category_counts.get(category, 0)
        total_feat = final_stats.get(category, {}).get("total_features", 0)

        total_initial += initial_feat
        total_expansion_added += expansion_added
        total_review_added += review_added
        total_review_removed += review_removed
        total_features += total_feat

        # Format: Initial | Final | Expansion Added | Review (+added/-removed = net)
        if review_logs:
            if review_added or review_removed:
                net = review_added - review_removed
                net_str = f"+{net}" if net >= 0 else str(net)
                review_change = f"+{review_added}/-{review_removed}={net_str}"
            else:
                review_change = "-"
            module_rows.append(
                [category, initial_feat, total_feat, expansion_added, review_change]
            )
        else:
            module_rows.append([category, initial_feat, total_feat, expansion_added])

    # Add total row
    if review_logs:
        total_net = total_review_added - total_review_removed
        total_net_str = f"+{total_net}" if total_net >= 0 else str(total_net)
        review_total_change = (
            f"+{total_review_added}/-{total_review_removed}={total_net_str}"
        )
        module_rows.append(
            [
                "TOTAL",
                total_initial,
                total_features,
                total_expansion_added,
                review_total_change,
            ]
        )
        headers = [
            "Top-Level Category",
            "Initial",
            "Final",
            "Expansion Added",
            "Review Change",
        ]
    else:
        module_rows.append(
            ["TOTAL", total_initial, total_features, total_expansion_added]
        )
        headers = ["Top-Level Category", "Initial", "Final", "New Added Features"]

    print_unicode_table(
        headers=headers,
        rows=module_rows,
        title="\n",
    )

    # Print detailed new paths grouped by category
    if all_new_paths:
        print("\n New Feature Paths:")
        grouped: Dict[str, List[str]] = {}
        for path in all_new_paths:
            parts = [p.strip() for p in path.split("/")]
            category = parts[0] if parts else "unknown"
            grouped.setdefault(category, []).append(path)
        for category in sorted(grouped.keys()):
            paths = grouped[category]
            noun = "path" if len(paths) == 1 else "paths"
            print(f"\n [{category}] ({len(paths)} {noun})")
            for p in paths:
                print(f" - {p}")

    print("\n" + "=" * 70)
def review_feature_coverage(
    llm: LLMClient,
    repo_info: str,
    current_tree: Dict[str, Any],
    previous_review: Optional[Dict[str, Any]] = None,
    review_iter: int = 0,
) -> Tuple[Optional[ReviewOutput], str, str]:
    """Review feature tree coverage against FRD requirements.

    Uses AI semantic matching to check if all functional requirements
    from the repository information are covered by the feature tree.

    Args:
        llm: LLMClient instance
        repo_info: The repository information content
        current_tree: Current feature tree
        previous_review: Previous review result (to maintain consistency)
        review_iter: Zero-based review iteration index (used only to tag
            the LLM call's purpose label)

    Returns:
        Tuple of (ReviewOutput, think_content, raw_response); the first
        element is None when the structured LLM call fails to parse.
    """
    logger.info("Starting feature coverage review...")

    current_tree_json = json.dumps(current_tree, indent=2) if current_tree else "{}"

    # Detect duplicate leaf names so the prompt can ask for renames
    duplicates = find_duplicate_leaf_names(current_tree)
    duplicate_leaves_info = format_duplicate_leaves_info(duplicates)
    if duplicates:
        logger.info(f"Detected {len(duplicates)} duplicate leaf name(s)")

    # Format previous review info for the prompt
    if previous_review:
        previous_review_str = f"""Previous coverage: {previous_review.get("coverage_percentage", 0):.1f}%
Previously identified missing functionalities:
{json.dumps(previous_review.get("missing_functionalities", []), indent=2)}
Paths that were added to address gaps:
{json.dumps(previous_review.get("suggested_paths", []), indent=2)}

IMPORTANT: The coverage should NOT decrease. Only the previously missing functionalities need to be re-evaluated."""
    else:
        previous_review_str = (
            "This is the first review iteration. No previous review data."
        )

    prompt = PROMPT_TEMPLATE_BUILD_REVIEW.format(
        repo_info=repo_info,
        current_tree=current_tree_json,
        previous_review=previous_review_str,
        duplicate_leaves_info=duplicate_leaves_info,
    )

    think, result, response = llm.call_structured(
        system_prompt=prompt, user_prompt="",
        response_model=ReviewOutput, max_retries=3,
        purpose=f"review_{review_iter + 1}",
    )

    if result:
        logger.info(f"Review complete: {result.coverage_percentage:.1f}% coverage")
        logger.info(f"Has gaps: {result.has_gaps}")
        if result.missing_functionalities:
            logger.info(
                f"Missing functionalities: {len(result.missing_functionalities)}"
            )
        if result.suggested_paths:
            logger.info(f"Suggested paths: {len(result.suggested_paths)}")
        if result.invalid_leaf_nodes:
            logger.info(
                f"Invalid leaf nodes (MIU violations): {len(result.invalid_leaf_nodes)}"
            )
        if result.suggested_replacements:
            logger.info(f"Suggested replacements: {len(result.suggested_replacements)}")
        if result.duplicate_leaf_renames:
            logger.info(f"Duplicate leaf renames: {len(result.duplicate_leaf_renames)}")

    return result, think, response
def print_review_summary(review_result: ReviewOutput, iteration: int) -> None:
    """Print a formatted summary of the review results.

    Output-only helper: each non-empty result section is printed with at
    most 10 entries, followed by an "... and N more" line when truncated.
    Long free-text entries are clipped to 80 characters.
    """
    print(f"\n{'=' * 60}")
    print(f"REVIEW ITERATION {iteration} SUMMARY")
    print(f"{'=' * 60}")
    print(f"Coverage: {review_result.coverage_percentage:.1f}%")
    print(f"Has Gaps: {review_result.has_gaps}")

    if review_result.missing_functionalities:
        print(
            f"\nMissing Functionalities ({len(review_result.missing_functionalities)}):"
        )
        for i, missing in enumerate(review_result.missing_functionalities[:10], 1):
            print(f" {i}. {missing[:80]}{'...' if len(missing) > 80 else ''}")
        if len(review_result.missing_functionalities) > 10:
            print(f" ... and {len(review_result.missing_functionalities) - 10} more")

    if review_result.suggested_paths:
        print(f"\nSuggested Paths ({len(review_result.suggested_paths)}):")
        for i, item in enumerate(review_result.suggested_paths[:10], 1):
            # Items may be {"path","description"} dicts or plain strings.
            if isinstance(item, dict):
                print(f" {i}. {item.get('path', '')}: {item.get('description', '')}")
            else:
                print(f" {i}. {item}")
        if len(review_result.suggested_paths) > 10:
            print(f" ... and {len(review_result.suggested_paths) - 10} more")

    if review_result.invalid_leaf_nodes:
        print(f"\n[WARNING] MIU Violations ({len(review_result.invalid_leaf_nodes)}):")
        for i, node in enumerate(review_result.invalid_leaf_nodes[:10], 1):
            print(f" {i}. {node[:80]}{'...' if len(node) > 80 else ''}")
        if len(review_result.invalid_leaf_nodes) > 10:
            print(f" ... and {len(review_result.invalid_leaf_nodes) - 10} more")

    if review_result.suggested_replacements:
        print(
            f"\nSuggested Replacements ({len(review_result.suggested_replacements)}):"
        )
        for i, item in enumerate(review_result.suggested_replacements[:10], 1):
            if isinstance(item, dict):
                print(f" {i}. {item.get('path', '')}: {item.get('description', '')}")
            else:
                print(f" {i}. {item}")
        if len(review_result.suggested_replacements) > 10:
            print(f" ... and {len(review_result.suggested_replacements) - 10} more")

    if review_result.duplicate_leaf_renames:
        print(
            f"\nDuplicate Leaf Renames ({len(review_result.duplicate_leaf_renames)}):"
        )
        for i, rename in enumerate(review_result.duplicate_leaf_renames[:10], 1):
            print(f" {i}. {rename}")
        if len(review_result.duplicate_leaf_renames) > 10:
            print(f" ... and {len(review_result.duplicate_leaf_renames) - 10} more")

    print(f"{'=' * 60}\n")
+ + Handles both old format (List[str]) and new format (List[Dict[str, str]]). + + Returns: + Tuple of (path_list, desc_map) + """ + paths = [] + desc_map = {} + for item in items: + if isinstance(item, dict): + p = item.get("path", "") + d = item.get("description", "") + if p: + paths.append(p) + if d: + desc_map[p] = d + elif isinstance(item, str): + paths.append(item) + return paths, desc_map + + +def _save_intermediate( + feature_tree: Dict[str, Any], + current_tree: Dict[str, Any], + previous_feature_tree: Dict[str, Any], + iteration_logs: List[Dict[str, Any]], + review_logs: List[Dict[str, Any]], + output_file: Path, +): + """Save intermediate results to file.""" + intermediate_result = { + "repository_name": feature_tree.get("repository_name", "unknown"), + "repository_purpose": feature_tree.get("repository_purpose", ""), + "repository_specification": feature_tree.get("repository_specification", ""), + "feature_tree": current_tree, + "previous_feature_tree": previous_feature_tree, + "iteration_logs": iteration_logs, + "review_logs": review_logs, + "expansion_directions": feature_tree.get("expansion_directions") or [], + } + try: + save_json(intermediate_result, output_file) + except Exception as e: + logger.warning(f"Failed to save intermediate results: {e}") + + +def _apply_duplicate_renames( + current_tree: Dict[str, Any], + duplicate_leaf_renames: List[str], +) -> Tuple[Dict[str, Any], int]: + """Apply duplicate leaf rename operations to the tree. + + Returns: + Tuple of (updated tree, number of renames applied) + """ + renames_applied = 0 + if not duplicate_leaf_renames: + return current_tree, 0 + + logger.info( + f"Applying {len(duplicate_leaf_renames)} leaf renames for duplicates..." 
def _apply_duplicate_renames(
    current_tree: Dict[str, Any],
    duplicate_leaf_renames: List[str],
) -> Tuple[Dict[str, Any], int]:
    """Apply duplicate leaf rename operations to the tree.

    Each entry must look like ``"old/leaf/path -> new_leaf_name"``.
    Malformed entries, paths shorter than two segments, and renames whose
    target path already exists are skipped with a warning.

    Returns:
        Tuple of (updated tree, number of renames applied)
    """
    renames_applied = 0
    if not duplicate_leaf_renames:
        return current_tree, 0

    logger.info(
        f"Applying {len(duplicate_leaf_renames)} leaf renames for duplicates..."
    )
    for rename_str in duplicate_leaf_renames:
        if " -> " not in rename_str:
            logger.warning(f"Invalid rename format (missing ' -> '): {rename_str}")
            continue

        parts = rename_str.split(" -> ")
        if len(parts) != 2:
            # More than one " -> " separator is ambiguous; skip.
            logger.warning(f"Invalid rename format: {rename_str}")
            continue

        old_path = parts[0].strip()
        new_leaf_name = parts[1].strip()

        path_parts = [p.strip() for p in old_path.split("/")]
        if len(path_parts) < 2:
            # A rename needs at least a parent and a leaf segment.
            logger.warning(f"Path too short for rename: {old_path}")
            continue

        new_path = "/".join(path_parts[:-1] + [new_leaf_name])

        # Recomputed inside the loop on purpose: each applied rename
        # mutates the tree, so the path set must stay fresh.
        existing_paths = get_all_leaf_paths(current_tree)
        if new_path in existing_paths:
            logger.warning(
                f"Cannot rename '{old_path}' -> '{new_path}': target path already exists"
            )
            continue

        # Collect description before removing
        old_descs = get_all_leaf_descriptions(current_tree)
        old_desc = old_descs.get(old_path, "")

        # Rename = remove old leaf, then add the new path.
        current_tree = remove_paths(current_tree, [old_path])
        current_tree = apply_changes(current_tree, [new_path])

        # Re-attach description to renamed leaf
        if old_desc:
            current_tree = attach_descriptions(current_tree, {new_path: old_desc})

        logger.info(f"Renamed: '{old_path}' -> '{new_path}'")
        renames_applied += 1

    # Normalize leaf containers back to list form after edits.
    current_tree = convert_leaves_to_list(current_tree)
    return current_tree, renames_applied
[])) for log in applied_logs + ) + total_paths_added = total_suggested_paths + total_replacements + + print(f"Review iterations: {len(review_logs)} (applied: {len(applied_logs)})") + print() + print("[IN] PATHS ADDED:") + print(f" Total paths added: {total_paths_added}") + print(f" โ”œโ”€ For coverage gaps: {total_suggested_paths}") + print(f" โ””โ”€ For MIU replacements: {total_replacements}") + print() + print("[OUT] NODES REMOVED:") + print(f" Invalid nodes removed (MIU violations): {total_invalid_removed}") + print() + print( + f"Net change: +{total_paths_added} added, -{total_invalid_removed} removed" + ) + + if review_logs: + last_review = review_logs[-1] + if "coverage_percentage" in last_review: + print() + print(f"Final coverage: {last_review['coverage_percentage']:.1f}%") + print(f"Final status: {last_review.get('status', 'unknown')}") + print("=" * 60) + + +# ======================== Review Phase ======================== + + +def _run_review_phase( + llm: LLMClient, + feature_tree: Dict[str, Any], + current_tree: Dict[str, Any], + previous_feature_tree: Dict[str, Any], + iteration_logs: List[Dict[str, Any]], + output_file: Optional[Path], + review_max_iterations: int = 3, + review_threshold: float = 98.0, + skip_coverage_gaps: bool = False, +) -> Tuple[Dict[str, Any], List[Dict[str, Any]]]: + """Run the review phase on the current feature tree. 
def _run_review_phase(
    llm: LLMClient,
    feature_tree: Dict[str, Any],
    current_tree: Dict[str, Any],
    previous_feature_tree: Dict[str, Any],
    iteration_logs: List[Dict[str, Any]],
    output_file: Optional[Path],
    review_max_iterations: int = 3,
    review_threshold: float = 98.0,
    skip_coverage_gaps: bool = False,
) -> Tuple[Dict[str, Any], List[Dict[str, Any]]]:
    """Run the review phase on the current feature tree.

    Two modes:
      * full review (default) — iterates until the coverage threshold is
        met or no more gaps/suggestions remain, applying suggested paths,
        MIU replacements, and duplicate renames;
      * lightweight review (``skip_coverage_gaps=True``) — only applies
        duplicate renames and MIU fixes, and stops after the first pass
        that applies no fixes.

    Args:
        llm: LLMClient instance
        feature_tree: Full feature tree data (with repo info)
        current_tree: Current feature tree structure
        previous_feature_tree: Initial feature tree before expansion
        iteration_logs: Expansion iteration logs
        output_file: Output file path
        review_max_iterations: Max review iterations
        review_threshold: Coverage threshold (step1 only)
        skip_coverage_gaps: If True, only check duplicates and MIU (step2)

    Returns:
        Tuple of (updated current_tree, review_logs)
    """
    review_logs = []
    phase_name = "Lightweight Review (Duplicates + MIU)" if skip_coverage_gaps else "Full Review"

    logger.info(f"\n{'=' * 60}")
    logger.info(f"Starting {phase_name} Phase")
    if not skip_coverage_gaps:
        logger.info(f"Review threshold: {review_threshold}%")
    logger.info(f"Max review iterations: {review_max_iterations}")
    logger.info(f"{'=' * 60}")

    repository_specification = feature_tree.get("repository_specification", "")
    previous_review_data = None

    for review_iter in range(review_max_iterations):
        logger.info(
            f"\n{'=' * 20} Review Iteration {review_iter + 1}/{review_max_iterations} {'=' * 20}"
        )

        # Perform review with retry on parse failure
        parse_retries = 2
        review_result = None
        review_think = ""
        review_response = ""

        for parse_attempt in range(parse_retries):
            if parse_attempt > 0:
                logger.info(
                    f"Retrying review parse (attempt {parse_attempt + 1}/{parse_retries})..."
                )

            review_result, review_think, review_response = review_feature_coverage(
                llm=llm,
                repo_info=repository_specification,
                current_tree=current_tree,
                previous_review=previous_review_data,
                review_iter=review_iter,
            )

            if review_result is not None:
                break
            else:
                logger.warning(
                    f"Parse attempt {parse_attempt + 1}/{parse_retries} failed"
                )

        if review_result is None:
            # Both parse attempts failed: record the error and move on to
            # the next review iteration rather than aborting the phase.
            error_msg = f"Failed to parse review response after {parse_retries} attempts"
            logger.warning(f"Review iteration {review_iter + 1}: {error_msg}")
            review_logs.append({
                "review_iteration": review_iter + 1,
                "status": "error",
                "error": error_msg,
                "think": review_think,
                "response": review_response,
            })
            continue

        # Print review summary
        print_review_summary(review_result, review_iter + 1)

        # Apply duplicate leaf renames
        current_tree, renames_applied = _apply_duplicate_renames(
            current_tree, review_result.duplicate_leaf_renames
        )

        if skip_coverage_gaps:
            # Lightweight review (step2): only handle MIU violations and duplicates
            repl_paths, repl_descs = _extract_paths_and_descs(review_result.suggested_replacements)

            if review_result.invalid_leaf_nodes or repl_paths:
                # Remove invalid nodes (even if no replacements provided)
                if review_result.invalid_leaf_nodes:
                    logger.info(
                        f"Removing {len(review_result.invalid_leaf_nodes)} invalid leaf nodes (MIU violations)..."
                    )
                    current_tree = remove_paths(current_tree, review_result.invalid_leaf_nodes)
                if repl_paths:
                    logger.info(f"Applying {len(repl_paths)} MIU replacement paths...")
                    current_tree = apply_changes(current_tree, repl_paths)
                current_tree = convert_leaves_to_list(current_tree)
                if repl_descs:
                    current_tree = attach_descriptions(current_tree, repl_descs)

                review_logs.append({
                    "review_iteration": review_iter + 1,
                    "status": "paths_applied",
                    "coverage_percentage": review_result.coverage_percentage,
                    "has_gaps": review_result.has_gaps,
                    "missing_functionalities": [],
                    "suggested_paths": [],
                    "invalid_leaf_nodes": review_result.invalid_leaf_nodes,
                    "suggested_replacements": review_result.suggested_replacements,
                    "duplicate_leaf_renames": review_result.duplicate_leaf_renames,
                    "renames_applied": renames_applied,
                    "paths_applied": len(repl_paths),
                    "think": review_think,
                    "response": review_response,
                })

                if output_file:
                    _save_intermediate(
                        feature_tree, current_tree, previous_feature_tree,
                        iteration_logs, review_logs, output_file,
                    )
                    logger.info(f"[OK] Review iteration {review_iter + 1} results saved")
                # NOTE: no break here — fixes were applied, so loop again to
                # re-check the updated tree.
            else:
                status = "renames_only" if renames_applied > 0 else "clean"
                logger.info(f"Lightweight review complete: {status}")
                review_logs.append({
                    "review_iteration": review_iter + 1,
                    "status": status,
                    "coverage_percentage": review_result.coverage_percentage,
                    "has_gaps": review_result.has_gaps,
                    "invalid_leaf_nodes": review_result.invalid_leaf_nodes,
                    "duplicate_leaf_renames": review_result.duplicate_leaf_renames,
                    "renames_applied": renames_applied,
                    "paths_applied": 0,
                    "think": review_think,
                    "response": review_response,
                })
                break  # Lightweight review: one pass unless MIU fixes were applied
        else:
            # Full review (step1): check coverage threshold
            if review_result.coverage_percentage >= review_threshold:
                logger.info(
                    f"[OK] Coverage threshold met: {review_result.coverage_percentage:.1f}% >= {review_threshold}%"
                )

                # Even when coverage is met, still apply MIU fixes and replacements
                miu_paths, miu_descs = _extract_paths_and_descs(review_result.suggested_replacements)
                if review_result.invalid_leaf_nodes:
                    logger.info(
                        f"Removing {len(review_result.invalid_leaf_nodes)} invalid leaf nodes (MIU violations)..."
                    )
                    current_tree = remove_paths(current_tree, review_result.invalid_leaf_nodes)
                if miu_paths:
                    logger.info(
                        f"Applying {len(miu_paths)} MIU replacement paths..."
                    )
                    current_tree = apply_changes(current_tree, miu_paths)
                if review_result.invalid_leaf_nodes or miu_paths:
                    current_tree = convert_leaves_to_list(current_tree)
                    if miu_descs:
                        current_tree = attach_descriptions(current_tree, miu_descs)

                if renames_applied > 0:
                    logger.info(f" (Applied {renames_applied} leaf renames)")

                review_logs.append({
                    "review_iteration": review_iter + 1,
                    "status": "threshold_met",
                    "coverage_percentage": review_result.coverage_percentage,
                    "has_gaps": review_result.has_gaps,
                    "missing_functionalities": review_result.missing_functionalities,
                    "suggested_paths": review_result.suggested_paths,
                    "invalid_leaf_nodes": review_result.invalid_leaf_nodes,
                    "suggested_replacements": review_result.suggested_replacements,
                    "duplicate_leaf_renames": review_result.duplicate_leaf_renames,
                    "renames_applied": renames_applied,
                    "paths_applied": len(miu_paths),
                    "think": review_think,
                    "response": review_response,
                })
                break

            # Collect all paths to apply
            sugg_paths, sugg_descs = _extract_paths_and_descs(review_result.suggested_paths)
            repl_paths2, repl_descs2 = _extract_paths_and_descs(review_result.suggested_replacements)
            all_paths_to_apply = sugg_paths + repl_paths2
            all_descs = {**sugg_descs, **repl_descs2}

            if all_paths_to_apply or review_result.invalid_leaf_nodes:
                if all_paths_to_apply:
                    logger.info(
                        f"Applying {len(all_paths_to_apply)} paths from review "
                        f"(suggested: {len(sugg_paths)}, "
                        f"MIU replacements: {len(repl_paths2)})..."
                    )

                if review_result.invalid_leaf_nodes:
                    logger.info(
                        f"Removing {len(review_result.invalid_leaf_nodes)} invalid leaf nodes..."
                    )
                    current_tree = remove_paths(
                        current_tree, review_result.invalid_leaf_nodes
                    )

                if all_paths_to_apply:
                    current_tree = apply_changes(current_tree, all_paths_to_apply)
                current_tree = convert_leaves_to_list(current_tree)
                if all_descs:
                    current_tree = attach_descriptions(current_tree, all_descs)

                # Feed this round's findings into the next review so the
                # model re-evaluates only the previously missing items.
                previous_review_data = {
                    "coverage_percentage": review_result.coverage_percentage,
                    "missing_functionalities": review_result.missing_functionalities,
                    "suggested_paths": review_result.suggested_paths,
                    "invalid_leaf_nodes": review_result.invalid_leaf_nodes,
                    "suggested_replacements": review_result.suggested_replacements,
                }

                review_logs.append({
                    "review_iteration": review_iter + 1,
                    "status": "paths_applied",
                    "coverage_percentage": review_result.coverage_percentage,
                    "has_gaps": review_result.has_gaps,
                    "missing_functionalities": review_result.missing_functionalities,
                    "suggested_paths": review_result.suggested_paths,
                    "invalid_leaf_nodes": review_result.invalid_leaf_nodes,
                    "suggested_replacements": review_result.suggested_replacements,
                    "duplicate_leaf_renames": review_result.duplicate_leaf_renames,
                    "renames_applied": renames_applied,
                    "paths_applied": len(all_paths_to_apply),
                    "think": review_think,
                    "response": review_response,
                })

                if output_file:
                    _save_intermediate(
                        feature_tree, current_tree, previous_feature_tree,
                        iteration_logs, review_logs, output_file,
                    )
                    logger.info(f"[OK] Review iteration {review_iter + 1} results saved")
            else:
                if renames_applied > 0:
                    logger.info(
                        f"No more gaps, but applied {renames_applied} leaf renames"
                    )
                    status = "renames_only"
                else:
                    logger.info("No more gaps identified or no suggestions provided")
                    status = "no_gaps"
                review_logs.append({
                    "review_iteration": review_iter + 1,
                    "status": status,
                    "coverage_percentage": review_result.coverage_percentage,
                    "has_gaps": review_result.has_gaps,
                    "missing_functionalities": review_result.missing_functionalities,
                    "suggested_paths": review_result.suggested_paths,
                    "invalid_leaf_nodes": review_result.invalid_leaf_nodes,
                    "suggested_replacements": review_result.suggested_replacements,
                    "duplicate_leaf_renames": review_result.duplicate_leaf_renames,
                    "renames_applied": renames_applied,
                    "paths_applied": 0,
                    "think": review_think,
                    "response": review_response,
                })
                break

    logger.info(f"\n{'=' * 60}")
    logger.info(f"{phase_name} Phase Completed")
    logger.info(f"{'=' * 60}")

    if review_logs:
        last_review = review_logs[-1]
        if "coverage_percentage" in last_review:
            logger.info(f"Final coverage: {last_review['coverage_percentage']:.1f}%")

    return current_tree, review_logs
def build_from_spec(
    feature_tree: Dict[str, Any],
    output_file: Optional[Path] = None,
    review_max_iterations: int = 3,
    review_threshold: float = 98.0,
    llm: Optional[LLMClient] = None,
) -> Dict[str, Any]:
    """Step 1: Build or expand feature tree.

    If output_file already exists with a non-empty feature tree, assumes spec
    features are complete and switches to beyond-spec expansion mode (adds
    features the spec does not describe but are practically necessary).
    Otherwise, builds the feature tree from spec requirements.

    Args:
        feature_tree: Initial feature tree structure with repo info
        output_file: Output file path for saving intermediate results
        review_max_iterations: Maximum review iterations
        review_threshold: Coverage threshold to stop review (spec mode only)
        llm: LLMClient instance (a fresh one is created when None)

    Returns:
        Dictionary containing final feature tree and logs
    """
    if llm is None:
        llm = LLMClient()

    # Detect expand mode: output file exists with non-empty feature tree
    expand_mode = (
        output_file is not None
        and output_file.exists()
        and bool(feature_tree.get("feature_tree"))
    )

    if expand_mode:
        logger.info("=" * 60)
        logger.info("Step 1: Beyond-Spec Expansion (output file exists, spec features assumed complete)")
        logger.info(f"Max iterations: {MAX_ITERATIONS}")
        logger.info("=" * 60)
    else:
        logger.info("=" * 60)
        logger.info("Step 1: Build from Spec (Model Self-Termination)")
        logger.info(f"Max iterations: {MAX_ITERATIONS}")
        logger.info("=" * 60)

    iteration_logs = []
    previous_feature_tree = feature_tree.get("feature_tree", {})
    current_tree = previous_feature_tree
    repo_info = build_repo_info(feature_tree)
    consecutive_failures = 0

    for i in range(MAX_ITERATIONS):
        logger.info(f"\n{'=' * 20} Iteration {i + 1}/{MAX_ITERATIONS} {'=' * 20}")

        current_tree_json = json.dumps(current_tree, indent=2) if current_tree else "{}"
        # Mode selects the prompt template; both receive the same context.
        if expand_mode:
            prompt = PROMPT_TEMPLATE_BUILD_EXPAND.format(
                repo_info=repo_info, current_tree=current_tree_json
            )
        else:
            prompt = PROMPT_TEMPLATE_BUILD_FEATURE.format(
                repo_info=repo_info, current_tree=current_tree_json
            )

        think, result, response = llm.call_structured(
            system_prompt=prompt, user_prompt="",
            response_model=AddPathsOutput, max_retries=3,
            purpose=f"step1_{'expand' if expand_mode else 'build'}_{i + 1}",
        )

        if result is None:
            error_msg = "Failed to parse AI response or AI call failed"
            logger.warning(f"Iteration {i + 1}: {error_msg}")
            iteration_logs.append({
                "iteration": i + 1,
                "status": "error",
                "error": error_msg,
                "new_paths": [],
                "paths_count": 0,
                "is_complete": False,
                "think": think,
                "current_tree": current_tree,
                "response": response,
            })
            consecutive_failures += 1
            if consecutive_failures >= MAX_CONSECUTIVE_FAILURES:
                logger.warning(
                    f"[WARNING] {MAX_CONSECUTIVE_FAILURES} consecutive failures, stopping expansion"
                )
                break
            continue

        # Check completion signal
        if result.is_complete:
            logger.info(
                f"[OK] Model declared completion at iteration {i + 1}: {result.completion_reason}"
            )
            # Apply any remaining paths before stopping
            if result.add_new_feature_paths:
                new_paths, new_descs = _extract_paths_and_descs(result.add_new_feature_paths)
                logger.info(f"Applying {len(new_paths)} final paths before completion")
                current_tree = apply_changes(current_tree, new_paths)
                current_tree = convert_leaves_to_list(current_tree)
                if new_descs:
                    current_tree = attach_descriptions(current_tree, new_descs)
            iteration_logs.append({
                "iteration": i + 1,
                "status": "complete",
                "new_paths": result.add_new_feature_paths,
                "paths_count": len(result.add_new_feature_paths),
                "is_complete": True,
                "completion_reason": result.completion_reason,
                "think": think,
                "current_tree": current_tree,
                "response": response,
            })
            break

        if not result.add_new_feature_paths:
            # An empty answer counts toward the consecutive-failure limit.
            logger.info(f"Iteration {i + 1}: No new paths returned (continuing)")
            iteration_logs.append({
                "iteration": i + 1,
                "status": "empty",
                "new_paths": [],
                "paths_count": 0,
                "is_complete": False,
                "think": think,
                "current_tree": current_tree,
                "response": response,
            })
            consecutive_failures += 1
            if consecutive_failures >= MAX_CONSECUTIVE_FAILURES:
                logger.warning(
                    f"[WARNING] {MAX_CONSECUTIVE_FAILURES} consecutive empty/error responses, stopping expansion"
                )
                break
            continue

        # Reset counter on successful path addition
        consecutive_failures = 0
        new_paths, new_descs = _extract_paths_and_descs(result.add_new_feature_paths)
        logger.info(f"Received {len(new_paths)} new paths")

        current_tree = apply_changes(current_tree, new_paths)
        current_tree = convert_leaves_to_list(current_tree)
        if new_descs:
            current_tree = attach_descriptions(current_tree, new_descs)

        iteration_logs.append({
            "iteration": i + 1,
            "status": "success",
            "new_paths": result.add_new_feature_paths,
            "paths_count": len(result.add_new_feature_paths),
            "is_complete": False,
            "think": think,
            "current_tree": current_tree,
            "response": response,
        })

        if output_file:
            _save_intermediate(
                feature_tree, current_tree, previous_feature_tree,
                iteration_logs, [], output_file,
            )
            logger.info(f"[OK] Iteration {i + 1} results saved to {output_file}")
    else:
        # for/else: runs only when the loop exhausted MAX_ITERATIONS
        # without hitting a break (i.e. no completion signal).
        logger.warning(
            f"[WARNING] Reached maximum iterations ({MAX_ITERATIONS}) without model declaring completion"
        )

    logger.info(f"\n{'=' * 60}")
    logger.info("Expansion phase completed")
    logger.info(f"{'=' * 60}")

    # Review phase:
    # - Expand mode: lightweight review (duplicates + MIU only, no coverage gaps)
    # - Spec mode: full review (coverage gaps + MIU + duplicates)
    current_tree, review_logs = _run_review_phase(
        llm=llm,
        feature_tree=feature_tree,
        current_tree=current_tree,
        previous_feature_tree=previous_feature_tree,
        iteration_logs=iteration_logs,
        output_file=output_file,
        review_max_iterations=review_max_iterations,
        review_threshold=review_threshold,
        skip_coverage_gaps=expand_mode,
    )

    # NOTE: rebinds the loop's `result` (AddPathsOutput) to the return dict.
    result = {
        "repository_name": feature_tree.get("repository_name", "unknown"),
        "repository_purpose": feature_tree.get("repository_purpose", ""),
        "repository_specification": feature_tree.get("repository_specification", ""),
        "feature_tree": current_tree,
        "previous_feature_tree": previous_feature_tree,
        "iteration_logs": iteration_logs,
        "review_logs": review_logs,
        "expansion_directions": feature_tree.get("expansion_directions") or [],
    }
    return result
def expand_with_direction(
    feature_tree: Dict[str, Any],
    direction: str,
    output_file: Path = None,
    review_max_iterations: int = 3,
    llm: LLMClient = None,
) -> Dict[str, Any]:
    """Step 2: Expand the feature tree in a user-chosen direction.

    The model self-terminates by setting ``is_complete``; otherwise the loop
    runs for at most MAX_ITERATIONS rounds. Only reasonable and necessary
    features for the given direction are added. After expansion, a
    lightweight review runs (duplicates + MIU only, no coverage-gap pass).

    Args:
        feature_tree: Feature tree data (with repo info and existing tree).
        direction: User-chosen expansion direction (short name or full text).
        output_file: Optional path for saving intermediate results.
        review_max_iterations: Max review iterations for the lightweight review.
        llm: LLMClient instance (created on demand when None).

    Returns:
        Dictionary containing the final feature tree and all logs.
    """
    if llm is None:
        llm = LLMClient()

    # Resolve a short direction name to its full saved context
    # (description + rationale) when available; fall back to the raw string.
    resolved_direction = _resolve_direction(feature_tree, direction)

    logger.info("=" * 60)
    logger.info("Step 2: Directed Expansion")
    logger.info(f"Direction: {direction}")
    if resolved_direction != direction:
        logger.info(f"Resolved to full context ({len(resolved_direction)} chars)")
    logger.info(f"Max iterations: {MAX_ITERATIONS}")
    logger.info("=" * 60)

    # Record the user's selection in the latest round (in-memory; persisted
    # by the normal save flow).
    _record_selected_direction(feature_tree, direction)

    iteration_logs = []
    previous_feature_tree = feature_tree.get("feature_tree", {})
    current_tree = previous_feature_tree
    repo_info = build_repo_info(feature_tree)
    consecutive_failures = 0

    for i in range(MAX_ITERATIONS):
        logger.info(f"\n{'=' * 20} Iteration {i + 1}/{MAX_ITERATIONS} {'=' * 20}")

        current_tree_json = json.dumps(current_tree, indent=2) if current_tree else "{}"
        prompt = PROMPT_TEMPLATE_BUILD_DIRECTED.format(
            repo_info=repo_info,
            current_tree=current_tree_json,
            direction=resolved_direction,
        )

        think, result, response = llm.call_structured(
            system_prompt=prompt, user_prompt="",
            response_model=AddPathsOutput, max_retries=3,
            purpose=f"step2_directed_{i + 1}",
        )

        if result is None:
            error_msg = "Failed to parse AI response or AI call failed"
            logger.warning(f"Iteration {i + 1}: {error_msg}")
            iteration_logs.append({
                "iteration": i + 1,
                "status": "error",
                "error": error_msg,
                "new_paths": [],
                "paths_count": 0,
                "is_complete": False,
                "think": think,
                "current_tree": current_tree,
                "response": response,
            })
            consecutive_failures += 1
            if consecutive_failures >= MAX_CONSECUTIVE_FAILURES:
                logger.warning(
                    f"[WARNING] {MAX_CONSECUTIVE_FAILURES} consecutive failures, stopping expansion"
                )
                break
            continue

        # Completion signal: apply any final paths, log, and stop.
        if result.is_complete:
            logger.info(
                f"[OK] Model declared completion at iteration {i + 1}: {result.completion_reason}"
            )
            if result.add_new_feature_paths:
                new_paths, new_descs = _extract_paths_and_descs(result.add_new_feature_paths)
                logger.info(f"Applying {len(new_paths)} final paths before completion")
                current_tree = apply_changes(current_tree, new_paths)
                current_tree = convert_leaves_to_list(current_tree)
                if new_descs:
                    current_tree = attach_descriptions(current_tree, new_descs)
            iteration_logs.append({
                "iteration": i + 1,
                "status": "complete",
                "new_paths": result.add_new_feature_paths,
                "paths_count": len(result.add_new_feature_paths),
                "is_complete": True,
                "completion_reason": result.completion_reason,
                "think": think,
                "current_tree": current_tree,
                "response": response,
            })
            break

        if not result.add_new_feature_paths:
            logger.info(f"Iteration {i + 1}: No new paths returned (continuing)")
            iteration_logs.append({
                "iteration": i + 1,
                "status": "empty",
                "new_paths": [],
                "paths_count": 0,
                "is_complete": False,
                "think": think,
                "current_tree": current_tree,
                "response": response,
            })
            consecutive_failures += 1
            if consecutive_failures >= MAX_CONSECUTIVE_FAILURES:
                logger.warning(
                    f"[WARNING] {MAX_CONSECUTIVE_FAILURES} consecutive empty/error responses, stopping expansion"
                )
                break
            continue

        # Successful path addition: reset the failure counter and merge.
        consecutive_failures = 0
        new_paths, new_descs = _extract_paths_and_descs(result.add_new_feature_paths)
        logger.info(f"Received {len(new_paths)} new paths")

        current_tree = apply_changes(current_tree, new_paths)
        current_tree = convert_leaves_to_list(current_tree)
        if new_descs:
            current_tree = attach_descriptions(current_tree, new_descs)

        iteration_logs.append({
            "iteration": i + 1,
            "status": "success",
            "new_paths": result.add_new_feature_paths,
            "paths_count": len(result.add_new_feature_paths),
            "is_complete": False,
            "think": think,
            "current_tree": current_tree,
            "response": response,
        })

        if output_file:
            _save_intermediate(
                feature_tree, current_tree, previous_feature_tree,
                iteration_logs, [], output_file,
            )
            logger.info(f"[OK] Iteration {i + 1} results saved to {output_file}")
    else:
        logger.warning(
            f"[WARNING] Reached maximum iterations ({MAX_ITERATIONS}) without model declaring completion"
        )

    logger.info(f"\n{'=' * 60}")
    logger.info("Directed expansion phase completed")
    logger.info(f"{'=' * 60}")

    # Lightweight review: duplicates + MIU only (no coverage gap analysis).
    current_tree, review_logs = _run_review_phase(
        llm=llm,
        feature_tree=feature_tree,
        current_tree=current_tree,
        previous_feature_tree=previous_feature_tree,
        iteration_logs=iteration_logs,
        output_file=output_file,
        review_max_iterations=review_max_iterations,
        review_threshold=100.0,  # Not used when skip_coverage_gaps=True
        skip_coverage_gaps=True,
    )

    return {
        "repository_name": feature_tree.get("repository_name", "unknown"),
        "repository_purpose": feature_tree.get("repository_purpose", ""),
        "repository_specification": feature_tree.get("repository_specification", ""),
        "feature_tree": current_tree,
        "previous_feature_tree": previous_feature_tree,
        "iteration_logs": iteration_logs,
        "review_logs": review_logs,
        "expansion_directions": feature_tree.get("expansion_directions") or [],
    }


def suggest_directions(
    feature_tree: Dict[str, Any],
    output_file: Path = None,
    llm: LLMClient = None,
) -> Dict[str, Any]:
    """Suggest 4-6 expansion directions based on the current feature tree.

    Persists the suggested directions as a new round in the
    ``expansion_directions`` array of *output_file* (when the file exists).

    Args:
        feature_tree: Feature tree data (with repo info and existing tree).
        output_file: Output file path to persist directions.
        llm: LLMClient instance (created on demand when None).

    Returns:
        Dictionary with a ``directions`` list (plus ``error`` on failure).
    """
    if llm is None:
        llm = LLMClient()

    logger.info("=" * 60)
    logger.info("Suggesting Expansion Directions")
    logger.info("=" * 60)

    current_tree = feature_tree.get("feature_tree", {})
    repo_info = build_repo_info(feature_tree)
    current_tree_json = json.dumps(current_tree, indent=2) if current_tree else "{}"

    # Give the model the history of previously generated/selected directions
    # so it can avoid repeating them.
    expansion_history = _build_expansion_history(feature_tree)

    prompt = PROMPT_TEMPLATE_SUGGEST_DIRECTIONS.format(
        repo_info=repo_info,
        current_tree=current_tree_json,
        expansion_history=expansion_history,
    )

    think, result, response = llm.call_structured(
        system_prompt=prompt, user_prompt="",
        response_model=SuggestDirectionsOutput, max_retries=3,
        purpose="suggest_directions",
    )

    if result is None:
        logger.error("Failed to get direction suggestions")
        return {"directions": [], "error": "Failed to parse AI response"}

    generated_at = datetime.now(timezone.utc).isoformat()
    directions = [
        {"name": d.name, "description": d.description, "rationale": d.rationale}
        for d in result.directions
    ]

    # Save directions as a new round in the expansion_directions array.
    if output_file and output_file.exists():
        try:
            existing_data = load_json(output_file)
            expansion_dirs = existing_data.get("expansion_directions") or []

            # Migrate old single-object format to array-of-rounds if needed.
            if isinstance(expansion_dirs, dict):
                expansion_dirs = _migrate_expansion_directions(expansion_dirs)

            # Determine new round number and append the round.
            new_round = len(expansion_dirs) + 1
            expansion_dirs.append({
                "round": new_round,
                "generated_at": generated_at,
                "directions": directions,
                "selected": [],
            })

            existing_data["expansion_directions"] = expansion_dirs
            save_json(existing_data, output_file)
            logger.info(f"Saved {len(directions)} directions as round {new_round} to {output_file}")
        except Exception as e:
            logger.warning(f"Failed to save directions to {output_file}: {e}")
    elif output_file:
        # Fix: this case used to be silent, leaving --mode step2 unable to
        # find any saved directions ("Run --mode suggest-directions first").
        # Surface the missed persistence to the operator.
        logger.warning(
            f"Output file {output_file} does not exist; directions were NOT persisted. "
            "Run step1 first so step2 can resolve these directions."
        )

    logger.info(f"Generated {len(directions)} expansion directions")
    return {"directions": directions}


def _migrate_expansion_directions(old_format: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Migrate the old single-object expansion_directions format to the
    new array-of-rounds format.

    Old format:
        {"generated_at": "...", "directions": [...], "selected": [...]}

    New format:
        [{"round": 1, "generated_at": "...", "directions": [...], "selected": [...]}]

    Args:
        old_format: The old-style expansion_directions dict.

    Returns:
        List of round dicts in the new format (empty when there is nothing
        worth migrating).
    """
    if not old_format or not old_format.get("directions"):
        return []

    return [{
        "round": 1,
        "generated_at": old_format.get("generated_at", ""),
        "directions": old_format.get("directions", []),
        "selected": old_format.get("selected", []),
    }]


def _build_expansion_history(feature_tree: Dict[str, Any]) -> str:
    """Build a formatted expansion-history string from all previous rounds
    for inclusion in the suggest-directions prompt.

    Args:
        feature_tree: Feature tree data containing expansion_directions.

    Returns:
        Formatted string describing previous rounds of direction generation
        and selection.
    """
    expansion_dirs = feature_tree.get("expansion_directions") or []

    # Support the legacy dict format transparently.
    if isinstance(expansion_dirs, dict):
        expansion_dirs = _migrate_expansion_directions(expansion_dirs)

    if not expansion_dirs:
        return "No previous expansion history. This is the first time suggesting directions."

    lines = [f"Total previous rounds: {len(expansion_dirs)}\n"]

    for round_data in expansion_dirs:
        round_num = round_data.get("round", "?")
        generated_at = round_data.get("generated_at", "unknown")
        directions = round_data.get("directions", [])
        selected = round_data.get("selected", [])

        # Case-insensitive name sets for membership tests below.
        selected_names = {s.get("name", "").strip().lower() for s in selected}
        direction_names = {d.get("name", "").strip().lower() for d in directions}

        lines.append(f"### Round {round_num} (generated: {generated_at})")
        lines.append(f"Generated {len(directions)} directions, user selected {len(selected)}:\n")

        for i, d in enumerate(directions, 1):
            name = d.get("name", "")
            desc = d.get("description", "")
            rationale = d.get("rationale", "")
            was_selected = name.strip().lower() in selected_names
            status = "[SELECTED & EXPANDED]" if was_selected else "[NOT SELECTED]"

            lines.append(f"  {i}. {status} **{name}**")
            if desc:
                lines.append(f"     Description: {desc}")
            if rationale:
                lines.append(f"     Rationale: {rationale}")
            lines.append("")

        # List selected directions that don't match any generated direction
        # in this round (can happen with data migrated from the old format).
        orphaned_selected = [
            s for s in selected
            if s.get("name", "").strip().lower() not in direction_names
        ]
        if orphaned_selected:
            lines.append("  Also expanded (from earlier sessions):")
            for s in orphaned_selected:
                lines.append(f"  - [SELECTED & EXPANDED] **{s.get('name', '')}**")
            lines.append("")

    return "\n".join(lines)


def _resolve_direction(
    feature_tree: Dict[str, Any],
    direction_name: str,
) -> str:
    """Resolve a short direction name to a full direction string for the prompt.

    Looks up saved directions across all rounds in feature_tree data. If found,
    returns the name plus description and rationale. Otherwise returns the
    name as-is.

    Args:
        feature_tree: Feature tree data (may contain expansion_directions).
        direction_name: Short direction name from CLI.

    Returns:
        Full direction string for the prompt.
    """
    saved = feature_tree.get("expansion_directions") or []

    # Support both old dict format and new array format.
    if isinstance(saved, dict):
        saved = _migrate_expansion_directions(saved)

    direction_lower = direction_name.strip().lower()

    # Search across all rounds, latest first.
    for round_data in reversed(saved):
        saved_directions = round_data.get("directions", [])
        for d in saved_directions:
            if d.get("name", "").strip().lower() == direction_lower:
                parts = [d["name"]]
                if d.get("description"):
                    parts.append(d["description"])
                if d.get("rationale"):
                    parts.append(f"Rationale: {d['rationale']}")
                logger.info(f"Resolved direction '{direction_name}' from saved data")
                return "\n".join(parts)

    logger.info(f"Direction '{direction_name}' not found in saved data, using as-is")
    return direction_name
_record_selected_direction( + feature_tree: Dict[str, Any], + direction_name: str, +): + """Record that the user selected a direction for expansion. + + Updates the in-memory feature_tree dict (persisted via normal save flow). + Records the selection in the latest round of expansion_directions. + + Args: + feature_tree: Feature tree data (modified in-place) + direction_name: The direction name selected by user + """ + expansion_dirs = feature_tree.get("expansion_directions") or [] + + # Migrate old dict format if needed + if isinstance(expansion_dirs, dict): + expansion_dirs = _migrate_expansion_directions(expansion_dirs) + feature_tree["expansion_directions"] = expansion_dirs + + if not expansion_dirs: + return + + # Record in the latest round + latest_round = expansion_dirs[-1] + selected = latest_round.get("selected", []) + + # Avoid duplicate entries for same direction name within the same round + already_selected = any( + s.get("name", "").strip().lower() == direction_name.strip().lower() + for s in selected + ) + if not already_selected: + selected.append({ + "name": direction_name, + "selected_at": datetime.now(timezone.utc).isoformat(), + }) + latest_round["selected"] = selected + logger.info(f"Recorded selected direction: {direction_name} (round {latest_round.get('round', '?')})") + + +# ======================== Data Loading ======================== + + +def _load_feature_data(feature_build_path: Path, feature_spec_path: Path) -> Dict[str, Any]: + """Load feature tree data from feature_build.json or feature_spec.json.""" + use_feature_build = False + if feature_build_path.exists(): + try: + feature_build_data = load_json(feature_build_path) + repository_name = feature_build_data.get("repository_name", "") + repository_purpose = feature_build_data.get("repository_purpose", "") + repository_specification = feature_build_data.get( + "repository_specification", "" + ) + feature_tree_data = feature_build_data.get("feature_tree", {}) + + if ( + 
isinstance(repository_name, str) + and repository_name.strip() + and isinstance(repository_purpose, str) + and repository_purpose.strip() + and isinstance(repository_specification, str) + and repository_specification.strip() + and isinstance(feature_tree_data, dict) + and feature_tree_data + ): + use_feature_build = True + logger.info( + "feature_build.json has all required fields, using it as input" + ) + else: + logger.info( + "feature_build.json exists but has empty required fields, will use feature_spec.json" + ) + except Exception as e: + logger.warning( + f"Failed to validate feature_build.json: {e}, will use feature_spec.json" + ) + + if use_feature_build: + feature_tree = feature_build_data + logger.info("Loaded from feature_build.json:") + logger.info( + f" repository_name: {feature_tree.get('repository_name', 'unknown')}" + ) + logger.info( + f" repository_purpose: {len(feature_tree.get('repository_purpose', ''))} chars" + ) + logger.info( + f" repository_specification: {len(feature_tree.get('repository_specification', ''))} chars" + ) + logger.info( + f" feature_tree: {len(feature_tree.get('feature_tree', {}))} top-level categories" + ) + else: + if not feature_spec_path.exists(): + logger.error(f"feature_spec.json not found: {feature_spec_path}") + sys.exit(1) + + try: + feature_spec = load_json(feature_spec_path) + + feature_tree = { + "repository_name": "", + "repository_purpose": "", + "repository_specification": "", + "feature_tree": {}, + } + + spec_repo_name = feature_spec.get("repository_name", "").strip() + if spec_repo_name: + feature_tree["repository_name"] = spec_repo_name + logger.info( + f"Loaded repository_name from feature_spec.json: {spec_repo_name}" + ) + + spec_repo_purpose = feature_spec.get("repository_purpose", "").strip() + if spec_repo_purpose: + feature_tree["repository_purpose"] = spec_repo_purpose + logger.info( + f"Loaded repository_purpose from feature_spec.json ({len(spec_repo_purpose)} chars)" + ) + + spec_content = 
feature_spec_path.read_text(encoding="utf-8").strip() + if spec_content: + feature_tree["repository_specification"] = spec_content + logger.info( + f"Loaded repository_specification from feature_spec.json ({len(spec_content)} chars)" + ) + + if feature_build_path.exists(): + try: + existing_build = load_json(feature_build_path) + existing_tree = existing_build.get("feature_tree", {}) + if isinstance(existing_tree, dict) and existing_tree: + feature_tree["feature_tree"] = existing_tree + logger.info( + f"Loaded existing feature_tree from feature_build.json ({len(existing_tree)} top-level categories)" + ) + # Preserve expansion_directions from existing build + existing_dirs = existing_build.get("expansion_directions") + if existing_dirs: + feature_tree["expansion_directions"] = existing_dirs + logger.info("Loaded existing expansion_directions from feature_build.json") + except Exception as e: + logger.warning( + f"Could not load feature_tree from existing feature_build.json: {e}" + ) + + except Exception as e: + logger.error(f"Failed to read feature_spec.json: {e}") + sys.exit(1) + + # Validate required fields + if not feature_tree.get("repository_name"): + logger.error( + f"repository_name is required. Ensure {feature_spec_path} exists and has repository_name field." + ) + sys.exit(1) + + if not feature_tree.get("repository_specification"): + logger.error( + f"repository_specification is required. Ensure {feature_spec_path} exists and has content." 
+ ) + sys.exit(1) + + return feature_tree + + +# ======================== Main Function ======================== + + +def main(): + parser = argparse.ArgumentParser( + description="Feature tree expansion script - Two-step workflow" + ) + + parser.add_argument( + "--mode", + choices=["step1", "step2", "suggest-directions"], + default="step1", + help="Operation mode: step1 (spec-driven build), step2 (directed expansion), suggest-directions", + ) + + parser.add_argument( + "--direction", + type=str, + default="", + help="Comma-separated direction indices from suggest-directions output, e.g. '1,3,5' (required for --mode step2)", + ) + + parser.add_argument( + "--feature-tree", + type=Path, + required=False, + default=FEATURE_BUILD_FILE, + help=f"Path to feature tree JSON file (default: {FEATURE_BUILD_FILE})", + ) + + parser.add_argument( + "--feature-spec", + type=Path, + required=False, + default=FEATURE_SPEC_FILE, + help=f"Path to feature spec JSON file (default: {FEATURE_SPEC_FILE})", + ) + + parser.add_argument( + "--output", + type=Path, + default=FEATURE_BUILD_FILE, + help=f"Output file path (default: {FEATURE_BUILD_FILE})", + ) + + parser.add_argument("--verbose", action="store_true", help="Show verbose logging") + + parser.add_argument( + "--no-trajectory", + action="store_true", + help="Disable trajectory recording", + ) + + # Review arguments (used in step1; step2 uses lightweight review automatically) + parser.add_argument( + "--review-max-iterations", + type=int, + default=3, + help="Maximum number of review iterations (default: 3)", + ) + + parser.add_argument( + "--review-threshold", + type=float, + default=98.0, + help="Coverage percentage threshold to stop review (default: 98.0, step1 only)", + ) + + args = parser.parse_args() + + # Validate step2 requires --direction + if args.mode == "step2" and not args.direction.strip(): + parser.error("--direction is required when --mode is step2") + + # Set log level + if args.verbose: + 
logging.getLogger().setLevel(logging.DEBUG) + + # Load feature data + logger.info("Loading input data...") + feature_tree = _load_feature_data(args.feature_tree, args.feature_spec) + + logger.info(f"Repository: {feature_tree.get('repository_name', 'unknown')}") + logger.info(f"Mode: {args.mode}") + + # Initialize trajectory + trajectory = None + step_id = None + if not args.no_trajectory: + trajectory = load_or_create_trajectory("feature_build") + trajectory.start(metadata={ + "mode": args.mode, + "direction": args.direction if args.mode == "step2" else "", + "output_file": str(args.output), + }) + step_desc = { + "step1": "Build feature tree from specification", + "step2": f"Expand feature tree: directions {args.direction}", + "suggest-directions": "Suggest expansion directions", + }[args.mode] + step = trajectory.add_step("feature_build", step_desc) + trajectory.start_step(step.step_id) + step_id = step.step_id + + # Execute + try: + llm = LLMClient(trajectory=trajectory, step_id=step_id) + + if args.mode == "step1": + result = build_from_spec( + feature_tree, + output_file=args.output, + review_max_iterations=args.review_max_iterations, + review_threshold=args.review_threshold, + llm=llm, + ) + save_json(result, args.output) + + # Print summary + iteration_logs = result.get("iteration_logs", []) + review_logs = result.get("review_logs", []) + final_tree = result.get("feature_tree", {}) + previous_tree = result.get("previous_feature_tree", {}) + print_summary_tables(iteration_logs, final_tree, review_logs, previous_tree) + if review_logs: + _print_review_summary(review_logs) + + elif args.mode == "step2": + # Parse direction indices (comma-separated) + try: + direction_indices = [int(x.strip()) for x in args.direction.split(",")] + except ValueError: + logger.error("--direction must be comma-separated integers (e.g., '1,3,5')") + sys.exit(1) + + # Deduplicate while preserving order + seen = set() + unique_indices = [] + for idx in direction_indices: + if idx not 
in seen: + seen.add(idx) + unique_indices.append(idx) + if len(unique_indices) < len(direction_indices): + logger.info( + f"Deduplicated direction indices: {direction_indices} -> {unique_indices}" + ) + direction_indices = unique_indices + + # Resolve indices to direction names from saved data + saved = feature_tree.get("expansion_directions") or [] + + # Support both old dict format and new array format + if isinstance(saved, dict): + saved = _migrate_expansion_directions(saved) + + if not saved: + logger.error("No saved expansion directions found. Run --mode suggest-directions first.") + sys.exit(1) + + # Use the latest round's directions for index resolution + latest_round = saved[-1] + saved_directions = latest_round.get("directions", []) + + if not saved_directions: + logger.error("No saved expansion directions found in latest round. Run --mode suggest-directions first.") + sys.exit(1) + + direction_names = [] + for idx in direction_indices: + if idx < 1 or idx > len(saved_directions): + logger.error( + f"Invalid direction index: {idx} (valid range: 1-{len(saved_directions)})" + ) + sys.exit(1) + direction_names.append(saved_directions[idx - 1]["name"]) + + logger.info(f"Expanding {len(direction_names)} direction(s): {direction_names}") + + for dir_i, direction_name in enumerate(direction_names): + logger.info(f"\n{'#' * 60}") + logger.info(f"Direction {dir_i + 1}/{len(direction_names)}: {direction_name}") + logger.info(f"{'#' * 60}") + + # Reload feature tree between directions (previous expansion saved to args.output) + if dir_i > 0: + feature_tree = _load_feature_data(args.output, args.feature_spec) + + result = expand_with_direction( + feature_tree, + direction=direction_name, + output_file=args.output, + review_max_iterations=args.review_max_iterations, + llm=llm, + ) + save_json(result, args.output) + + # Print summary for this direction + iteration_logs = result.get("iteration_logs", []) + review_logs = result.get("review_logs", []) + final_tree = 
result.get("feature_tree", {}) + previous_tree = result.get("previous_feature_tree", {}) + print_summary_tables(iteration_logs, final_tree, review_logs, previous_tree) + if review_logs: + _print_review_summary(review_logs) + + elif args.mode == "suggest-directions": + result = suggest_directions(feature_tree, output_file=args.output, llm=llm) + # Output JSON to stdout for agent parsing + print(json.dumps(result, indent=2, ensure_ascii=False)) + + except Exception as e: + if trajectory: + if step_id is not None: + trajectory.fail_step(step_id, str(e)) + trajectory.fail(str(e)) + raise + + # Mark trajectory as complete + if trajectory: + completion_metadata = {"mode": args.mode} + if args.mode in ("step1", "step2"): + completion_metadata["feature_count"] = len(result.get("feature_tree", {})) + completion_metadata["review_iterations"] = len(result.get("review_logs", [])) + elif args.mode == "suggest-directions": + completion_metadata["directions_count"] = len(result.get("directions", [])) + + if step_id is not None: + trajectory.complete_step(step_id, completion_metadata) + trajectory.complete(metadata=completion_metadata) + logger.info(f"[OK] Trajectory saved to: {trajectory.trajectory_file}") + + logger.info(f"\n[OK] Done (mode={args.mode})") + + +if __name__ == "__main__": + main() diff --git a/RPG-Kit/scripts/feature_build_validation.py b/RPG-Kit/scripts/feature_build_validation.py new file mode 100644 index 0000000..1837f02 --- /dev/null +++ b/RPG-Kit/scripts/feature_build_validation.py @@ -0,0 +1,257 @@ +#!/usr/bin/env python3 +"""Validate feature_spec.json (input) and feature_build.json (output) for /rpgkit.feature_build command. + +This script checks: +1. Input file: .rpgkit/data/feature_spec.json + - File existence + - Required fields: meta, background_and_overview, functional_requirements + - Fields must exist and not be empty + +2. 
Output file: .rpgkit/data/feature_build.json + - File existence + - Fields status: repository_name, repository_purpose, repository_specification, feature_tree + +Output: +- Status messages are printed to stderr (user-friendly progress info) +- JSON result is printed to stdout (for agent parsing) + +Exit codes: +- 0: Input file is valid (output file status is informational only) +- 1: Input file has errors (missing or invalid) +""" + +import json +import logging +import sys +from pathlib import Path +from typing import Any, Dict, List + +from common.paths import FEATURE_SPEC_FILE, FEATURE_BUILD_FILE + +# File paths +INPUT_FILE = FEATURE_SPEC_FILE +OUTPUT_FILE = FEATURE_BUILD_FILE + +# Required fields for input file +INPUT_REQUIRED_FIELDS = [ + "meta", + "repository_name", + "repository_purpose", + "background_and_overview", + "functional_requirements", + "non_functional_requirements", +] + +# Fields to check in output file +OUTPUT_CHECK_FIELDS = [ + "repository_name", + "repository_purpose", + "repository_specification", + "feature_tree", +] + + +def print_status(message: str) -> None: + """Print status message to stderr to keep stdout clean for JSON.""" + print(message, file=sys.stderr) + + +def load_json(path: Path) -> Dict[str, Any] | None: + """Load JSON file and return data if valid, None otherwise.""" + try: + with path.open("r", encoding="utf-8") as f: + data = json.load(f) + if isinstance(data, dict): + return data + except json.JSONDecodeError as e: + print_status(f" [FAIL] JSON parse error: {e}") + except Exception as e: + print_status(f" [FAIL] Failed to read file: {e}") + return None + + +def is_field_valid(data: Dict[str, Any], field: str) -> bool: + """Check if a field exists and is not empty.""" + if field not in data: + return False + + value = data[field] + + if value is None: + return False + if isinstance(value, str) and not value.strip(): + return False + if isinstance(value, list) and len(value) == 0: + return False + if isinstance(value, dict) 
and len(value) == 0: + return False + + return True + + +def count_nodes(nodes: List[Dict[str, Any]]) -> int: + """Recursively count all nodes in the tree.""" + count = 0 + for node in nodes: + count += 1 + if "children" in node and isinstance(node["children"], list): + count += count_nodes(node["children"]) + return count + + +def validate_input_file() -> Dict[str, Any]: + """Validate the input file (.rpgkit/data/feature_spec.json).""" + result = { + "valid": False, + "exists": False, + "errors": [], + "fields": {field: False for field in INPUT_REQUIRED_FIELDS}, + "meta": None, + } + + if not INPUT_FILE.exists(): + result["errors"].append(f"Input file not found: {INPUT_FILE}") + print_status(f"[INPUT] [FAIL] {INPUT_FILE} not found") + return result + + result["exists"] = True + + data = load_json(INPUT_FILE) + if data is None: + result["errors"].append("Failed to parse JSON or file is empty") + print_status("[INPUT] [FAIL] Invalid JSON") + return result + + all_fields_valid = True + missing_fields = [] + for field in INPUT_REQUIRED_FIELDS: + if is_field_valid(data, field): + result["fields"][field] = True + else: + all_fields_valid = False + missing_fields.append(field) + if field not in data: + result["errors"].append(f"Missing required field: {field}") + else: + result["errors"].append(f"Field is empty: {field}") + + if "meta" in data and isinstance(data["meta"], dict): + meta_dict = data["meta"] + result["meta"] = { + "repository_name": data.get("repository_name"), + "repository_purpose": data.get("repository_purpose"), + "generated_at": meta_dict.get("generated_at"), + "source_documents": meta_dict.get("source_documents"), + "project_types": meta_dict.get("project_types"), + "project_notes": meta_dict.get("project_notes"), + } + + # Validate project_types / project_notes (plan B3). 
Soft-fail with + # an error entry so the operator regenerates feature_spec, but + # don't prevent legacy specs (without these fields) from running + # through downstream stages โ€” they will simply miss the project- + # specific prompt branches. + try: + from common.project_types import validate_project_types + types, notes = validate_project_types(meta_dict) + result["meta"]["project_types"] = types + result["meta"]["project_notes"] = notes + except Exception as exc: + # Only treat as error when the field is present but invalid; + # missing field is treated as a warning so legacy spec files + # still load. + if "project_types" in meta_dict or "project_notes" in meta_dict: + result["errors"].append(f"meta validation: {exc}") + all_fields_valid = False + else: + logger = logging.getLogger(__name__) + logger.warning( + "feature_spec.meta is missing project_types/project_notes " + "(plan B3); downstream prompts will lack project-type context" + ) + + if result["fields"]["functional_requirements"]: + total_nodes = count_nodes(data.get("functional_requirements", [])) + result["functional_requirements_count"] = total_nodes + + if all_fields_valid: + result["valid"] = True + print_status( + f"[INPUT] [OK] Valid ({result.get('functional_requirements_count', 0)} nodes)" + ) + else: + print_status(f"[INPUT] [FAIL] Missing: {', '.join(missing_fields)}") + + return result + + +def check_output_file() -> Dict[str, Any]: + """Check the output file (.rpgkit/data/feature_build.json) status.""" + result = { + "exists": False, + "has_content": False, + "errors": [], + } + + if not OUTPUT_FILE.exists(): + print_status("[OUTPUT] [-] Not exists (will create)") + return result + + result["exists"] = True + + data = load_json(OUTPUT_FILE) + if data is None: + result["errors"].append("Invalid JSON or empty file") + print_status("[OUTPUT] [-] Exists but invalid JSON") + return result + + result["has_content"] = True + + print_status("[OUTPUT] [OK] Exists") + return result + + +def main() 
-> None: + input_result = validate_input_file() + output_result = check_output_file() + + # Build simplified result (validation status only, no file content) + result = { + "input_file": str(INPUT_FILE), + "output_file": str(OUTPUT_FILE), + "input": { + "valid": input_result["valid"], + "exists": input_result["exists"], + "errors": input_result["errors"], + }, + "output": { + "exists": output_result["exists"], + "has_content": output_result["has_content"], + "errors": output_result["errors"], + }, + } + if not input_result["valid"]: + result["status"] = "error" + result["message"] = "Input invalid" + result["action"] = "none" + elif output_result["exists"] and output_result["has_content"]: + result["status"] = "ready" + result["message"] = "Output exists" + result["action"] = "overwrite_or_skip" + else: + result["status"] = "ready" + result["message"] = "Ready to create" + result["action"] = "create" + + print_status(f"[RESULT] status={result['status']}, action={result['action']}") + + print(json.dumps(result, ensure_ascii=False, indent=2)) + + if result["status"] == "error": + sys.exit(1) + else: + sys.exit(0) + + +if __name__ == "__main__": + main() diff --git a/RPG-Kit/scripts/feature_edit.py b/RPG-Kit/scripts/feature_edit.py new file mode 100644 index 0000000..3ab93f5 --- /dev/null +++ b/RPG-Kit/scripts/feature_edit.py @@ -0,0 +1,1309 @@ +#!/usr/bin/env python3 +"""Edit Feature Tree Script (Three-Phase Approach). 
+ +Phase 1: Planning - Analyze all components and generate edit plan +Phase 2: Execution - Execute the plan precisely on each component +Phase 3: Review - Verify changes and auto-fix if needed (up to 3 rounds) + +Input/Output: .rpgkit/data/feature_tree.json +""" + +import json +import logging +import argparse +import copy +import time +from typing import Dict, Any, List, Optional, Union +from pydantic import BaseModel, Field + +from feature.prompts import ( + PROMPT_TEMPLATE_EDIT_PLAN, + PROMPT_TEMPLATE_EDIT_REVIEW, +) +from common.paths import FEATURE_TREE_FILE +from common import print_unicode_table, get_all_leaf_paths +from common.llm_client import LLMClient +from common.trajectory import load_or_create_trajectory + +# ============================================================================ +# Utility Functions +# ============================================================================ + + +def count_leaf_nodes(tree: Dict[str, Any]) -> int: + """Count only leaf nodes in the tree.""" + if not tree: + return 0 + + count = 0 + if isinstance(tree, dict): + for key, value in tree.items(): + if isinstance(value, dict): + if not value: + count += 1 + else: + count += count_leaf_nodes(value) + elif isinstance(value, list): + count += len(value) + else: + count += 1 + elif isinstance(tree, list): + count = len(tree) + + return count + + +def find_duplicate_features(components: List[Dict[str, Any]]) -> Dict[str, List[str]]: + """Find features that exist in multiple components (potential issues from move operations).""" + # Extract leaf names (last part of path) from each component + feature_locations = {} # feature_name -> list of component names + + for comp in components: + name = comp.get("name", "Unknown") + subtree = comp.get("refactored_subtree", {}) + paths = get_all_leaf_paths(subtree) + + for path in paths: + # Get the leaf name (last part) + leaf_name = path.split("/")[-1].lower() if "/" in path else path.lower() + if leaf_name not in feature_locations: 
+ feature_locations[leaf_name] = [] + feature_locations[leaf_name].append((name, path)) + + # Find duplicates (same leaf name in multiple components) + duplicates = {} + for feature, locations in feature_locations.items(): + if len(locations) > 1: + # Check if it's the same feature (not just same name) + unique_components = set(loc[0] for loc in locations) + if len(unique_components) > 1: + duplicates[feature] = [(comp, path) for comp, path in locations] + + return duplicates + + +# ============================================================================ +# Tree Operation Functions +# ============================================================================ + + +def insert_path(tree: Dict[str, Any], path: str, delimiter: str = "/") -> None: + """Insert a single path in place.""" + parts = [p.strip() for p in path.split(delimiter) if p.strip()] + parent, key_in_parent = None, None + node = tree + i = 0 + + while i < len(parts): + part, last = parts[i], i == len(parts) - 1 + + if isinstance(node, dict): + mk = next((k for k in node if k.lower() == part.lower()), None) + if last: + if mk is None: + node[part] = [] + break + else: + if mk is None: + node[part] = {} + mk = part + elif isinstance(node[mk], list): + node[mk] = {x: [] for x in node[mk]} + elif not isinstance(node[mk], dict): + node[mk] = {} + parent, key_in_parent = node, mk + node = node[mk] + i += 1 + continue + + elif isinstance(node, list): + if last: + if part.lower() not in (x.lower() for x in node): + node.append(part) + break + else: + upgraded = {x: [] for x in node} + parent[key_in_parent] = upgraded + node = upgraded + continue + else: + upgraded = {} + parent[key_in_parent] = upgraded + node = upgraded + continue + + +def _collapse_leaf_dicts(node: Union[Dict, List]) -> Union[Dict, List]: + """Collapse pure leaf dicts into lists.""" + if isinstance(node, dict): + if not node: + return {} + collapsed = {k: _collapse_leaf_dicts(v) for k, v in node.items()} + if all(isinstance(v, list) and 
len(v) == 0 for v in collapsed.values()): + return list(collapsed.keys()) + return collapsed + elif isinstance(node, list): + return [ + _collapse_leaf_dicts(v) if isinstance(v, (dict, list)) else v for v in node + ] + else: + return node + + +def apply_changes( + tree: Dict[str, Any], + changes: List[str], + delimiter: str = "/", + inplace: bool = True, + auto_collapse: bool = True, +) -> Dict[str, Any]: + """Batch insert paths.""" + target = tree if inplace else copy.deepcopy(tree) + for p in changes: + insert_path(target, p, delimiter) + if auto_collapse: + collapsed = _collapse_leaf_dicts(target) + if inplace: + tree.clear() + tree.update(collapsed) + return tree + else: + return collapsed + return target + + +def remove_paths( + tree: Dict[str, Any], paths: List[str], inplace: bool = False +) -> tuple[Dict[str, Any], Dict[str, bool]]: + """Remove specified paths from tree. + + Returns: + tuple: (modified_tree, removal_results) + removal_results is a dict mapping path -> bool (True if actually removed) + """ + if not inplace: + tree = copy.deepcopy(tree) + + removal_results = {} + + def delete_path(node, path_parts): + if not path_parts: + return False + key = path_parts[0] + if isinstance(node, dict): + matched_key = next((k for k in node if k.lower() == key.lower()), None) + if matched_key is None: + return False + + if len(path_parts) == 2 and isinstance(node[matched_key], list): + value_to_remove = path_parts[1] + found = False + for item in node[matched_key]: + item_name = item.get("name", "") if isinstance(item, dict) else item + if isinstance(item_name, str) and item_name.lower() == value_to_remove.lower(): + node[matched_key].remove(item) + found = True + break + if not found: + return False + if not node[matched_key]: + del node[matched_key] + return True + if len(path_parts) == 1: + del node[matched_key] + return True + child_deleted = delete_path(node[matched_key], path_parts[1:]) + if isinstance(node[matched_key], dict) and not node[matched_key]: + 
del node[matched_key] + return True + elif isinstance(node[matched_key], list) and not node[matched_key]: + del node[matched_key] + return True + return child_deleted + return False + + for path in paths: + if not path or not isinstance(path, str): + removal_results[path] = False + continue + path_parts = [p for p in path.split("/") if p] + was_deleted = delete_path(tree, path_parts) + removal_results[path] = was_deleted + + return tree, removal_results + + +# ============================================================================ +# Pydantic Data Models +# ============================================================================ + + +class ComponentOperation(BaseModel): + """Single operation for a specific component.""" + + component_name: str = Field(description="Name of the component to modify") + operation_type: str = Field(description="Type: DELETE, ADD, or MODIFY") + paths_to_remove: List[str] = Field( + default_factory=list, description="Paths to remove from this component" + ) + paths_to_add: List[str] = Field( + default_factory=list, description="Paths to add to this component" + ) + reason: str = Field(description="Brief explanation of why this operation is needed") + + +class EditPlan(BaseModel): + """Complete edit plan generated in Phase 1.""" + + summary: str = Field(description="Overall summary of the edit plan") + operations: List[ComponentOperation] = Field( + description="List of operations to perform" + ) + is_valid: bool = Field(description="Whether the plan is valid and can be executed") + validation_notes: str = Field(default="", description="Notes about plan validation") + + +class ReviewResult(BaseModel): + """Review result generated in Phase 3.""" + + thinking: str = Field(description="Detailed thinking process of the review") + summary: str = Field( + description="Human-readable summary of what was edited and the result" + ) + execution_matches_plan: bool = Field( + description="Whether execution result matches the plan" + ) + 
execution_matches_intent: bool = Field( + description="Whether execution result matches user's intent" + ) + issues_found: List[str] = Field( + default_factory=list, description="List of issues found during review" + ) + suggestions: List[str] = Field( + default_factory=list, description="Suggestions for improvement" + ) + overall_success: bool = Field(description="Overall success of the edit operation") + confidence_score: float = Field(description="Confidence score 0.0-1.0") + needs_fix: bool = Field( + default=False, description="Whether fix operations are needed" + ) + fix_operations: List[Dict[str, Any]] = Field( + default_factory=list, description="Operations to fix the issues found" + ) + + +# ============================================================================ +# Three-Phase Feature Tree Editor +# ============================================================================ + + +class FeatureTreeEditor: + """Feature tree editor with three-phase approach: Planning + Execution + Review.""" + + def __init__(self, llm_client: LLMClient, enable_review: bool = True): + self.llm = llm_client + self.enable_review = enable_review + self.logger = logging.getLogger(__name__) + + # Tracking + self.operations_executed = [] + self.paths_deleted = [] + self.paths_added = [] + + # State snapshots for review + self.state_before = {} + self.state_after = {} + + def edit( + self, + components: List[Dict[str, Any]], + edit_instruction: str, + repo_data: Dict[str, Any], + model_analysis: Optional[Dict[str, Any]] = None, + ) -> Dict[str, Any]: + """Execute three-phase editing workflow: Plan -> Execute -> Review.""" + # Capture state before edit + self.state_before = self._capture_state(components) + + self.logger.info("=" * 70) + self.logger.info("PHASE 1: PLANNING") + self.logger.info("=" * 70) + + # Build components summary for planning + components_summary = self._build_components_summary(components) + + # Phase 1: Generate edit plan + plan = 
self._generate_edit_plan(components_summary, edit_instruction, repo_data) + + if plan is None: + self.logger.error("[FAIL] Failed to generate edit plan") + return {"success": False, "error": "Failed to generate edit plan"} + + if not plan.is_valid: + self.logger.error(f"[FAIL] Plan is invalid: {plan.validation_notes}") + return {"success": False, "error": f"Invalid plan: {plan.validation_notes}"} + + # Display the plan + self._display_plan(plan) + + self.logger.info("\n" + "=" * 70) + self.logger.info("PHASE 2: EXECUTION") + self.logger.info("=" * 70) + + # Phase 2: Execute the plan + execution_results = self._execute_plan(plan, components) + + # Capture state after edit + self.state_after = self._capture_state(components) + + # Build result + result = { + "success": True, + "plan": plan, + "plan_summary": plan.summary, + "operations_executed": self.operations_executed, + "paths_deleted": self.paths_deleted, + "paths_added": self.paths_added, + "execution_results": execution_results, + } + + # Phase 3: Review with auto-fix loop (max 3 iterations) + if self.enable_review: + MAX_REVIEW_ITERATIONS = 3 + review_iterations = [] + final_status = "UNKNOWN" + review_result = None # Initialize to avoid reference before assignment + + for review_round in range(1, MAX_REVIEW_ITERATIONS + 1): + self.logger.info("\n" + "=" * 70) + self.logger.info( + f"PHASE 3: REVIEW (Round {review_round}/{MAX_REVIEW_ITERATIONS})" + ) + self.logger.info("=" * 70) + + # Update state snapshot before review + self.state_after = self._capture_state(components) + + review_result = self._review_execution( + edit_instruction=edit_instruction, + plan=plan, + execution_results=execution_results, + model_analysis=model_analysis, + components=components, # Pass components for duplicate detection + ) + + if review_result is None: + self.logger.warning("[WARNING] Review failed, skipping...") + final_status = "REVIEW_FAILED" + break + + # Store review iteration info + iteration_info = { + "round": 
review_round, + "execution_matches_plan": review_result.execution_matches_plan, + "execution_matches_intent": review_result.execution_matches_intent, + "issues_found": review_result.issues_found, + "overall_success": review_result.overall_success, + "confidence_score": review_result.confidence_score, + "needs_fix": review_result.needs_fix, + "fix_operations_count": len(review_result.fix_operations) + if review_result.fix_operations + else 0, + } + review_iterations.append(iteration_info) + + # Display review results + self._display_review(review_result, review_round) + + # Case 1: Success - no issues, no fixes needed + if review_result.overall_success and not review_result.needs_fix: + final_status = "SUCCESS" + print("\n" + "=" * 70) + print("[OK] REVIEW COMPLETE - ALL CHANGES VERIFIED") + print("=" * 70) + print(f"\n Review passed after {review_round} round(s)") + print(f" Confidence: {review_result.confidence_score:.2f}") + if review_result.summary: + print("\n Final Summary:") + print(f" {review_result.summary}") + print("\n" + "=" * 70) + break + + # Case 2: Issues found, fixes needed + if review_result.needs_fix and review_result.fix_operations: + print("\n [WARNING] Status: Issues detected, applying fixes...") + self.logger.info( + f"\n Applying {len(review_result.fix_operations)} fix operations..." + ) + + # Execute fix operations + fix_results = self._execute_fix_operations( + review_result.fix_operations, components + ) + + # Update execution results + execution_results.extend(fix_results) + result["execution_results"] = execution_results + + print(" [OK] Fix operations completed") + + # Check if this is the last round + if review_round < MAX_REVIEW_ITERATIONS: + print( + f" โ†’ Proceeding to verification round {review_round + 1}..." 
+ ) + continue + else: + final_status = "MAX_ITERATIONS_REACHED" + print( + f"\n [WARNING] Maximum review iterations ({MAX_REVIEW_ITERATIONS}) reached" + ) + break + + # Case 3: Issues found but no fix operations provided + if review_result.issues_found and not review_result.fix_operations: + if review_result.overall_success: + # Minor issues that don't affect success + final_status = "SUCCESS_WITH_WARNINGS" + print("\n" + "=" * 70) + print( + "[OK] REVIEW COMPLETE - CHANGES VERIFIED (with minor notes)" + ) + print("=" * 70) + else: + final_status = "ISSUES_UNRESOLVED" + print("\n" + "=" * 70) + print("[WARNING] REVIEW COMPLETE - UNRESOLVED ISSUES") + print("=" * 70) + break + + # Case 4: No issues, but overall_success is False (edge case) + if not review_result.issues_found and not review_result.overall_success: + final_status = "UNCERTAIN" + print("\n [WARNING] Review uncertain, stopping...") + break + + # Final summary after all review rounds + print("\n" + "โ”€" * 70) + print("REVIEW PROCESS SUMMARY") + print("โ”€" * 70) + print(f"\n Total Review Rounds: {len(review_iterations)}") + print(f" Final Status: {final_status}") + + # Show iteration history + if len(review_iterations) > 1: + print("\n Iteration History:") + for it in review_iterations: + status_icon = ( + "[OK]" + if it["overall_success"] + else "[FAIL]" + if it["needs_fix"] + else "?" 
+ ) + fix_info = ( + f" โ†’ {it['fix_operations_count']} fixes applied" + if it["fix_operations_count"] > 0 + else "" + ) + print( + f" Round {it['round']}: {status_icon} (confidence: {it['confidence_score']:.2f}){fix_info}" + ) + + print("โ”€" * 70) + + # Store final review result + if review_result: + result["review"] = { + "thinking": review_result.thinking, + "summary": review_result.summary, + "execution_matches_plan": review_result.execution_matches_plan, + "execution_matches_intent": review_result.execution_matches_intent, + "issues_found": review_result.issues_found, + "suggestions": review_result.suggestions, + "overall_success": review_result.overall_success, + "confidence_score": review_result.confidence_score, + "review_iterations": review_iterations, + "total_rounds": len(review_iterations), + "final_status": final_status, + } + + # Update success based on final review + if final_status not in ["SUCCESS", "SUCCESS_WITH_WARNINGS"]: + result["success"] = False + result["review_failed"] = True + + # Print summary + self._print_summary(result, components) + + return result + + def _execute_fix_operations( + self, fix_operations: List[Dict[str, Any]], components: List[Dict[str, Any]] + ) -> List[Dict[str, Any]]: + """Execute fix operations from review phase.""" + # Build component lookup + comp_by_name = {comp.get("name"): comp for comp in components} + + fix_results = [] + + for op_data in fix_operations: + # Convert dict to ComponentOperation-like structure + comp_name = op_data.get("component_name", "") + comp = comp_by_name.get(comp_name) + + if comp is None: + self.logger.warning(f" [WARNING] Fix: Component not found: {comp_name}") + fix_results.append( + { + "component": comp_name, + "status": "skipped", + "reason": "Component not found", + "is_fix": True, + } + ) + continue + + subtree = comp.get("refactored_subtree", {}) + initial_count = count_leaf_nodes(subtree) + + paths_to_remove = op_data.get("paths_to_remove", []) + paths_to_add = 
op_data.get("paths_to_add", []) + operation_type = op_data.get("operation_type", "FIX") + reason = op_data.get("reason", "Fix from review") + + self.logger.info(f" โ†’ Fix: {comp_name} - {operation_type}") + self.logger.info(f" Reason: {reason}") + + # Execute DELETE operations + deleted_count = 0 + failed_deletions = [] + if paths_to_remove: + for path in paths_to_remove: + _, removal_results = remove_paths(subtree, [path], inplace=True) + if removal_results.get(path, False): + self.paths_deleted.append(f"{comp_name}: {path} (fix)") + deleted_count += 1 + self.logger.info(f" [FAIL] Removed: {path}") + else: + failed_deletions.append(path) + self.logger.warning(f" [WARNING] Path not found: {path}") + + # Execute ADD operations + added_count = 0 + if paths_to_add: + apply_changes(subtree, paths_to_add, inplace=True) + for path in paths_to_add: + self.paths_added.append(f"{comp_name}: {path} (fix)") + added_count += 1 + self.logger.info(f" [OK] Added: {path}") + + final_count = count_leaf_nodes(subtree) + comp["actual_size"] = final_count + + # Determine status + has_failures = len(failed_deletions) > 0 + if has_failures and deleted_count == 0 and len(paths_to_remove) > 0: + status = "partial_failure" + elif has_failures: + status = "partial_success" + else: + status = "success" + + fix_results.append( + { + "component": comp_name, + "status": status, + "operation_type": f"FIX_{operation_type}", + "paths_removed": deleted_count, + "paths_added": added_count, + "failed_deletions": failed_deletions, + "initial_leaf_count": initial_count, + "final_leaf_count": final_count, + "is_fix": True, + "reason": reason, + } + ) + + # Build fix operation record with new optimized structure + fix_op_record = { + "component": comp_name, + "operation_type": f"FIX_{operation_type}", + "removed": paths_to_remove, + "added": paths_to_add, + "reason": reason, + "status": status, + "leaf_count": {"before": initial_count, "after": final_count}, + "is_fix": True, + } + # Only add failed 
field if there are failures + if failed_deletions: + fix_op_record["failed"] = failed_deletions + + self.operations_executed.append(fix_op_record) + + return fix_results + + def _capture_state(self, components: List[Dict[str, Any]]) -> Dict[str, Any]: + """Capture current state of all components for comparison.""" + state = {} + for comp in components: + name = comp.get("name", "Unknown") + subtree = comp.get("refactored_subtree", {}) + state[name] = { + "leaf_count": count_leaf_nodes(subtree), + "paths": get_all_leaf_paths(subtree), + } + return state + + def _build_components_summary(self, components: List[Dict[str, Any]]) -> str: + """Build a summary of all components with their paths.""" + summary_parts = [] + + for comp in components: + name = comp.get("name", "Unknown") + purpose = comp.get("purpose", "")[:200] + subtree = comp.get("refactored_subtree", {}) + + # Get all paths in this component + paths = get_all_leaf_paths(subtree) + leaf_count = len(paths) + + summary_parts.append(f"### {name}") + summary_parts.append(f"**Purpose**: {purpose}") + summary_parts.append(f"**Leaf Count**: {leaf_count}") + summary_parts.append("**Paths**:") + + # Show paths (limit to 50 for readability) + for path in paths[:50]: + summary_parts.append(f" - {path}") + if len(paths) > 50: + summary_parts.append(f" - ... 
and {len(paths) - 50} more paths") + + summary_parts.append("") + + return "\n".join(summary_parts) + + def _generate_edit_plan( + self, + components_summary: str, + user_instructions: str, + repo_data: Dict[str, Any], + ) -> Optional[EditPlan]: + """Phase 1: Generate edit plan using LLM.""" + prompt = PROMPT_TEMPLATE_EDIT_PLAN.format( + edit_instruction=user_instructions, + repository_name=repo_data.get("repository_name", "Unknown"), + repository_purpose=repo_data.get("repository_purpose", "")[:500], + components_summary=components_summary, + ) + + self.logger.info("Generating edit plan...") + self.logger.debug(f"Prompt length: {len(prompt)} characters") + + _, plan, _ = self.llm.call_structured( + system_prompt=prompt, + user_prompt="", + response_model=EditPlan, + purpose="generate_plan", + ) + + return plan + + def _display_plan(self, plan: EditPlan): + """Display the generated edit plan.""" + print("\n" + "โ”€" * 70) + print("EDIT PLAN") + print("โ”€" * 70) + print(f"\nSummary: {plan.summary}") + print(f"\nOperations ({len(plan.operations)}):") + + for i, op in enumerate(plan.operations, 1): + print(f"\n [{i}] {op.component_name} - {op.operation_type}") + print(f" Reason: {op.reason}") + if op.paths_to_remove: + print(f" Remove ({len(op.paths_to_remove)}):") + for path in op.paths_to_remove[:5]: + print(f" [FAIL] {path}") + if len(op.paths_to_remove) > 5: + print(f" ... and {len(op.paths_to_remove) - 5} more") + if op.paths_to_add: + print(f" Add ({len(op.paths_to_add)}):") + for path in op.paths_to_add[:5]: + print(f" [OK] {path}") + if len(op.paths_to_add) > 5: + print(f" ... 
and {len(op.paths_to_add) - 5} more") + + if plan.validation_notes: + print(f"\nValidation Notes: {plan.validation_notes}") + + print("\n" + "โ”€" * 70) + + def _execute_plan( + self, plan: EditPlan, components: List[Dict[str, Any]] + ) -> List[Dict[str, Any]]: + """Phase 2: Execute the edit plan.""" + # Build component lookup + comp_by_name = {comp.get("name"): comp for comp in components} + + execution_results = [] + + for op in plan.operations: + comp_name = op.component_name + comp = comp_by_name.get(comp_name) + + if comp is None: + self.logger.warning(f"[WARNING] Component not found: {comp_name}") + execution_results.append( + { + "component": comp_name, + "status": "skipped", + "reason": "Component not found", + } + ) + continue + + subtree = comp.get("refactored_subtree", {}) + initial_count = count_leaf_nodes(subtree) + + self.logger.info(f"\nProcessing: {comp_name}") + + # Execute DELETE operations + deleted_count = 0 + failed_deletions = [] + if op.paths_to_remove: + self.logger.info(f" Removing {len(op.paths_to_remove)} paths...") + for path in op.paths_to_remove: + _, removal_results = remove_paths(subtree, [path], inplace=True) + if removal_results.get(path, False): + self.paths_deleted.append(f"{comp_name}: {path}") + deleted_count += 1 + self.logger.info(f" [OK] Removed: {path}") + else: + failed_deletions.append(path) + self.logger.warning(f" [WARNING] Path not found: {path}") + + # Execute ADD operations + added_count = 0 + if op.paths_to_add: + self.logger.info(f" Adding {len(op.paths_to_add)} paths...") + apply_changes(subtree, op.paths_to_add, inplace=True) + for path in op.paths_to_add: + self.paths_added.append(f"{comp_name}: {path}") + added_count += 1 + + final_count = count_leaf_nodes(subtree) + + # Update component's actual_size + comp["actual_size"] = final_count + + # Determine actual status + has_failures = len(failed_deletions) > 0 + if has_failures and deleted_count == 0: + status = "partial_failure" + elif has_failures: + status = 
"partial_success" + else: + status = "success" + + execution_results.append( + { + "component": comp_name, + "status": status, + "operation_type": op.operation_type, + "paths_removed": deleted_count, + "paths_requested_remove": len(op.paths_to_remove), + "paths_added": added_count, + "failed_deletions": failed_deletions, + "initial_leaf_count": initial_count, + "final_leaf_count": final_count, + } + ) + + # Build operation record with new optimized structure + op_record = { + "component": comp_name, + "operation_type": op.operation_type, + "removed": op.paths_to_remove, + "added": op.paths_to_add, + "reason": op.reason, + "status": status, + "leaf_count": {"before": initial_count, "after": final_count}, + } + # Only add failed field if there are failures + if failed_deletions: + op_record["failed"] = failed_deletions + + self.operations_executed.append(op_record) + + # Log with appropriate status + if has_failures: + self.logger.warning( + f" [WARNING] Partial: -{deleted_count}/{len(op.paths_to_remove)} +{added_count} (leaves: {initial_count} โ†’ {final_count})" + ) + self.logger.warning(f" Failed to remove: {failed_deletions}") + else: + self.logger.info( + f" [OK] Done: -{deleted_count} +{added_count} (leaves: {initial_count} โ†’ {final_count})" + ) + + return execution_results + + def _review_execution( + self, + edit_instruction: str, + plan: EditPlan, + execution_results: List[Dict[str, Any]], + model_analysis: Optional[Dict[str, Any]] = None, + components: Optional[List[Dict[str, Any]]] = None, + ) -> Optional[ReviewResult]: + """Phase 3: Review the execution results.""" + # Format plan operations for prompt + plan_ops_str = "" + for i, op in enumerate(plan.operations, 1): + plan_ops_str += f"\n{i}. 
{op.component_name} ({op.operation_type}):\n" + plan_ops_str += f" - Remove: {op.paths_to_remove}\n" + plan_ops_str += f" - Add: {op.paths_to_add}\n" + plan_ops_str += f" - Reason: {op.reason}\n" + + # Format execution results - include failed deletions + exec_str = "" + for er in execution_results: + exec_str += f"\n- {er['component']}: {er['status']}" + if er["status"] in ["success", "partial_success", "partial_failure"]: + requested = er.get("paths_requested_remove", er["paths_removed"]) + exec_str += f" (removed {er['paths_removed']}/{requested}, added {er['paths_added']})" + exec_str += ( + f" leaves: {er['initial_leaf_count']} โ†’ {er['final_leaf_count']}" + ) + # Highlight failed deletions + failed = er.get("failed_deletions", []) + if failed: + exec_str += f"\n [WARNING] FAILED TO REMOVE: {failed}" + + # Collect affected component names for focused review + affected_components = set() + for op in plan.operations: + affected_components.add(op.component_name) + + # Format state - only show affected components with full paths + # For others, just show leaf count + state_before_str = "" + for name, state in self.state_before.items(): + state_before_str += f"\n### {name}: {state['leaf_count']} leaves" + if name in affected_components: + state_before_str += "\nPaths:\n" + for path in state["paths"]: + state_before_str += f" - {path}\n" + + state_after_str = "" + for name, state in self.state_after.items(): + state_after_str += f"\n### {name}: {state['leaf_count']} leaves" + if name in affected_components: + state_after_str += "\nPaths:\n" + for path in state["paths"]: + state_after_str += f" - {path}\n" + + # Detect duplicate features across components + duplicate_features_str = "No duplicates detected." 
+ if components: + duplicates = find_duplicate_features(components) + if duplicates: + duplicate_features_str = "[WARNING] DUPLICATES FOUND - These features exist in multiple components:\n" + for feature, locations in duplicates.items(): + duplicate_features_str += f"\n- '{feature}' found in:\n" + for comp_name, path in locations: + duplicate_features_str += f" - {comp_name}: {path}\n" + duplicate_features_str += "\nFor MOVE operations, duplicates should be removed from the source component." + + # Format model analysis if available + model_analysis_str = "No previous model analysis available." + if model_analysis: + model_analysis_str = json.dumps( + model_analysis, indent=2, ensure_ascii=False + )[:2000] + + prompt = PROMPT_TEMPLATE_EDIT_REVIEW.format( + edit_instruction=edit_instruction, + plan_summary=plan.summary, + plan_operations=plan_ops_str, + execution_results=exec_str, + state_before=state_before_str, + state_after=state_after_str, + duplicate_features=duplicate_features_str, + model_analysis=model_analysis_str, + ) + + self.logger.info("Reviewing execution results...") + + _, review, _ = self.llm.call_structured( + system_prompt=prompt, + user_prompt="", + response_model=ReviewResult, + purpose="review_execution", + ) + + return review + + def _display_review(self, review: ReviewResult, review_round: int = 1): + """Display the review results.""" + print("\n" + "โ”€" * 70) + print(f"REVIEW RESULTS (Round {review_round})") + print("โ”€" * 70) + + # 1. Display thinking process + if review.thinking: + print("\n Analysis:") + print(" " + "-" * 40) + # Format thinking with proper indentation + for line in review.thinking.split("\n"): + if line.strip(): + print(f" {line.strip()}") + print(" " + "-" * 40) + + # 2. Display summary (most important for user) + if review.summary: + print("\n Summary:") + print(f" {review.summary}") + + # 3. 
Status indicators + plan_status = "[OK]" if review.execution_matches_plan else "[FAIL]" + intent_status = "[OK]" if review.execution_matches_intent else "[FAIL]" + overall_status = ( + "[OK] SUCCESS" + if review.overall_success + else "[FAIL] NEEDS FIX" + if review.needs_fix + else "[FAIL] FAILED" + ) + + print("\n Verification:") + print(f" Execution matches plan: {plan_status}") + print(f" Execution matches intent: {intent_status}") + print(f" Confidence score: {review.confidence_score:.2f}") + print(f"\n [TARGET] Overall: {overall_status}") + + # 4. Issues found + if review.issues_found: + print(f"\n [WARNING] Issues Found ({len(review.issues_found)}):") + for issue in review.issues_found: + print(f" - {issue}") + + # 5. Suggestions + if review.suggestions: + print(f"\n Suggestions ({len(review.suggestions)}):") + for suggestion in review.suggestions: + print(f" โ†’ {suggestion}") + + # 6. Fix operations (if any) + if review.needs_fix and review.fix_operations: + print(f"\n Fix Operations Required ({len(review.fix_operations)}):") + for i, op in enumerate(review.fix_operations, 1): + comp_name = op.get("component_name", "Unknown") + op_type = op.get("operation_type", "FIX") + reason = op.get("reason", "") + paths_remove = op.get("paths_to_remove", []) + paths_add = op.get("paths_to_add", []) + + print(f" [{i}] {comp_name} - {op_type}") + print(f" Reason: {reason}") + if paths_remove: + print( + f" Remove: {paths_remove[:3]}{'...' if len(paths_remove) > 3 else ''}" + ) + if paths_add: + print( + f" Add: {paths_add[:3]}{'...' if len(paths_add) > 3 else ''}" + ) + + print("\n" + "โ”€" * 70) + + def _print_summary(self, result: Dict[str, Any], components: List[Dict[str, Any]]): + """Print execution summary.""" + print("\n") + print("=" * 80) + print("EXECUTION SUMMARY") + print("=" * 80) + + # 1. 
Operation Statistics + exec_results = result.get("execution_results", []) + if exec_results: + rows = [] + total_removed = 0 + total_added = 0 + for er in exec_results: + if er["status"] == "success": + rows.append( + [ + er["component"][:30], + er["operation_type"], + er["paths_removed"], + er["paths_added"], + f"{er['initial_leaf_count']} โ†’ {er['final_leaf_count']}", + ] + ) + total_removed += er["paths_removed"] + total_added += er["paths_added"] + + if rows: + rows.append(["TOTAL", "", total_removed, total_added, ""]) + print_unicode_table( + headers=["Component", "Operation", "Removed", "Added", "Leaves"], + rows=rows, + title="Operations Executed", + ) + + # 2. Deleted Paths + if self.paths_deleted: + print(f"\n Deleted Paths ({len(self.paths_deleted)}):") + for path in self.paths_deleted[:10]: + print(f" [FAIL] {path}") + if len(self.paths_deleted) > 10: + print(f" ... and {len(self.paths_deleted) - 10} more") + + # 3. Added Paths + if self.paths_added: + print(f"\n Added Paths ({len(self.paths_added)}):") + for path in self.paths_added[:10]: + print(f" [OK] {path}") + if len(self.paths_added) > 10: + print(f" ... and {len(self.paths_added) - 10} more") + + # 4. 
Final Component Stats + print("\n Final Component Statistics:") + for comp in components: + name = comp.get("name", "Unknown") + leaf_count = count_leaf_nodes(comp.get("refactored_subtree", {})) + print(f" - {name}: {leaf_count} leaves") + + print("\n" + "=" * 80) + + +# ============================================================================ +# Main Program +# ============================================================================ + + +def main(): + """Main function.""" + parser = argparse.ArgumentParser( + description="Edit Feature Tree - Three-phase approach (Plan + Execute + Review)", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Example usage: + python feature_edit.py + python feature_edit.py --log-level DEBUG + python feature_edit.py --no-review # Skip review phase (not recommended) + """, + ) + + parser.add_argument( + "--file", + help="Feature tree JSON file (input and output)", + default=str(FEATURE_TREE_FILE), + ) + parser.add_argument( + "--log-level", + default="INFO", + choices=["DEBUG", "INFO", "WARNING", "ERROR"], + help="Log level (default: INFO)", + ) + parser.add_argument( + "--no-review", + action="store_true", + help="Skip the review phase (not recommended)", + ) + parser.add_argument( + "--no-trajectory", + action="store_true", + help="Disable trajectory recording", + ) + + args = parser.parse_args() + + # Configure logging + logging.basicConfig( + level=getattr(logging, args.log_level), + format="%(asctime)s - %(levelname)s - %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", + ) + logger = logging.getLogger(__name__) + + try: + # Load feature_tree.json + logger.info(f"Reading feature tree file: {args.file}") + with open(args.file, "r", encoding="utf-8") as f: + data = json.load(f) + + components = data.get("components") + if not components: + logger.error("[FAIL] File missing 'components' field") + return 1 + + # Extract data + repository_name = data.get("repository_name", "Unknown") + repository_purpose = 
data.get("repository_purpose", "") + edit_instruction = data.get("edit_instruction", "") + edit_history = data.get("edit_history", []) + + if not edit_instruction: + logger.error("[FAIL] No user edit instructions found") + return 1 + + logger.info(f"[OK] Loaded {len(components)} components") + logger.info(f"[OK] User instructions: {edit_instruction[:100]}...") + + repo_data = { + "repository_name": repository_name, + "repository_purpose": repository_purpose, + } + + # Extract model analysis from previous build/refactor if available + model_analysis = data.get("model_analysis", None) + if not model_analysis: + # Try to extract from components metadata + model_analysis = { + "source": "feature_tree.json", + "components_count": len(components), + "total_features": sum( + comp.get("actual_size", 0) for comp in components + ), + } + + # Initialize trajectory + trajectory = None + step_id = None + if not args.no_trajectory: + trajectory = load_or_create_trajectory("feature_edit") + trajectory.start(metadata={ + "input_file": args.file, + "edit_instruction": edit_instruction[:200], + "components_count": len(components), + }) + step = trajectory.add_step("feature_edit", "Edit feature tree based on user instructions") + trajectory.start_step(step.step_id) + step_id = step.step_id + + # Create LLM client and editor + llm_client = LLMClient(trajectory=trajectory, step_id=step_id) + editor = FeatureTreeEditor( + llm_client, + enable_review=not args.no_review, + ) + + # Execute three-phase editing + result = editor.edit( + components, + edit_instruction, + repo_data, + model_analysis=model_analysis, + ) + + if not result.get("success"): + logger.error(f"[FAIL] Edit failed: {result.get('error', 'Unknown error')}") + return 1 + + # Build result summary + review_data = result.get("review", {}) + result_summary = { + "status": review_data.get( + "final_status", "SUCCESS" if result.get("success") else "FAILED" + ), + "total_removed": len(result.get("paths_deleted", [])), + 
"total_added": len(result.get("paths_added", [])), + "review": { + "summary": review_data.get("summary", ""), + "confidence": review_data.get("confidence_score", 0.0), + "iterations": review_data.get("total_rounds", 1), + } + if review_data + else None, + } + + # Update edit history with new optimized structure + edit_record = { + "instruction": edit_instruction, + "timestamp": time.strftime("%Y-%m-%dT%H:%M:%S+00:00"), + "plan_summary": result.get("plan_summary", ""), + "operations": result.get("operations_executed", []), + "result": result_summary, + } + + # Ensure history is a list of dicts + if not isinstance(edit_history, list): + edit_history = [] + + edit_history.append(edit_record) + data["edit_history"] = edit_history + + # Remove last_edit_result if exists (no longer needed) + if "last_edit_result" in data: + del data["last_edit_result"] + + # Check if review flagged issues + if review_data and not review_data.get("overall_success", True): + logger.warning("[WARNING] Review identified issues with the edit operation") + if review_data.get("issues_found"): + for issue in review_data["issues_found"]: + logger.warning(f" - {issue}") + + # Reorder output: put edit_instruction before edit_history + # Build ordered output dict + ordered_data = {} + # First, add all keys except edit_instruction and edit_history + for key in data: + if key not in ("edit_instruction", "edit_history"): + ordered_data[key] = data[key] + # Then add edit_instruction and edit_history in desired order + ordered_data["edit_instruction"] = data.get("edit_instruction", "") + ordered_data["edit_history"] = data.get("edit_history", []) + + # Save results back to the same file + logger.info(f"\nSaving results to: {args.file}") + with open(args.file, "w", encoding="utf-8") as f: + json.dump(ordered_data, f, indent=2, ensure_ascii=False) + + logger.info(f"\n{'=' * 70}") + logger.info("[OK] Edit complete!") + logger.info(f"{'=' * 70}\n") + + # Mark trajectory as complete + if trajectory: + if 
step_id is not None: + trajectory.complete_step(step_id, { + "total_removed": len(result.get("paths_deleted", [])), + "total_added": len(result.get("paths_added", [])), + }) + trajectory.complete(metadata={ + "total_removed": len(result.get("paths_deleted", [])), + "total_added": len(result.get("paths_added", [])), + }) + logger.info(f"[OK] Trajectory saved to: {trajectory.trajectory_file}") + + return 0 + + except FileNotFoundError as e: + logger.error(f"[FAIL] File not found: {e}") + if trajectory: + if step_id is not None: + trajectory.fail_step(step_id, str(e)) + trajectory.fail(str(e)) + return 1 + except json.JSONDecodeError as e: + logger.error(f"[FAIL] JSON parsing error: {e}") + if trajectory: + if step_id is not None: + trajectory.fail_step(step_id, str(e)) + trajectory.fail(str(e)) + return 1 + except Exception as e: + logger.error(f"[FAIL] Execution error: {e}", exc_info=True) + if trajectory: + if step_id is not None: + trajectory.fail_step(step_id, str(e)) + trajectory.fail(str(e)) + return 1 + + +if __name__ == "__main__": + exit(main()) diff --git a/RPG-Kit/scripts/feature_edit_validation.py b/RPG-Kit/scripts/feature_edit_validation.py new file mode 100644 index 0000000..df1d73c --- /dev/null +++ b/RPG-Kit/scripts/feature_edit_validation.py @@ -0,0 +1,191 @@ +#!/usr/bin/env python3 +"""Inspect feature tree state and decide execution state for feature_edit. + +Decision rules: +- Check if .rpgkit/data/feature_tree.json exists +- Check if 'components' field exists and is not empty (generated by feature_refactor) +- Check if repository_name exists and is not empty +- Accept edit_instruction parameter and save to feature_tree.json +- Maintain instruction history with UTC timestamps + +The script prints EXACTLY ONE JSON object to stdout. +No extra text is allowed. 
+""" + +import json +import sys +import argparse +from pathlib import Path +from typing import Any, Dict + +from common.paths import FEATURE_TREE_FILE + + +def load_json(path: Path) -> Dict[str, Any] | None: + """Load JSON file and return data, or None if failed.""" + try: + with path.open("r", encoding="utf-8") as f: + data = json.load(f) + if isinstance(data, dict) and len(data) > 0: + return data + except Exception: + pass + return None + + +def save_json(path: Path, data: Dict[str, Any]) -> None: + """Save JSON data to file.""" + try: + path.parent.mkdir(parents=True, exist_ok=True) + with path.open("w", encoding="utf-8") as f: + json.dump(data, f, ensure_ascii=False, indent=2) + except Exception as e: + print( + json.dumps({"error": f"Failed to save file: {str(e)}"}, ensure_ascii=False), + file=sys.stderr, + ) + + +def update_instructions(data: Dict[str, Any], new_instructions: str) -> Dict[str, Any]: + """Update edit_instruction field only. + + Note: edit_history is managed by feature_edit.py after successful execution. + This function only: + 1. Initializes edit_history if not exists + 2. Cleans up duplicate/legacy entries (keeps the one with most info) + 3. 
Sets edit_instruction + + Args: + data: Current JSON data + new_instructions: New instruction text (can be empty) + + Returns: + Updated data with edit_instruction + """ + # Initialize edit_history if not exists + if "edit_history" not in data: + data["edit_history"] = [] + + # Clean up: for duplicate instructions, keep the one with more fields (from feature_edit.py) + cleaned_history = [] + seen_instructions = {} # instruction -> (index, entry) + + for entry in data["edit_history"]: + if isinstance(entry, dict) and "instruction" in entry: + instruction_text = entry["instruction"] + entry_fields = len(entry) + + if instruction_text in seen_instructions: + # Keep the entry with more fields (more complete record) + existing_idx, existing_entry = seen_instructions[instruction_text] + if entry_fields > len(existing_entry): + # Replace with the more complete entry + seen_instructions[instruction_text] = (len(cleaned_history), entry) + cleaned_history[existing_idx] = entry + # else: keep existing, skip this one + else: + seen_instructions[instruction_text] = (len(cleaned_history), entry) + cleaned_history.append(entry) + elif isinstance(entry, str): + # Skip legacy string format + pass + + data["edit_history"] = cleaned_history + + # Update current instructions (edit_history is added by feature_edit.py after execution) + data["edit_instruction"] = new_instructions + + return data + + +def inspect_state(edit_instruction: str = "") -> Dict[str, Any]: + """Inspect repository state and return a decision object. + + Logic: + - Check if feature_tree.json exists and has valid feature_tree field + - Save edit_instruction to the file + + Args: + edit_instruction: User's edit instructions to save + + Returns: + Decision object with type field: "ready" or "error" + """ + # Check if feature_tree.json exists + if not FEATURE_TREE_FILE.exists(): + return { + "type": "error", + "error_code": "file_not_found", + "message": f"Input file '{FEATURE_TREE_FILE}' does not exist. 
Please run /rpgkit.feature_refactor first.", + "file": str(FEATURE_TREE_FILE), + } + + # Load feature_tree.json + data = load_json(FEATURE_TREE_FILE) + + if data is None: + return { + "type": "error", + "error_code": "file_invalid", + "message": f"File '{FEATURE_TREE_FILE}' is empty or contains invalid JSON.", + "file": str(FEATURE_TREE_FILE), + } + + # Check if components field exists and is not empty + components = data.get("components") + if not components or not isinstance(components, list) or len(components) == 0: + return { + "type": "error", + "error_code": "field_empty", + "message": f"Field 'components' is missing or empty in '{FEATURE_TREE_FILE}'. Please run /rpgkit.feature_refactor to generate components.", + "file": str(FEATURE_TREE_FILE), + "missing_field": "components", + } + + # Check required fields + repository_name = data.get("repository_name", "") + + if not repository_name: + return { + "type": "error", + "error_code": "field_missing", + "message": "Field 'repository_name' is missing or empty in the file.", + "file": str(FEATURE_TREE_FILE), + "missing_field": "repository_name", + } + + # Update with user instructions + data = update_instructions(data, edit_instruction) + + # Save updated file + save_json(FEATURE_TREE_FILE, data) + + # Return ready status + return { + "type": "ready", + "file": str(FEATURE_TREE_FILE), + "components_count": len(components), + "edit_instruction": edit_instruction, + "message": "Ready to execute feature_edit.py", + } + + +def main() -> None: + parser = argparse.ArgumentParser( + description="Check edit feature prerequisites and save user instructions" + ) + parser.add_argument( + "--edit_instruction", + type=str, + default="", + help="User edit instructions for the feature tree (required)", + ) + + args = parser.parse_args() + + decision = inspect_state(args.edit_instruction) + print(json.dumps(decision, ensure_ascii=False, indent=2)) + + +if __name__ == "__main__": + main() diff --git 
a/RPG-Kit/scripts/feature_refactor.py b/RPG-Kit/scripts/feature_refactor.py
new file mode 100644
index 0000000..140b96c
--- /dev/null
+++ b/RPG-Kit/scripts/feature_refactor.py
@@ -0,0 +1,1163 @@
#!/usr/bin/env python3
"""Simplified Feature Tree Refactoring Script.

Function: Refactor feature tree into modular component architecture
- Step 1: Plan subtree structure based on domain analysis
- Step 2: Iteratively assign features to planned subtrees
"""

import json
import logging
import argparse
import copy
from typing import Dict, Any, List, Optional, Union
from pydantic import BaseModel, Field

from feature.prompts import (
    PROMPT_TEMPLATE_SUBTREE_PLANNING,
    PROMPT_TEMPLATE_FEATURE_ORGANIZATION,
)
from common.paths import FEATURE_BUILD_FILE, FEATURE_TREE_FILE
from common import print_unicode_table, get_all_leaf_paths, get_leaf_name, get_all_leaf_descriptions
from common.llm_client import LLMClient
from common.trajectory import load_or_create_trajectory


# ============================================================================
# Utility Functions
# ============================================================================


def count_all_nodes(tree: Dict[str, Any]) -> int:
    """Recursively count all nodes in the tree (including intermediate and leaf nodes).

    Dict keys count as one node each; a list value contributes one node per
    element (its items are treated as leaves, not recursed into).

    Args:
        tree: Feature tree or subtree

    Returns:
        Total node count
    """
    if not tree:
        return 0

    count = 0
    if isinstance(tree, dict):
        for key, value in tree.items():
            count += 1  # Count current key
            if isinstance(value, dict):
                count += count_all_nodes(value)  # Recursively count subtree
            elif isinstance(value, list):
                count += len(value)  # Count leaf nodes
    elif isinstance(tree, list):
        count = len(tree)

    return count


# ============================================================================
# Pydantic Data Models
# ============================================================================


class SubtreePlan(BaseModel):
    """Subtree planning (one planned component from the LLM)."""

    name: str = Field(description="Subtree/component name")
    purpose: str = Field(description="High-level purpose or theme of the subtree")
    estimate_size: int = Field(description="Estimated feature count")


class SubtreePlanningOutput(BaseModel):
    """Planning step output (structured LLM response for Step 1)."""

    total_subtrees: int = Field(description="Total number of planned subtrees")
    subtree_plans: List[SubtreePlan] = Field(description="List of subtree plans")
    reasoning: str = Field(description="Organizational rationale")


class FeatureAssignment(BaseModel):
    """Feature assignment (paths routed to a single subtree)."""

    subtree_name: str = Field(description="Target subtree name")
    assigned_paths: List[str] = Field(
        description="List of feature paths assigned to this subtree"
    )


class FeatureOrganizationOutput(BaseModel):
    """Organization step output (structured LLM response for Step 2 iterations)."""

    assignments: List[FeatureAssignment] = Field(
        description="Feature assignments for each subtree"
    )


# ============================================================================
# Tree Operation Utility Functions (Inline Version)
# ============================================================================


def extract_leaf_nodes(tree: Dict[str, Any]) -> List[str]:
    """Extract all leaf node names.

    A leaf is an empty-dict value's key, an item of a list value, or a
    non-dict/non-list value itself. Names are de-duplicated via a set, so
    the returned list's order is unspecified.
    """
    leaf_names = set()
    if isinstance(tree, dict):
        for key, value in tree.items():
            if isinstance(value, dict):
                if not value:
                    leaf_names.add(key)
                else:
                    leaf_names.update(extract_leaf_nodes(value))
            elif isinstance(value, list):
                for item in value:
                    leaf_names.add(get_leaf_name(item))
            else:
                leaf_names.add(value)
    return list(leaf_names)


def insert_path(tree: Dict[str, Any], path: str, delimiter: str = "/") -> None:
    """Insert a single path in place.

    Path segments are matched case-insensitively against existing keys.
    Intermediate list nodes are upgraded to dicts ({item: []}) when the
    path needs to descend through them; leaf containers are lists.
    """
    parts = [p.strip() for p in path.split(delimiter) if p.strip()]
    # parent/key_in_parent track where `node` hangs so a list node can be
    # replaced (upgraded to a dict) in its parent when we must descend.
    parent, key_in_parent = None, None
    node = tree
    i = 0

    while i < len(parts):
        part, last = parts[i], i == len(parts) - 1

        if isinstance(node, dict):
            # Case-insensitive lookup of an existing key for this segment.
            mk = next((k for k in node if k.lower() == part.lower()), None)
            if last:
                if mk is None:
                    node[part] = []
                break
            else:
                if mk is None:
                    node[part] = {}
                    mk = part
                elif isinstance(node[mk], list):
                    # Upgrade a leaf list to a dict so we can descend through it.
                    node[mk] = {x: [] for x in node[mk]}
                elif not isinstance(node[mk], dict):
                    node[mk] = {}
                parent, key_in_parent = node, mk
                node = node[mk]
                i += 1
                continue

        elif isinstance(node, list):
            if last:
                # Append the leaf unless an equivalent (case-insensitive) one exists.
                if part.lower() not in (x.lower() for x in node):
                    node.append(part)
                break
            else:
                # Must descend further: upgrade this list node in its parent.
                upgraded = {x: [] for x in node}
                parent[key_in_parent] = upgraded
                node = upgraded
                continue
        else:
            # Scalar node in the way of the path: replace it with a dict.
            upgraded = {}
            parent[key_in_parent] = upgraded
            node = upgraded
            continue


def _collapse_leaf_dicts(node: Union[Dict, List]) -> Union[Dict, List]:
    """Collapse pure leaf dicts into lists.

    A dict whose every value is an empty list becomes the list of its keys.
    NOTE(review): this can turn the root dict itself into a list when all
    top-level values are empty lists — callers must tolerate that.
    """
    if isinstance(node, dict):
        if not node:
            return {}
        collapsed = {k: _collapse_leaf_dicts(v) for k, v in node.items()}
        if all(isinstance(v, list) and len(v) == 0 for v in collapsed.values()):
            return list(collapsed.keys())
        return collapsed
    elif isinstance(node, list):
        return [_collapse_leaf_dicts(v) for v in node]
    else:
        return node


def apply_changes(
    tree: Dict[str, Any],
    changes: List[str],
    delimiter: str = "/",
    inplace: bool = True,
    auto_collapse: bool = True,
) -> Dict[str, Any]:
    """Batch insert paths.

    Args:
        tree: Tree to modify (deep-copied first when inplace=False).
        changes: Full paths to insert, segments joined by `delimiter`.
        delimiter: Path separator.
        inplace: Mutate `tree` directly when True.
        auto_collapse: Collapse pure-leaf dicts into lists after inserting.

    Returns:
        The modified tree (same object as `tree` when inplace=True).
        NOTE(review): with inplace=True and auto_collapse=True,
        tree.update(collapsed) assumes _collapse_leaf_dicts returned a dict;
        a fully-collapsed root would be a list — TODO confirm inputs always
        keep the root uncollapsed.
    """
    target = tree if inplace else copy.deepcopy(tree)
    for p in changes:
        insert_path(target, p, delimiter)
    if auto_collapse:
        collapsed = _collapse_leaf_dicts(target)
        if inplace:
            tree.clear()
            tree.update(collapsed)
            return tree
        else:
            return collapsed
    return target


def convert_leaves_to_list(tree: Dict[str, Any]) -> Dict[str, Any]:
    """Ensure leaves are in list format (empty lists are normalized to {})."""
    if isinstance(tree, dict):
        return {k: convert_leaves_to_list(v) for k, v in tree.items()}
    elif isinstance(tree, list):
        return tree if tree else {}
    else:
        return tree


def find_leaf_paths_by_node(
    tree: Dict[str, Any], target_leaf_names: List[str], prefix: str = ""
) -> List[str]:
    """Find complete paths for specified leaf nodes.

    NOTE(review): matching here is case-SENSITIVE, unlike insert_path's
    case-insensitive key matching — confirm this asymmetry is intended.
    """
    matches = []
    if isinstance(tree, dict):
        for key, value in tree.items():
            new_prefix = f"{prefix}/{key}" if prefix else key
            if isinstance(value, dict):
                if not value and key in target_leaf_names:
                    matches.append(new_prefix)
                else:
                    matches.extend(
                        find_leaf_paths_by_node(value, target_leaf_names, new_prefix)
                    )
            elif isinstance(value, list):
                for item in value:
                    if item in target_leaf_names:
                        matches.append(f"{new_prefix}/{item}")
            else:
                if value in target_leaf_names:
                    matches.append(new_prefix)
    return matches


def remove_paths(
    tree: Dict[str, Any], paths: List[str], inplace: bool = False
) -> Dict[str, Any]:
    """Remove specified paths from tree.

    Each path's segments are split on '/'. After a removal, containers that
    became empty are pruned from their parent. Empty or non-string paths
    are ignored.
    """

    if not inplace:
        tree = copy.deepcopy(tree)

    def delete_path(node, path_parts):
        # Returns True when something was deleted (or a now-empty container
        # was pruned) along this path.
        if not path_parts:
            return False
        key = path_parts[0]
        if isinstance(node, dict):
            if key not in node:
                return False
            # Two segments left and the value is a list: remove the leaf
            # item from the list, pruning the list if it becomes empty.
            if len(path_parts) == 2 and isinstance(node[key], list):
                value_to_remove = path_parts[1]
                if value_to_remove in node[key]:
                    node[key].remove(value_to_remove)
                    if not node[key]:
                        del node[key]
                    return True
            if len(path_parts) == 1:
                del node[key]
                return True
            child_deleted = delete_path(node[key], path_parts[1:])
            # Prune containers emptied by the recursive deletion.
            if isinstance(node[key], dict) and not node[key]:
                del node[key]
                return True
            elif isinstance(node[key], list) and not node[key]:
                del node[key]
                return True
            return child_deleted
        return False

    for path in paths:
        if not path or not isinstance(path, str):
            continue
        path_parts = [p for p in path.split("/") if p]
        delete_path(tree, path_parts)

    return tree


def pre_order_traversal_to_list(feature_dict: Dict[str, Any]) -> List[str]:
    """Pre-order traverse tree to list (keys first, then their children)."""
    result = []
    if not isinstance(feature_dict, dict):
        return result
    for key, value in feature_dict.items():
        result.append(key)
        if isinstance(value, dict):
            result.extend(pre_order_traversal_to_list(value))
        elif isinstance(value, list):
            for item in value:
                if isinstance(item, dict):
                    result.extend(pre_order_traversal_to_list(item))
                elif isinstance(item, (str, int)):
                    result.append(item)
    return result


def build_repo_info(repo_data: Dict[str, Any]) -> str:
    """Build repository information string.

    Extracts repo metadata and spec fields. If background_and_overview or
    functional_requirements are not at the top level, they are parsed from
    the repository_specification field. List/dict values are serialized as
    readable JSON.
    """
    merged = dict(repo_data)
    spec_keys = ["background_and_overview", "functional_requirements",
                 "non_functional_requirements"]

    # repository_specification may be a JSON string or an already-parsed dict.
    if any(k not in merged for k in spec_keys):
        raw_spec = repo_data.get("repository_specification", "")
        if isinstance(raw_spec, str) and raw_spec.strip():
            try:
                parsed_spec = json.loads(raw_spec)
                for k in spec_keys:
                    if k not in merged and k in parsed_spec:
                        merged[k] = parsed_spec[k]
            except (json.JSONDecodeError, TypeError):
                pass
        elif isinstance(raw_spec, dict):
            for k in spec_keys:
                if k not in merged and k in raw_spec:
                    merged[k] = raw_spec[k]

    info = ""
    for key in [
        "repository_name",
        "repository_purpose",
        "background_and_overview",
        "non_functional_requirements",
        "functional_requirements",
    ]:
        if key in merged:
            formatted_key = key.replace("_", " ").capitalize()
            value = merged[key]
            if isinstance(value, (list, dict)):
                value = json.dumps(value, indent=2, ensure_ascii=False)
            info += f"{formatted_key}: {value}\n"

    return info.strip()


def convert_component_to_features(
    component_list: List[Dict[str, Any]],
) -> Dict[str, Any]:
    """Convert component list to feature tree format.

    Extracts refactored_subtree from each component and merges them into a single tree.
    Removes purpose, estimate_size, actual_size, util_percent fields.

    Args:
        component_list: List of component dictionaries with refactored_subtree

    Returns:
        Merged feature tree dictionary
    """
    if not component_list or not isinstance(component_list, list):
        return {}

    merged_tree = {}

    for component in component_list:
        if not isinstance(component, dict):
            continue

        # Get the refactored_subtree from component
        subtree = component.get("refactored_subtree", {})

        if not subtree or not isinstance(subtree, dict):
            continue

        # Merge subtree into merged_tree
        # Use component name as top-level key if subtree content should be namespaced
        # Otherwise, merge directly (which may cause key conflicts)
        component_name = component.get("name", "")

        # Option 1: Merge directly (may cause conflicts if keys overlap)
        # merged_tree.update(subtree)

        # Option 2: Use component name as wrapper (safer, preserves structure)
        if component_name:
            # Create a clean key name from component name
            # clean_name = component_name.replace(" ", "_").replace("&", "and").lower()
            merged_tree[component_name] = subtree
        else:
            # If no name, merge directly
            merged_tree.update(subtree)

    return merged_tree


# ============================================================================
# Core Refactoring Class
# ============================================================================


class FeatureTreeRefactor:
    """Feature tree refactorer (simplified version).

    Drives the two-step workflow: plan subtrees (Step 1), then iteratively
    assign leaf paths to them (Step 2), tracking utilization as it goes.
    """

    def __init__(self, llm_client: LLMClient, max_iterations: int = 20):
        self.llm = llm_client
        self.max_iterations = max_iterations  # cap on Step 2 assignment rounds
        self.logger = logging.getLogger(__name__)
        self.subtree_plans = []  # SubtreePlan list produced by Step 1

        # Tracking variables
        self.current_feature_tree = {}
        self.total_leaf_paths = 0  # Total paths count (primary tracking metric)
        self.assigned_paths_count = 0  # Assigned paths count
        self.feature_tree_len = 0  # unique node-name count (pre-order, de-duplicated)

        # Iteration history
        self.iteration_history = []

    def refactor(
        self, feature_tree: Dict[str, Any], repo_data: dict = None
    ) -> 
Dict[str, Any]: + """Execute complete two-step refactoring workflow.""" + self.logger.info("=" * 70) + self.logger.info("Starting feature tree refactoring") + self.logger.info("=" * 70) + + # Initialize tracking variables + self.current_feature_tree = feature_tree + all_leaf_paths = get_all_leaf_paths(feature_tree) + self.total_leaf_paths = len(all_leaf_paths) + self.assigned_paths_count = 0 + self.feature_tree_len = len(set(pre_order_traversal_to_list(feature_tree))) + + self.logger.info( + f"Feature tree statistics: {self.total_leaf_paths} leaf paths, " + f"{self.feature_tree_len} total nodes" + ) + + # Step 1: Plan subtrees + planning = self._step1_plan_subtrees(feature_tree, repo_data) + if not planning: + return {"error": "Planning step failed", "Features": feature_tree} + + # Step 2: Organize features + components = self._step2_organize_features(feature_tree, repo_data) + if not components: + return {"error": "Organization step failed", "Features": feature_tree} + + # Build result (maintaining Features and Component key compatibility) + # Inherit descriptions from input feature tree to refactored subtrees + input_descs = get_all_leaf_descriptions(feature_tree) + if input_descs: + for comp in components: + subtree = comp.get("refactored_subtree", {}) + if subtree: + from feature_build import attach_descriptions + comp["refactored_subtree"] = attach_descriptions(subtree, input_descs) + + statistics = self._calculate_statistics(components) + + repo_name = repo_data.get("repository_name", "Unknown") + result = { + "repository_name": repo_name, + "repository_purpose": repo_data.get("repository_purpose", ""), + "repository_specification": json.dumps( + repo_data.get("repository_specification", {}), indent=2 + ), + "features": feature_tree, + "components": components, + # "components_format": convert_component_to_features(components), + "planning_result": { + "total_subtrees": planning.total_subtrees, + "subtree_plans": [p.model_dump() for p in 
planning.subtree_plans], + "reasoning": planning.reasoning, + }, + "statistics": statistics, + "llm_call_history": [r.to_dict() for r in self.llm.get_call_history()], + "iteration_history": self.iteration_history, + } + + # Print detailed statistics table + self._print_statistics_table(components, statistics) + + return result + + def _step1_plan_subtrees( + self, feature_tree: Dict[str, Any], repo_data: Dict[str, Any] + ) -> Optional[SubtreePlanningOutput]: + """Step 1: Plan subtree structure.""" + self.logger.info("\n" + "-" * 70) + self.logger.info("[Step 1] Planning subtree structure...") + self.logger.info("-" * 70) + + # Build user prompt + all_paths = get_all_leaf_paths(feature_tree) + feature_tree_json = json.dumps(feature_tree, indent=2) + + user_prompt = f"""## Repository Information: +{build_repo_info(repo_data)} + +## Feature Tree to Refactor: +**Total Features**: {len(all_paths)} + +```json +{feature_tree_json} +``` + +## Task: +Analyze the feature tree and design a logical organization into functional subtrees/components. +Each subtree should represent a coherent functional area or module. + +Provide your subtree planning with: +1. Appropriate number of subtrees (determined by domain analysis, not fixed rules) +2. Clear names for each subtree +3. Purpose/theme for each subtree +4. 
    def _step2_organize_features(
        self, feature_tree: Dict[str, Any], repo_data: Dict[str, Any]
    ) -> Optional[List[Dict[str, Any]]]:
        """Step 2: Iteratively organize features into subtrees.

        Repeatedly asks the LLM to assign the remaining leaf paths of
        ``feature_tree`` into the subtrees planned in step 1. Iteration stops
        when all paths are assigned, when utilization reaches 99%, when the
        iteration budget is exhausted, or after 3 consecutive LLM failures.

        Args:
            feature_tree: Nested dict feature tree whose leaf paths are to be
                redistributed.
            repo_data: Repository metadata used to build the LLM prompt.

        Returns:
            The list of subtree dicts (with ``refactored_subtree`` populated),
            or ``None`` when step 1 produced no subtree plans.
        """
        self.logger.info("\n" + "-" * 70)
        self.logger.info("[Step 2] Organizing features (iterative)...")
        self.logger.info("-" * 70)

        if not self.subtree_plans:
            self.logger.error("[FAIL] No subtree plans")
            return None

        # Initialize: every leaf path starts out unassigned.
        all_paths = set(get_all_leaf_paths(feature_tree))
        remaining_paths = all_paths.copy()

        # Initialize subtree structure from the step-1 plans.
        subtrees = [
            {
                "name": plan.name,
                "purpose": plan.purpose,
                # "estimate_size": plan.estimate_size,
                "refactored_subtree": {},
                "actual_size": 0,
                "util_percent": 0.0,
            }
            for plan in self.subtree_plans
        ]

        # Error handling: consecutive failure count
        consecutive_failures = 0
        max_consecutive_failures = 3

        # Iterative assignment
        for iteration in range(1, self.max_iterations + 1):
            if not remaining_paths:
                self.logger.info("\n[OK] All paths assigned")
                break

            # Calculate utilization based on paths (not leaf names)
            utilization = self.assigned_paths_count / self.total_leaf_paths
            if utilization >= 0.99:
                self.logger.info("\n[OK] Reached 99% utilization, stopping iteration")
                break

            if consecutive_failures >= max_consecutive_failures:
                self.logger.warning(
                    f"\n[FAIL] {max_consecutive_failures} consecutive iterations failed, stopping"
                )
                break

            remaining_count = len(remaining_paths)
            self.logger.info(f"\n>>> Iteration {iteration}/{self.max_iterations}")
            self.logger.info(f" Remaining: {remaining_count} paths")
            self.logger.info(f" Utilization rate: {utilization:.1%}")
            # Progress estimation: project remaining iterations from the
            # average number of paths assigned per completed iteration.
            if iteration > 1:
                avg_assigned_per_iter = self.assigned_paths_count / (iteration - 1)
                remaining_to_assign = self.total_leaf_paths - self.assigned_paths_count
                estimated_iters = (
                    int(remaining_to_assign / avg_assigned_per_iter) + 1
                    if avg_assigned_per_iter > 0
                    else self.max_iterations - iteration
                )
                self.logger.info(
                    f" Estimated still needed: ~{estimated_iters} iterations"
                )

            # Record iteration start state
            iteration_record = {
                "iteration": iteration,
                "remaining_paths_count": remaining_count,
                "assigned_paths_before": self.assigned_paths_count,
                "utilization_before": utilization,
            }

            # Build current feature tree (only the still-unassigned paths)
            current_tree = apply_changes({}, list(remaining_paths), inplace=False)
            current_tree = convert_leaves_to_list(current_tree)

            # Build user prompt
            user_prompt = self._build_organization_prompt(
                repo_data, subtrees, current_tree, len(remaining_paths), utilization
            )

            # Call LLM
            _, result, _ = self.llm.call_structured(
                system_prompt=PROMPT_TEMPLATE_FEATURE_ORGANIZATION,
                user_prompt=user_prompt,
                response_model=FeatureOrganizationOutput,
                purpose=f"step2_iteration_{iteration}",
            )

            if not result:
                consecutive_failures += 1
                self.logger.warning(
                    f" [FAIL] LLM call failed (consecutive failures: {consecutive_failures})"
                )
                continue

            # Process assignment results (validates paths, updates subtrees)
            iteration_assigned, remaining_paths = self._process_assignments(
                result, subtrees, remaining_paths
            )

            if iteration_assigned > 0:
                consecutive_failures = 0  # Reset failure count
                self.assigned_paths_count += iteration_assigned
                self.logger.info(
                    f" [OK] Assigned this iteration: {iteration_assigned} paths"
                )
                # Update iteration record
                new_utilization = self.assigned_paths_count / self.total_leaf_paths
                iteration_record["assigned_this_iteration"] = iteration_assigned
                iteration_record["remaining_paths_after"] = len(remaining_paths)
                iteration_record["assigned_paths_after"] = self.assigned_paths_count
                iteration_record["utilization_after"] = new_utilization
                iteration_record["status"] = "success"
            else:
                consecutive_failures += 1
                self.logger.warning(
                    f" [FAIL] No progress (consecutive failures: {consecutive_failures})"
                )
                # No progress: carry the "before" values forward unchanged.
                iteration_record["assigned_this_iteration"] = 0
                iteration_record["remaining_paths_after"] = iteration_record[
                    "remaining_paths_count"
                ]
                iteration_record["assigned_paths_after"] = iteration_record[
                    "assigned_paths_before"
                ]
                iteration_record["utilization_after"] = iteration_record[
                    "utilization_before"
                ]
                iteration_record["status"] = "failed"

            # Save iteration record
            self.iteration_history.append(iteration_record)

        # Calculate final statistics
        self._finalize_subtrees(subtrees)

        final_util = self.assigned_paths_count / self.total_leaf_paths
        unassigned_count = self.total_leaf_paths - self.assigned_paths_count
        # NOTE(review): `iteration` is only bound if the for-loop body ran at
        # least once; with max_iterations < 1 the next line would raise
        # NameError — confirm max_iterations is always >= 1.
        self.logger.info(f"\n[OK] Organization completed ({iteration} iterations)")
        self.logger.info(f" Final utilization rate: {final_util:.1%}")
        self.logger.info(f" Remaining unassigned: {unassigned_count} paths")

        return subtrees
numbers" + # Only pass fields useful for assignment decisions + subtrees_for_prompt = [ + { + "name": s["name"], + "purpose": s["purpose"], + "actual_size": s["actual_size"], + "refactored_subtree": s["refactored_subtree"], + } + for s in subtrees + ] + subtrees_json = json.dumps(subtrees_for_prompt, indent=2) + current_tree_json = json.dumps(current_tree, indent=2) + + return f"""## Repository Information: +{build_repo_info(repo_data)} + +## Current Subtrees Status: +{subtrees_json} + +## Remaining Feature Tree ({remaining_count} paths to organize): +Current Utilization: {utilization:.1%} + +```json +{current_tree_json} +``` + +## Task +Refactor the remaining feature leaves by rebuilding their paths so they fit the repository subtrees, +the existing feature subgraphs, and the actual semantics of the repo. +The original feature tree is provided only as semantic context and must not be preserved as-is. + +Rules: +1. Operate only on leaf nodes from the remaining feature tree. +2. For each leaf, construct a new full path under the appropriate subtree. + The refactored path must have 2-8 segments joined by '/', with the leaf as the final segment. +3. The new full path for a leaf must not be identical to its original full path string + from the remaining feature tree. +4. Do not follow or mirror the original intermediate hierarchy; reorganize leaves according to + subtree purposes, repository architecture, and feature subgraph relationships. +5. Each leaf must appear exactly once across all assigned paths. +6. Do not invent, rename, or modify leaf names. + +Return only the new subtree path assignments. 
    def _process_assignments(
        self,
        result: FeatureOrganizationOutput,
        subtrees: List[Dict],
        remaining_paths: set,
    ) -> tuple[int, set]:
        """Validate and apply one iteration's LLM path assignments.

        Each proposed path must have 2-8 '/'-separated segments and end in a
        leaf name that is still unassigned; everything else is rejected and
        logged. Accepted paths are merged into the matching subtree's
        ``refactored_subtree``, and the remaining-path set is recomputed
        against ``self.current_feature_tree``.

        Args:
            result: Structured LLM output with per-subtree path assignments.
            subtrees: Subtree dicts to update in place.
            remaining_paths: Leaf paths still unassigned before this call.

        Returns:
            (number of paths accepted this iteration, updated remaining set).
        """
        iteration_assigned = 0
        total_proposed = 0
        total_rejected_depth = 0
        total_rejected_leaf = 0

        # Calculate remaining leaf nodes (leaf *names*, used for validation)
        remaining_tree = apply_changes({}, list(remaining_paths), inplace=False)
        remaining_leaf_nodes_set = set(extract_leaf_nodes(remaining_tree))

        for assignment in result.assignments:
            valid_paths = []
            rejected_depth = []
            rejected_leaf = []

            # Validate each path
            for path in assignment.assigned_paths:
                total_proposed += 1
                parts = path.split("/")

                # Check 1: Depth must be 2-8 segments
                if len(parts) < 2 or len(parts) > 8:
                    rejected_depth.append(path)
                    total_rejected_depth += 1
                    continue

                # Check 2: Leaf node must be in remaining tree (and not already assigned in this iteration)
                leaf_name = parts[-1]
                if leaf_name not in remaining_leaf_nodes_set:
                    rejected_leaf.append(path)
                    total_rejected_leaf += 1
                    continue

                valid_paths.append(path)
                # Remove from remaining set to prevent duplicate assignment within same iteration
                remaining_leaf_nodes_set.discard(leaf_name)

            # Record rejection information (sampled at 5 examples per category)
            if rejected_depth:
                self.logger.info(
                    f" [FAIL] Rejected {len(rejected_depth)} paths with depth errors (expected 2-8 segments)"
                )
                for p in rejected_depth[:5]:
                    self.logger.info(f" - {p} (depth: {len(p.split('/'))})")
                if len(rejected_depth) > 5:
                    self.logger.info(f" ... and {len(rejected_depth) - 5} more")
            if rejected_leaf:
                self.logger.info(
                    f" [FAIL] Rejected {len(rejected_leaf)} paths with non-existent/already assigned leaves"
                )
                for p in rejected_leaf[:5]:
                    leaf = p.split("/")[-1]
                    self.logger.info(f" - {p} (leaf '{leaf}' not in remaining)")
                if len(rejected_leaf) > 5:
                    self.logger.info(f" ... and {len(rejected_leaf) - 5} more")

            if not valid_paths:
                continue

            # Update corresponding subtree: rebuild its tree from the union of
            # previously accepted paths and this iteration's valid paths.
            for subtree in subtrees:
                if subtree["name"] == assignment.subtree_name:
                    current_paths = get_all_leaf_paths(subtree["refactored_subtree"])
                    current_paths.extend(valid_paths)
                    subtree["refactored_subtree"] = apply_changes({}, current_paths)
                    subtree["refactored_subtree"] = convert_leaves_to_list(
                        subtree["refactored_subtree"]
                    )
                    break

            iteration_assigned += len(valid_paths)

            self.logger.info(
                f" → {assignment.subtree_name}: +{len(valid_paths)} paths"
            )

        # Output iteration statistics
        if total_proposed > 0:
            acceptance_rate = iteration_assigned / total_proposed * 100
            self.logger.info(
                f" Iteration stats: proposed {total_proposed} paths, "
                f"accepted {iteration_assigned} ({acceptance_rate:.1f}%), "
                f"rejected {total_rejected_depth + total_rejected_leaf} "
                f"(depth: {total_rejected_depth}, leaf: {total_rejected_leaf})"
            )

        # Recalculate remaining paths (based on leaf nodes): collect every leaf
        # name already placed in any subtree, map those back to their original
        # paths, and subtract them from the original feature tree.
        if iteration_assigned > 0:
            selected_feature_paths = []
            for subtree in subtrees:
                sub_tree = subtree.get("refactored_subtree", {})
                if sub_tree:
                    parsed_leaf_nodes = extract_leaf_nodes(sub_tree)
                    valid_leaf_paths = find_leaf_paths_by_node(
                        self.current_feature_tree, target_leaf_names=parsed_leaf_nodes
                    )
                    selected_feature_paths.extend(valid_leaf_paths)

            selected_feature_paths = list(set(selected_feature_paths))
            filter_feature_tree = remove_paths(
                self.current_feature_tree, selected_feature_paths, inplace=False
            )
            remaining_paths = set(get_all_leaf_paths(filter_feature_tree))

        return iteration_assigned, remaining_paths
get_all_leaf_paths(subtree["refactored_subtree"]) + if subtree["refactored_subtree"] + else [] + ) + subtree["actual_size"] = len(subtree_paths) + subtree["util_percent"] = ( + len(subtree_paths) / total_leaf_paths if total_leaf_paths > 0 else 0.0 + ) + + def _calculate_statistics(self, components: List[Dict[str, Any]]) -> Dict[str, Any]: + """Calculate statistics.""" + # Use path count for accurate statistics (handles duplicate leaf names) + total_paths = self.total_leaf_paths + # Calculate assigned paths by summing actual paths in refactored subtrees + assigned_paths = sum( + len(get_all_leaf_paths(c["refactored_subtree"])) + for c in components + if c["refactored_subtree"] + ) + + return { + "original_leaf_count": total_paths, + "assigned_leaf_count": assigned_paths, + "unassigned_leaf_count": total_paths - assigned_paths, + "coverage_rate": assigned_paths / total_paths if total_paths > 0 else 0, + "subtree_count": len(components), + } + + def _print_statistics_table( + self, components: List[Dict[str, Any]], statistics: Dict[str, Any] + ) -> None: + """Print statistics in table format.""" + print("\n" + "=" * 80) + print("REFACTORING SUMMARY") + print("=" * 80) + + # 1. 
Structure comparison - original vs refactored + # Count top-level categories in original feature tree using path count (consistent with statistics) + original_categories = {} + for top_category, subtree in self.current_feature_tree.items(): + # Use get_all_leaf_paths for consistent counting + leaf_count = len(get_all_leaf_paths({top_category: subtree})) + original_categories[top_category] = leaf_count + + # Sort by category name + sorted_original = sorted(original_categories.items(), key=lambda x: x[0]) + + # Sort by component name + sorted_components = sorted(components, key=lambda c: c["name"]) + + # Build comparison rows - show side by side without implying correspondence + comparison_rows = [] + max_rows = max(len(sorted_original), len(sorted_components)) + + for i in range(max_rows): + original_cat = sorted_original[i][0] if i < len(sorted_original) else "" + original_count = sorted_original[i][1] if i < len(sorted_original) else "" + + # Add arrow separator only in the middle + separator = "โ†’" if i == max_rows // 2 else "" + + component_name = ( + sorted_components[i]["name"] if i < len(sorted_components) else "" + ) + component_count = ( + sorted_components[i]["actual_size"] + if i < len(sorted_components) + else "" + ) + + comparison_rows.append( + [ + original_cat, + original_count, + separator, + component_name, + component_count, + ] + ) + + # Add total row + comparison_rows.append( + [ + "TOTAL", + sum(original_categories.values()), + "โ†’", + "TOTAL", + statistics["assigned_leaf_count"], + ] + ) + + print("\n") + print_unicode_table( + headers=["Original Category", "Count", "", "Refactored Component", "Count"], + rows=comparison_rows, + title="Feature Tree Refactored", + ) + + # 2. 
def main():
    """CLI entry point: load the feature tree, refactor it, and save results.

    Returns:
        Process exit code: 0 on success, 1 on any failure.
    """
    parser = argparse.ArgumentParser(
        description="Feature Tree Refactoring Tool - Organize flat feature tree into modular components",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Example usage:
  python refactor_simple.py --input feature_build.json --output refactored.json
  python refactor_simple.py --input feature_build.json --max-iterations 15
  """,
    )

    parser.add_argument(
        "--input",
        help="Input feature tree JSON file (feature_build.json format)",
        default=str(FEATURE_BUILD_FILE),
    )
    parser.add_argument(
        "--output",
        default=str(FEATURE_TREE_FILE),
        help=f"Output result file (default: {FEATURE_TREE_FILE})",
    )
    parser.add_argument(
        "--max-iterations",
        type=int,
        default=10,
        help="Maximum number of iterations (default: 10)",
    )
    parser.add_argument(
        "--log-level",
        default="INFO",
        choices=["DEBUG", "INFO", "WARNING", "ERROR"],
        help="Log level (default: INFO)",
    )
    parser.add_argument(
        "--no-trajectory",
        action="store_true",
        help="Disable trajectory recording",
    )

    args = parser.parse_args()

    # Configure logging
    logging.basicConfig(
        level=getattr(logging, args.log_level),
        format="%(asctime)s - %(levelname)s - %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )
    logger = logging.getLogger(__name__)

    # BUG FIX: bind trajectory/step_id BEFORE entering the try block. The
    # except handlers below reference both names; previously an error raised
    # while reading the input file (before the trajectory was created)
    # triggered an UnboundLocalError inside the handler, masking the real
    # failure.
    trajectory = None
    step_id = None

    try:
        # Load input data
        logger.info(f"Reading input file: {args.input}")
        with open(args.input, "r", encoding="utf-8") as f:
            repo_specification_data = json.load(f)

        # A previous output file, if readable, lets us resume from its components.
        try:
            with open(args.output, "r", encoding="utf-8") as f:
                output_data = json.load(f)
        except Exception:
            output_data = {}

        # Extract feature tree and repository information
        components = output_data.get("components", [])
        if len(components) == 0:
            feature_tree = repo_specification_data.get("feature_tree")
        else:
            # Convert existing component list back into feature_tree format
            feature_tree = convert_component_to_features(components)
            logger.info(
                f"Converting feature tree from existing {len(components)} components"
            )

        if not feature_tree:
            logger.error("[FAIL] Input file missing 'feature_tree' key")
            return 1

        repo_name = repo_specification_data.get("repository_name", "Unknown")
        repository_purpose = repo_specification_data.get("repository_purpose", "")
        background_and_overview = repo_specification_data.get(
            "background_and_overview", []
        )
        functional_requirements = repo_specification_data.get(
            "functional_requirements", []
        )

        repo_data = {
            "repository_name": repo_name,
            "repository_purpose": repository_purpose,
            "background_and_overview": background_and_overview,
            "functional_requirements": functional_requirements,
            "repository_specification": repo_specification_data,
        }

        # Initialize trajectory recording (optional)
        if not args.no_trajectory:
            trajectory = load_or_create_trajectory("feature_refactor")
            trajectory.start(metadata={
                "input_file": args.input,
                "output_file": args.output,
                "max_iterations": args.max_iterations,
            })
            step = trajectory.add_step("feature_refactor", "Refactor flat feature tree into modular components")
            trajectory.start_step(step.step_id)
            step_id = step.step_id

        # Create LLM client
        llm_client = LLMClient(trajectory=trajectory, step_id=step_id)

        # Create refactorer and execute
        refactor = FeatureTreeRefactor(llm_client, max_iterations=args.max_iterations)
        result = refactor.refactor(feature_tree, repo_data)

        # Save results
        logger.info(f"\nSaving results to: {args.output}")
        with open(args.output, "w", encoding="utf-8") as f:
            json.dump(result, f, indent=2, ensure_ascii=False)

        logger.info(f"\n{'=' * 70}")
        logger.info("[OK] Refactoring complete!")
        logger.info(f"{'=' * 70}\n")

        # Mark trajectory as complete
        if trajectory:
            components = result.get("components", [])
            if step_id is not None:
                trajectory.complete_step(step_id, {
                    "components_count": len(components),
                })
            trajectory.complete(metadata={
                "components_count": len(components),
            })
            logger.info(f"[OK] Trajectory saved to: {trajectory.trajectory_file}")

        return 0

    except FileNotFoundError:
        logger.error(f"[FAIL] Input file not found: {args.input}")
        if trajectory:
            if step_id is not None:
                trajectory.fail_step(step_id, "Input file not found")
            trajectory.fail("Input file not found")
        return 1
    except json.JSONDecodeError as e:
        logger.error(f"[FAIL] JSON parsing error: {e}")
        if trajectory:
            if step_id is not None:
                trajectory.fail_step(step_id, str(e))
            trajectory.fail(str(e))
        return 1
    except Exception as e:
        logger.error(f"[FAIL] Execution error: {e}", exc_info=True)
        if trajectory:
            if step_id is not None:
                trajectory.fail_step(step_id, str(e))
            trajectory.fail(str(e))
        return 1
0000000..9aa47ec --- /dev/null +++ b/RPG-Kit/scripts/feature_refactor_validation.py @@ -0,0 +1,222 @@ +#!/usr/bin/env python3 +"""Validate feature_build.json (input) and feature_tree.json (output) for /rpgkit.feature_refactor command. + +This script checks: +1. Input file: .rpgkit/data/feature_build.json + - File existence + - Required fields: repository_name, repository_purpose, feature_tree + - Fields must exist and not be empty + +2. Output file: .rpgkit/data/feature_tree.json + - File existence + - Fields status: repository_name, repository_purpose, feature_tree, components + +Output: +- Status messages are printed to stderr (user-friendly progress info) +- JSON result is printed to stdout (for agent parsing) + +Exit codes: +- 0: Input file is valid (output file status is informational only) +- 1: Input file has errors (missing or invalid) +""" + +import json +import sys +from pathlib import Path +from typing import Any, Dict + +from common.paths import FEATURE_BUILD_FILE, FEATURE_TREE_FILE + +# File paths +INPUT_FILE = FEATURE_BUILD_FILE +OUTPUT_FILE = FEATURE_TREE_FILE + +# Required fields for input file +INPUT_REQUIRED_FIELDS = ["repository_name", "repository_purpose", "feature_tree"] + +# Fields to check in output file +OUTPUT_CHECK_FIELDS = [ + "repository_name", + "repository_purpose", + "feature_tree", + "components", +] + + +def print_status(message: str) -> None: + """Print status message to stderr to keep stdout clean for JSON.""" + print(message, file=sys.stderr) + + +def load_json(path: Path) -> Dict[str, Any] | None: + """Load JSON file and return data if valid, None otherwise.""" + try: + with path.open("r", encoding="utf-8") as f: + data = json.load(f) + if isinstance(data, dict): + return data + except json.JSONDecodeError as e: + print_status(f" [FAIL] JSON parse error: {e}") + except Exception as e: + print_status(f" [FAIL] Failed to read file: {e}") + return None + + +def is_field_valid(data: Dict[str, Any], field: str) -> bool: + """Check if 
a field exists and is not empty.""" + if field not in data: + return False + + value = data[field] + + if value is None: + return False + if isinstance(value, str) and not value.strip(): + return False + if isinstance(value, list) and len(value) == 0: + return False + if isinstance(value, dict) and len(value) == 0: + return False + + return True + + +def count_feature_tree_leaves(tree: Dict[str, Any]) -> int: + """Recursively count all leaf nodes in the feature tree.""" + count = 0 + if isinstance(tree, dict): + for key, value in tree.items(): + if isinstance(value, dict): + if not value: + count += 1 + else: + count += count_feature_tree_leaves(value) + elif isinstance(value, list): + count += len(value) + else: + count += 1 + return count + + +def validate_input_file() -> Dict[str, Any]: + """Validate the input file (.rpgkit/data/feature_build.json).""" + result = { + "valid": False, + "exists": False, + "errors": [], + } + + if not INPUT_FILE.exists(): + result["errors"].append(f"Input file not found: {INPUT_FILE}") + print_status(f"[INPUT] [FAIL] {INPUT_FILE} not found") + return result + + result["exists"] = True + + data = load_json(INPUT_FILE) + if data is None: + result["errors"].append("Failed to parse JSON or file is empty") + print_status("[INPUT] [FAIL] Invalid JSON") + return result + + all_fields_valid = True + missing_fields = [] + for field in INPUT_REQUIRED_FIELDS: + if not is_field_valid(data, field): + all_fields_valid = False + missing_fields.append(field) + if field not in data: + result["errors"].append(f"Missing required field: {field}") + else: + result["errors"].append(f"Field is empty: {field}") + + if is_field_valid(data, "feature_tree"): + leaf_count = count_feature_tree_leaves(data.get("feature_tree", {})) + result["feature_tree_leaf_count"] = leaf_count + + if all_fields_valid: + result["valid"] = True + print_status( + f"[INPUT] [OK] Valid ({result.get('feature_tree_leaf_count', 0)} leaves)" + ) + else: + print_status(f"[INPUT] [FAIL] 
Missing: {', '.join(missing_fields)}") + + return result + + +def check_output_file() -> Dict[str, Any]: + """Check the output file (.rpgkit/data/feature_tree.json) status.""" + result = { + "exists": False, + "has_content": False, + "errors": [], + } + + if not OUTPUT_FILE.exists(): + print_status("[OUTPUT] [-] Not exists (will create)") + return result + + result["exists"] = True + + data = load_json(OUTPUT_FILE) + if data is None: + result["errors"].append("Invalid JSON or empty file") + print_status("[OUTPUT] [-] Exists but invalid JSON") + return result + + # Check if output has valid content (components field with content) + if is_field_valid(data, "components"): + result["has_content"] = True + print_status("[OUTPUT] [OK] Exists with content") + else: + print_status("[OUTPUT] [-] Exists but no valid content") + + return result + + +def main() -> None: + input_result = validate_input_file() + output_result = check_output_file() + + # Build simplified result (validation status only, no file content) + result = { + "input_file": str(INPUT_FILE), + "output_file": str(OUTPUT_FILE), + "input": { + "valid": input_result["valid"], + "exists": input_result["exists"], + "errors": input_result["errors"], + }, + "output": { + "exists": output_result["exists"], + "has_content": output_result["has_content"], + "errors": output_result["errors"], + }, + } + + if not input_result["valid"]: + result["status"] = "error" + result["message"] = "Input invalid" + result["action"] = "none" + elif output_result["exists"] and output_result["has_content"]: + result["status"] = "ready" + result["message"] = "Output exists" + result["action"] = "overwrite_or_skip" + else: + result["status"] = "ready" + result["message"] = "Ready to create" + result["action"] = "create" + + print_status(f"[RESULT] status={result['status']}, action={result['action']}") + + print(json.dumps(result, ensure_ascii=False, indent=2)) + + if result["status"] == "error": + sys.exit(1) + else: + sys.exit(0) + + 
def parse_evidence_line(line: str) -> Optional[dict]:
    """Parse an evidence reference line.

    Format: "  - evidence_id | document.md Lstart-Lend".

    Returns:
        A dict with evidence_id/document_id/line_start/line_end, or None
        when the line is not a well-formed evidence bullet.
    """
    stripped = line.strip()
    if not stripped.startswith("- "):
        return None

    # Accepts "id | doc.md L10-L20", "id | doc.md L10-20", and "id | doc.md L10".
    pattern = r'^([^\|]+)\s*\|\s*(\S+)\s+L(\d+)(?:-L?(\d+))?$'
    match = re.match(pattern, stripped[2:].strip())
    if match is None:
        return None

    start = int(match.group(3))
    # A single "Lnn" reference covers exactly one line.
    end = int(match.group(4)) if match.group(4) else start
    return {
        "evidence_id": match.group(1).strip(),
        "document_id": match.group(2).strip(),
        "line_start": start,
        "line_end": end,
    }
+ raw = line.split(":", 1)[1].strip() + raw = raw.strip("[]").strip() + tokens = [] + for t in raw.split(","): + t = t.strip().strip('"').strip("'").strip() + if t: + tokens.append(t) + meta["project_types"] = tokens + elif line.startswith("- **Project Notes**:"): + meta["project_notes"] = line.split(":", 1)[1].strip() + elif line.startswith("- **Generated At**:"): + meta["generated_at"] = line.split(":", 1)[1].strip() + elif line.startswith("- **Source Documents**:"): + docs = line.split(":", 1)[1].strip() + meta["source_documents"] = [d.strip() for d in docs.split(",")] + + i += 1 + + return meta, i + + +def parse_bg_or_nfr_item(lines: list, start_idx: int, include_evidence: bool = True) -> tuple: + """Parse a single BG or NFR item.""" + i = start_idx + line = lines[i].strip() + + # Parse header: "### BG-001: Title" or "### NFR-001: Title" + match = re.match(r'^###\s+(BG|NFR)-(\d+):\s+(.+)$', line) + if not match: + return None, i + 1 + + item_type = match.group(1) + item_num = match.group(2) + title = match.group(3).strip() + item_id = f"{item_type}-{item_num}" + + item = { + "id": item_id, + "title": title, + } + if include_evidence: + item["evidence"] = [] + + i += 1 + in_evidence = False + + while i < len(lines): + line = lines[i].strip() + + # Stop at next item or section + if line.startswith("### ") or line.startswith("## "): + break + + if line.startswith("- **Description**:"): + item["description"] = line.split(":", 1)[1].strip() + elif line.startswith("- **Evidence**:"): + in_evidence = True + elif in_evidence and line.startswith("- ") and include_evidence: + evidence = parse_evidence_line(line) + if evidence: + item["evidence"].append(evidence) + elif not line.startswith("-") and line: + in_evidence = False + + i += 1 + + return item, i + + +def parse_background_section(lines: list, start_idx: int, include_evidence: bool = True) -> tuple: + """Parse the Background section.""" + backgrounds = [] + i = start_idx + + while i < len(lines): + line = 
lines[i].strip() + + # Stop at next major section + if line.startswith("## ") and not line.startswith("## Background"): + break + + if line.startswith("### BG-"): + item, i = parse_bg_or_nfr_item(lines, i, include_evidence) + if item: + backgrounds.append(item) + else: + i += 1 + + return backgrounds, i + + +def parse_nfr_section(lines: list, start_idx: int, include_evidence: bool = True) -> tuple: + """Parse the NFR section.""" + nfrs = [] + i = start_idx + + while i < len(lines): + line = lines[i].strip() + + # Stop at next major section (or end) + if line.startswith("## ") and not line.startswith("## NFR"): + break + + if line.startswith("### NFR-"): + item, i = parse_bg_or_nfr_item(lines, i, include_evidence) + if item: + nfrs.append(item) + else: + i += 1 + + return nfrs, i + + +def parse_feature_tree_links(lines: list, start_idx: int) -> tuple: + """Parse Feature Tree links to get feature file references.""" + links = [] + i = start_idx + + while i < len(lines): + line = lines[i].strip() + + # Stop at next section + if line.startswith("## ") and not line.startswith("## Feature Tree"): + break + + # Pattern: "- [FT-001: Title](features/FT-001.md)" + match = re.match(r'^-\s+\[([^\]]+)\]\(([^\)]+)\)$', line) + if match: + title = match.group(1) + path = match.group(2) + links.append({"title": title, "path": path}) + + i += 1 + + return links, i + + +def parse_feature_file(file_path: Path, include_evidence: bool = True) -> Optional[dict]: + """Parse a single feature file (e.g., FT-001.md).""" + if not file_path.exists(): + return None + + content = file_path.read_text(encoding="utf-8") + lines = content.split("\n") + + feature = None + stack = [] # Stack to track parent features at each level + + i = 0 + while i < len(lines): + line = lines[i] + stripped = line.strip() + + # Match feature headers at any level + # # FT-001: Title (level 1) + # ## FT-001-001: Title (level 2) + # ### FT-001-001-001: Title (level 3) + header_match = 
re.match(r'^(#+)\s+(FT-[\d-]+):\s+(.+)$', stripped) + + if header_match: + level = len(header_match.group(1)) + feature_id = header_match.group(2) + name = header_match.group(3).strip() + + new_feature = { + "id": feature_id, + "name": name, + "description": "", + "children": [] + } + if include_evidence: + new_feature["evidence"] = [] + + # Parse description and evidence + i += 1 + in_evidence = False + + while i < len(lines): + current = lines[i].strip() + + # Stop if we hit another header + if re.match(r'^#+\s+(FT-[\d-]+):', current): + break + + if current.startswith("- **Description**:"): + new_feature["description"] = current.split(":", 1)[1].strip() + elif current.startswith("- **Evidence**:"): + in_evidence = True + elif in_evidence and current.startswith("- ") and include_evidence: + evidence = parse_evidence_line(current) + if evidence: + new_feature["evidence"].append(evidence) + elif not current.startswith("-") and current: + in_evidence = False + + i += 1 + + # Determine where to place this feature + if level == 1: + feature = new_feature + stack = [(1, feature)] + else: + # Find parent at level - 1 + while stack and stack[-1][0] >= level: + stack.pop() + + if stack: + parent = stack[-1][1] + parent["children"].append(new_feature) + + stack.append((level, new_feature)) + else: + i += 1 + + return feature + + +def parse_feature_spec(input_dir: Path, include_evidence: bool = True) -> dict: + """Parse the complete feature specification from Markdown files.""" + spec_file = input_dir / "feature_spec.md" + + if not spec_file.exists(): + raise FileNotFoundError(f"feature_spec.md not found in {input_dir}") + + content = spec_file.read_text(encoding="utf-8") + lines = content.split("\n") + + result = { + "meta": {}, + "background_and_overview": [], + "non_functional_requirements": [], + "functional_requirements": [] + } + + i = 0 + while i < len(lines): + line = lines[i].strip() + + if line == "## Meta": + meta, i = parse_meta_section(lines, i + 1) + 
def main():
    """CLI entry point: parse a Markdown feature spec and write JSON output."""
    parser = argparse.ArgumentParser(
        description="Convert Markdown feature specification to JSON format"
    )
    parser.add_argument(
        "--input-dir",
        type=Path,
        default=None,
        help="Directory containing feature_spec.md and features/ folder"
    )
    parser.add_argument(
        "--output",
        type=Path,
        default=None,
        help="Output JSON file path"
    )
    # BUG FIX: this flag previously declared `action="store_true", default=True`,
    # which made it a no-op — evidence was ALWAYS excluded, regardless of
    # whether the flag was passed, contradicting the module docstring.
    # store_true's implicit default of False restores the documented behavior:
    # evidence is included unless --no-evidence is given.
    parser.add_argument(
        "--no-evidence",
        action="store_true",
        help="Exclude evidence fields from output"
    )

    args = parser.parse_args()

    # Determine input directory
    if args.input_dir:
        input_dir = args.input_dir
    else:
        # Try to find .rpgkit/data/feature_spec relative to current directory
        cwd = Path.cwd()
        default_path = cwd / ".rpgkit" / "data" / "feature_spec"
        if default_path.exists():
            input_dir = default_path
        else:
            # Try relative to script location
            script_dir = Path(__file__).parent
            input_dir = script_dir.parent / "data" / "feature_spec"

    if not input_dir.exists():
        print(f"Error: Input directory not found: {input_dir}", file=sys.stderr)
        sys.exit(1)

    # Determine output file
    if args.output:
        output_file = args.output
    else:
        # Default output is in parent directory of input_dir
        output_file = input_dir.parent / "feature_spec.json"

    include_evidence = not args.no_evidence

    print(f"Parsing feature specification from: {input_dir}")
    print(f"Include evidence: {include_evidence}")

    try:
        spec = parse_feature_spec(input_dir, include_evidence)

        # Write output
        output_file.parent.mkdir(parents=True, exist_ok=True)
        with open(output_file, "w", encoding="utf-8") as f:
            json.dump(spec, f, indent=2, ensure_ascii=False)

        # Print summary
        print(f"\nOutput written to: {output_file}")
        print(f"  - Repository: {spec.get('repository_name', 'N/A')}")
        print(f"  - Background items: {len(spec.get('background_and_overview', []))}")
        print(f"  - NFR items: {len(spec.get('non_functional_requirements', []))}")
        print(f"  - Top-level features: {len(spec.get('functional_requirements', []))}")

        # Count total feature nodes (each node plus all its descendants)
        def count_features(features: list) -> int:
            count = len(features)
            for f in features:
                count += count_features(f.get("children", []))
            return count

        total_features = count_features(spec.get("functional_requirements", []))
        print(f"  - Total feature nodes: {total_features}")

    except Exception as e:
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)
Interface Design - Design function/class interfaces for each file +""" + +# Data Flow +from .data_flow_agent import ( + DataFlowAgent, + LLMClient as DataFlowLLMClient, + DataFlowEdge, + DataFlowOutput, + validate_data_flow, + compute_topological_order, +) +from .data_flow_prompts import ( + DATA_FLOW_PROMPT, + DATA_FLOW_REVIEW_PROMPT, + format_functional_areas, +) + +# Base Class +from .base_class_agent import ( + BaseClassAgent, + LLMClient as BaseClassLLMClient, + extract_class_names, + validate_base_classes, + DataStructureDefinition, + extract_data_flow_types, + validate_data_structures, +) +from .base_class_prompts import ( + BASE_CLASS_PROMPT, + BASE_CLASS_REVIEW_PROMPT, +) + +# Interface +from .interface_agent import ( + InterfaceAgent, + SubtreeInterfaceAgent, + InterfaceOrchestrator, + DependencyCollector, + GlobalInterfaceRegistry, + cross_validate_imports_vs_calls, + LLMClient as InterfaceLLMClient, +) +from .interface_prompts import ( + INTERFACE_PROMPT, + PLAN_FILE_PROMPT, + SUBTREE_INTERFACE_PROMPT, + ORPHAN_REVIEW_PROMPT, +) + +# Global Interface Review +from .interface_review import ( + InterfaceReviewer, + check_call_graph_connectivity, + check_feature_dependency_coverage, + print_review_summary, + prune_orphan_interfaces, + review_orphan_units, +) + +# Unified InterfacesStore +from .interfaces_store import ( + InterfacesStore, + InterfaceUnit, + InheritanceEdge, + InvocationEdge, + ReferenceEdge, + PruneSummary, + OrphanFeature, + RPGUpdateSummary, +) + +# Unified Entry Point +from .func_designer import FuncDesigner + +__all__ = [ + # Data Flow + "DataFlowAgent", + "DataFlowLLMClient", + "DataFlowEdge", + "DataFlowOutput", + "validate_data_flow", + "compute_topological_order", + "DATA_FLOW_PROMPT", + "DATA_FLOW_REVIEW_PROMPT", + "format_functional_areas", + # Base Class + "BaseClassAgent", + "BaseClassLLMClient", + "extract_class_names", + "validate_base_classes", + "DataStructureDefinition", + "extract_data_flow_types", + 
"validate_data_structures", + "BASE_CLASS_PROMPT", + "BASE_CLASS_REVIEW_PROMPT", + # Interface + "InterfaceAgent", + "SubtreeInterfaceAgent", + "InterfaceOrchestrator", + "DependencyCollector", + "GlobalInterfaceRegistry", + "cross_validate_imports_vs_calls", + "InterfaceLLMClient", + "INTERFACE_PROMPT", + "PLAN_FILE_PROMPT", + "SUBTREE_INTERFACE_PROMPT", + "ORPHAN_REVIEW_PROMPT", + # Global Review + "InterfaceReviewer", + "check_call_graph_connectivity", + "check_feature_dependency_coverage", + "print_review_summary", + "prune_orphan_interfaces", + "review_orphan_units", + # InterfacesStore + "InterfacesStore", + "InterfaceUnit", + "InheritanceEdge", + "InvocationEdge", + "ReferenceEdge", + "PruneSummary", + "OrphanFeature", + "RPGUpdateSummary", + # Unified Entry Point + "FuncDesigner", +] diff --git a/RPG-Kit/scripts/func_design/base_class_agent.py b/RPG-Kit/scripts/func_design/base_class_agent.py new file mode 100644 index 0000000..e49ebd3 --- /dev/null +++ b/RPG-Kit/scripts/func_design/base_class_agent.py @@ -0,0 +1,513 @@ +#!/usr/bin/env python3 +"""Base Class Agent. + +This module provides the BaseClassAgent for designing shared base classes +and data structures for the repository. 
import json
import logging
import ast
from typing import Dict, List, Optional, Tuple, Any
from pydantic import BaseModel, Field, field_validator

from .base_class_prompts import (
    BASE_CLASS_PROMPT,
    BASE_CLASS_REVIEW_PROMPT,
)

# Make the parent scripts/ directory importable so the shared LLM client and
# helpers in common/ can be reused by this package.
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent.parent))
from common import (
    LLMClient,
    validate_python_syntax,
    extract_class_names,
)


# ============================================================================
# Data Models
# ============================================================================

class BaseClassDefinition(BaseModel):
    """A single base-class design entry produced by the LLM."""

    file_path: str = Field(..., description="Path where this base class should be placed")
    code: str = Field(..., description="Full Python code for the class")
    scope: str = Field(..., description="Scope: 'global' or a specific subtree/component name")
    subclasses: Dict[str, List[str]] = Field(..., description="Mapping from base class name to list of concrete subclass names (each list must have at least 2 items)")

    @field_validator('subclasses')
    @classmethod
    def validate_subclasses(cls, v):
        # An abstraction is only justified with concrete reusers: every
        # declared base class must name at least two subclasses.
        if not v:
            raise ValueError("subclasses must be a non-empty dict")
        for base_name, sub_list in v.items():
            has_enough = isinstance(sub_list, list) and len(sub_list) >= 2
            if not has_enough:
                raise ValueError(f"Base class '{base_name}' must have at least 2 subclasses")
        return v


class DataStructureDefinition(BaseModel):
    """A shared data-structure stub derived from data-flow types.

    Unlike BaseClassDefinition these carry no subclass requirement: they
    model the concrete containers exchanged between components as defined
    in data_flow.json. file_path is deliberately left blank at this stage;
    the interface designer assigns it later and writes it back to
    base_classes.json.
    """

    code: str = Field(..., description="Python stub code (dataclass skeleton with fields and type annotations)")
    subtree: str = Field(..., description="The functional area / subtree this data structure belongs to (must be a valid subtree name, NOT 'global')")
    data_flow_types: List[str] = Field(..., min_length=1, description="Which data_flow data_type names this definition covers")
    file_path: str = Field(default="", description="File path assigned later by the interface designer. Leave empty during base class design.")


class BaseClassOutput(BaseModel):
    """Top-level structured response expected from the LLM."""

    base_classes: List[BaseClassDefinition] = Field(default_factory=list, description="List of base class definitions (may be empty for simple projects that don't need behavioral abstractions)")
    data_structures: List[DataStructureDefinition] = Field(default_factory=list, description="List of data flow data structure stubs (may be empty if all data types are already covered by base classes)")


# ============================================================================
# Validation Functions
# ============================================================================

def validate_base_classes_model(
    model: "BaseClassOutput",
    valid_subtrees: Optional[List[str]] = None
) -> Tuple[bool, str]:
    """Check a parsed BaseClassOutput for scope and syntax problems.

    Each entry's scope must be exactly 'global' or one of the known
    subtree names, and its code must parse as valid Python.

    Args:
        model: BaseClassOutput Pydantic model
        valid_subtrees: List of valid subtree/component names (from skeleton)

    Returns: (is_valid, error_message)
    """
    allowed_scopes = {"global"} | set(valid_subtrees or [])

    problems = []
    for idx, entry in enumerate(model.base_classes):
        # Scope must match one of the allowed names exactly — no directory
        # paths, no fuzzy matching.
        if entry.scope not in allowed_scopes:
            problems.append(
                f"Base class {idx} ({entry.file_path}): invalid scope '{entry.scope}'. "
                f"Must be exactly one of: {sorted(allowed_scopes)}"
            )
            continue

        syntax_ok, syntax_msg = validate_python_syntax(entry.code)
        if not syntax_ok:
            problems.append(f"Base class {idx} ({entry.file_path}): syntax error - {syntax_msg}")

    if problems:
        return False, "\n".join(problems)

    return True, "All base classes are valid"
def validate_base_classes(
    base_classes: List[Dict[str, Any]],
    valid_subtrees: Optional[List[str]] = None
) -> Tuple[bool, str]:
    """Validate raw base-class dicts for completeness, scope, and syntax.

    Each entry must supply file_path, code, a scope that is exactly
    'global' or a valid subtree name, and a subclasses mapping in which
    every base class lists at least 2 concrete subclasses. Code must
    parse as valid Python.

    Args:
        base_classes: List of base class definitions
        valid_subtrees: List of valid subtree/component names (from skeleton)

    Returns: (is_valid, error_message)
    """
    if not base_classes:
        return False, "Empty base classes provided"

    allowed_scopes = {"global"} | set(valid_subtrees or [])

    problems: List[str] = []
    for idx, entry in enumerate(base_classes):
        file_path = entry.get("file_path", "")
        code = entry.get("code", "")
        scope = entry.get("scope", "")
        subclasses = entry.get("subclasses", {})

        if not file_path:
            problems.append(f"Base class {idx}: missing file_path")
            continue

        # subclasses must be a non-empty dict mapping base names to lists
        # of at least 2 concrete subclass names.
        if not isinstance(subclasses, dict) or not subclasses:
            problems.append(f"Base class {idx} ({file_path}): 'subclasses' must be a non-empty dict mapping base class names to subclass lists")
            continue

        # Report only the first offending mapping per entry, then fall
        # through to the remaining checks (matches original behavior).
        for base_name, sub_list in subclasses.items():
            if not isinstance(sub_list, list) or len(sub_list) < 2:
                problems.append(f"Base class {idx} ({file_path}): base class '{base_name}' must have at least 2 subclasses, got {len(sub_list) if isinstance(sub_list, list) else 0}")
                break

        if not code:
            problems.append(f"Base class {idx} ({file_path}): missing code")
            continue

        if not scope:
            problems.append(f"Base class {idx} ({file_path}): missing scope (should be 'global' or a subtree name)")
            continue

        # Scope must be an exact match; no path-like or fuzzy values.
        if scope not in allowed_scopes:
            problems.append(
                f"Base class {idx} ({file_path}): invalid scope '{scope}'. "
                f"Must be exactly one of: {sorted(allowed_scopes)}"
            )
            continue

        syntax_ok, syntax_msg = validate_python_syntax(code)
        if not syntax_ok:
            problems.append(f"Base class {idx} ({file_path}): syntax error - {syntax_msg}")

    if problems:
        return False, "\n".join(problems)

    return True, "All base classes are valid"
# ============================================================================
# Data Flow Type Extraction
# ============================================================================

def extract_data_flow_types(data_flow: List[Dict[str, Any]]) -> List[str]:
    """Collect the distinct data_type labels used by the data-flow edges.

    Args:
        data_flow: List of data flow edge dicts

    Returns:
        Sorted list of unique data_type strings
    """
    # Blank or whitespace-only labels are ignored.
    return sorted({
        edge.get("data_type", "").strip()
        for edge in data_flow
        if edge.get("data_type", "").strip()
    })


def validate_data_structures(
    data_structures: List[Dict[str, Any]],
    data_flow_types: List[str],
    valid_subtrees: Optional[List[str]] = None
) -> Tuple[bool, str]:
    """Validate data-structure stub definitions.

    Each entry needs code that parses as Python, a subtree that is a
    valid subtree name (never 'global'), and at least one covered
    data-flow type. file_path is intentionally NOT checked here — the
    interface designer assigns it later.

    Args:
        data_structures: List of data structure definitions
        data_flow_types: All unique data_type values from data flow
        valid_subtrees: List of valid subtree names

    Returns: (is_valid, error_message)
    """
    # 'global' is deliberately absent: data structures must live in a
    # concrete subtree.
    known_subtrees = set(valid_subtrees or [])

    problems = []
    # Tracked for parity with the original implementation; coverage
    # against data_flow_types is reported by the agent, not enforced here.
    seen_types = set()

    for idx, entry in enumerate(data_structures):
        stub_code = entry.get("code", "")
        subtree = entry.get("subtree", "")
        covered = entry.get("data_flow_types", [])

        if not stub_code:
            problems.append(f"Data structure {idx}: missing code")
            continue

        if not subtree:
            problems.append(f"Data structure {idx}: missing subtree")
            continue

        if subtree.lower() == "global":
            problems.append(
                f"Data structure {idx}: subtree cannot be 'global'. "
                f"Data structures must belong to a specific subtree."
            )
            continue

        if known_subtrees and subtree not in known_subtrees:
            problems.append(
                f"Data structure {idx}: invalid subtree '{subtree}'. "
                f"Must be exactly one of: {sorted(known_subtrees)}"
            )
            continue

        if not covered:
            problems.append(f"Data structure {idx}: data_flow_types must not be empty")
            continue

        syntax_ok, syntax_msg = validate_python_syntax(stub_code)
        if not syntax_ok:
            problems.append(f"Data structure {idx} (subtree={subtree}): syntax error - {syntax_msg}")

        seen_types.update(covered)

    if problems:
        return False, "\n".join(problems)

    return True, "All data structures are valid"
# ============================================================================
# Base Class Agent
# ============================================================================

class BaseClassAgent:
    """Agent for designing shared base classes and data structures.

    Wraps an LLM client in a validate-and-retry loop: each iteration asks
    the model for base classes + data-structure stubs, validates scope,
    subclass counts, and Python syntax, and feeds validation errors back
    into the next prompt until the output passes or max_iterations is hit.
    """

    def __init__(
        self,
        llm_client: Optional[LLMClient] = None,
        max_iterations: int = 5,
        logger: Optional[logging.Logger] = None,
        trajectory: Optional[Any] = None,
        step_id: Optional[int] = None
    ) -> None:
        # Create LLMClient with trajectory support if not provided
        if llm_client is None:
            self.llm = LLMClient(trajectory=trajectory, step_id=step_id)
        else:
            self.llm = llm_client
            # Update trajectory info on existing client
            if trajectory is not None:
                self.llm.set_trajectory(trajectory, step_id)
        self.max_iterations = max_iterations
        self.logger = logger or logging.getLogger(__name__)

    def design_base_classes(
        self,
        repo_name: str,
        repo_info: str,
        data_flow: List[Dict[str, Any]],
        skeleton_tree: str,
        functional_areas: List[str],
        functional_areas_overview: str = "",
        project_background: str = "",
    ) -> Dict[str, Any]:
        """Design base classes for the repository.

        Args:
            repo_name: Repository name
            repo_info: Repository description
            data_flow: Data flow edges from previous step
            skeleton_tree: Tree string of skeleton
            functional_areas: List of functional area names
            functional_areas_overview: Hierarchical overview of functional areas with sub-features
            project_background: Project background/technology context from feature_spec

        Returns:
            Dict containing:
            - base_classes: List of base class definitions
            - data_structures: List of data structure stub definitions
            - class_names / data_structure_names: class names extracted from the generated code
            - uncovered_data_flow_types: data_flow types not covered by either output
            - success: Whether the operation succeeded
            - iterations: Number of LLM iterations consumed
            On failure: base_classes/data_structures are empty and 'error'
            carries the last validation message.
        """
        self.logger.info(f"[BaseClassAgent] Designing base classes for {repo_name}")

        # Build system prompt (tool description is now integrated)
        system_prompt = BASE_CLASS_PROMPT

        # Extract unique data_type values from data flow (for post-validation)
        data_flow_type_names = extract_data_flow_types(data_flow)

        # Format data flow as raw JSON for full context
        data_flow_json_str = json.dumps(data_flow, indent=2, ensure_ascii=False)

        # Use hierarchical overview if available, otherwise fall back to flat list
        if functional_areas_overview:
            areas_section = functional_areas_overview
        else:
            areas_section = "Functional Areas: " + ", ".join(functional_areas)

        # Build user prompt
        # Include project background when available — gives the LLM context
        # about technology stack so it can design framework-appropriate base classes.
        technology_section = ""
        if project_background and project_background.strip():
            technology_section = f"""
{project_background}
When the project specifies a concrete technology stack (framework, database, etc.),
design base classes that are idiomatic for those technologies rather than purely
abstract. For example, if the project uses Flask, prefer Flask Blueprint patterns
over generic abstract request handlers. If no specific technology is mentioned,
use abstract base classes (ABC).
"""

        user_prompt = f"""Based on the repository structure and data flow, generate base class definitions:
Repository Name: {repo_name}
Repository Info: {repo_info}
{technology_section}
Repository Skeleton:
{skeleton_tree}

Functional Areas Overview:
{areas_section}

(Use the exact top-level component names above as scope/subtree values, NOT directory paths.)

Data Flow (JSON):
{data_flow_json_str}

Please use the generate_base_classes tool to create base class definitions and data structure stubs.

Focus on:
1. Shared behavioral abstractions (base classes with abstract methods)
2. Common data structures that flow between components
3. Keep it minimal - only create abstractions that will be reused by multiple components
4. Use dataclasses for data structures, ABC for behavioral abstractions

Additionally, for data_structures:
- Data flow types that are generic enough to serve as base classes (with subclasses) should go into base_classes, not data_structures
- The remaining data flow types that are NOT absorbed by base classes should be defined as data_structures
- Use @dataclass with explicit fields, type annotations, and docstrings
- These are stubs (skeleton code) — they will be fully implemented later
- Each data structure must belong to a specific subtree (not global)
- Do NOT specify file_path — it will be assigned by the interface designer later"""

        # Iterate until valid or max iterations
        last_error = ""

        for iteration in range(self.max_iterations):
            self.logger.info(f"[BaseClassAgent] Iteration {iteration + 1}/{self.max_iterations}")

            # Build prompt with error feedback if needed
            current_user_prompt = user_prompt
            if last_error:
                current_user_prompt += f"\n\n[Validation Failed]\nError: {last_error}\nPlease fix the issues and try again."

            # Call LLM with Pydantic validation.
            # NOTE(review): call_structured appears to return a 3-tuple of
            # which only the parsed model is used here — confirm against
            # the common.LLMClient contract.
            _, result_model, _ = self.llm.call_structured(
                system_prompt=system_prompt,
                user_prompt=current_user_prompt,
                response_model=BaseClassOutput,
                purpose=f"base_class_design_{iteration + 1}",
                max_retries=1  # Handle retries at this level
            )

            if not result_model:
                last_error = "Failed to parse LLM response or Pydantic validation failed."
                continue

            # Convert to dict list for custom validation
            base_classes = [bc.model_dump() for bc in result_model.base_classes]
            data_structures = [ds.model_dump() for ds in result_model.data_structures]

            # Custom validation (scope and syntax) for base classes
            is_valid, error_msg = validate_base_classes_model(result_model, valid_subtrees=functional_areas)

            if not is_valid:
                self.logger.warning(f"[BaseClassAgent] Base class validation failed: {error_msg}")
                last_error = error_msg
                continue

            # Validate data structures
            ds_valid, ds_error = validate_data_structures(
                data_structures, data_flow_type_names, valid_subtrees=functional_areas
            )

            if not ds_valid:
                self.logger.warning(f"[BaseClassAgent] Data structure validation failed: {ds_error}")
                last_error = ds_error
                continue

            # Extract class names for logging
            all_classes = []
            for bc in base_classes:
                class_names = extract_class_names(bc.get("code", ""))
                all_classes.extend(class_names)

            # Extract data structure class names
            ds_class_names = []
            for ds in data_structures:
                class_names = extract_class_names(ds.get("code", ""))
                ds_class_names.extend(class_names)

            # Check data_flow_type coverage (base_classes code may also cover some types)
            bc_class_set = set(all_classes)
            ds_covered_types = set()
            for ds in data_structures:
                ds_covered_types.update(ds.get("data_flow_types", []))
            uncovered = set(data_flow_type_names) - ds_covered_types - bc_class_set

            self.logger.info(
                f"[BaseClassAgent] Validated: {len(base_classes)} base classes, "
                f"{len(data_structures)} data structures, "
                f"{len(uncovered)} uncovered data flow types"
            )
            # Uncovered types are logged but not treated as a failure.
            if uncovered:
                self.logger.warning(f"[BaseClassAgent] Uncovered data flow types: {sorted(uncovered)}")

            return {
                "base_classes": base_classes,
                "data_structures": data_structures,
                "class_names": all_classes,
                "data_structure_names": ds_class_names,
                "uncovered_data_flow_types": sorted(uncovered),
                "success": True,
                "iterations": iteration + 1
            }

        # Failed after all iterations
        self.logger.error(f"[BaseClassAgent] Failed after {self.max_iterations} iterations")
        return {
            "base_classes": [],
            "data_structures": [],
            "success": False,
            "error": last_error,
            "iterations": self.max_iterations
        }


if __name__ == "__main__":
    # Manual smoke test: requires a configured LLM backend via common.LLMClient.
    logging.basicConfig(level=logging.INFO)

    agent = BaseClassAgent()
    result = agent.design_base_classes(
        repo_name="test-repo",
        repo_info="A test repository",
        data_flow=[
            {"source": "A", "target": "B", "data_type": "Data"}
        ],
        skeleton_tree="src/\n module/\n file.py",
        functional_areas=["A", "B"]
    )
    print(json.dumps(result, indent=2))
+ +Your objective is to introduce only the minimum necessary set of well-justified base classes and shared data structures โ€” enough to improve modularity and consistency, but not so many that the system becomes rigid or over-engineered. + +The goal is pragmatic, balanced design. + +## Core Constraints +1. Define shared data structures that unify scattered, inconsistent data representations across modules. +2. Prefer explicit, typed, schema-aware containers with clearly defined fields and metadata. +3. Avoid unnecessary wrappers around third-party types โ€” only abstract when it adds real value. +4. Introduce a base class only when you can name at least 2 concrete modules that will use it. List them explicitly in your reasoning. + +You may introduce two kinds of components: + +## 1. Functional Base Class (behavioral abstraction) +Purpose: +Establish shared behavior or lifecycle across multiple modules using inheritance and polymorphism. + +Requirements: +- Represent a clearly defined behavioral contract. +- Consist mainly of abstract methods or method stubs. +- Avoid complex business logic or internal state. +- Define recognizable lifecycle patterns such as: run, validate, transform, execute. + +Design Guidelines: +- Avoid speculative abstractions created "just in case". +- Typically, one to three base classes for the entire system is sufficient unless there is strong justification. + +## 2. Global Data Structure (shared data format) +Purpose: +Provide standardized data containers that flow across subtrees and pipeline components. + +Requirements: +- Should be fully implemented (for example, dataclasses). +- Must use explicit fields with type annotations and meaningful docstrings. +- Represent real semantic units, not generic catch-all containers. + +Design Guidelines: +- Keep them primarily structural with only light validation logic. +- Avoid embedding algorithms or business workflows inside data objects. 
+- Merge aggressively: prefer fewer, well-defined shared structures over many narrowly scoped ones. + +## 3. Data Flow Data Structure (data flow type stubs) +Purpose: +Some `data_type` labels from the data flow graph may be generic enough to be modeled as base classes (with subclasses). Those should go into `base_classes` above. The **remaining** data flow types โ€” those that are concrete, self-contained data containers โ€” should be defined here as data structure stubs. These stubs ensure design continuity and will be fully implemented during later code generation batches. + +Requirements: +- Should be `@dataclass` stubs with explicit fields, type annotations, and docstrings. +- Fields should be inferred from the data flow context (source, target, transformation descriptions). +- Mark fields with reasonable defaults or `None` where the full implementation is not yet known. +- These are **stubs** โ€” they will be fully implemented later. Keep them minimal but structurally correct. +- Each data structure must belong to a specific subtree (functional area), **NOT** "global". +- Do **NOT** specify `file_path` โ€” it will be assigned by the interface designer in the next step. + +Design Guidelines: +- Do NOT duplicate types that are already defined as base classes. +- If a data_type is generic enough to be a base class (with subclasses), put it in base_classes instead. +- Together, base_classes and data_structures should ideally cover all `data_type` labels from the data flow, but the split is a design judgment โ€” prioritize correctness over forced coverage. + +## Scope Specification +For each base class or data structure, you must explicitly assign one of the following scopes: +- "global": Fundamental base classes at repository root level (L0). Use this only for cross-cutting concerns that are universally applicable and have no dependencies on L1 modules. +- "": Module-local abstractions at subtree/functional area level (L1). 
Use this for types that define a module's core logic or data. Although other modules may import these, the "source of truth" and all subclasses must stay within this subtree. + +CRITICAL: must be exactly one of the functional area names listed in the "Functional Areas" list โ€” **NOT** a directory path or folder name. For example, if the functional area is "data_processing", the scope is "data_processing", not "src/data_processing" or "data_processing/". + +## General Principle +Favor "just enough abstraction": +Introduce the smallest number of base classes and shared data formats that make the system clearer, safer, and easier to extend โ€” but never add layers that do not have concrete, immediate purpose. + +## Output Format +Your response must contain exactly one block and exactly one block, with no other content outside these two blocks: + +Your internal reasoning and drafts โ€” this is scratch space for evaluating tradeoffs, alternatives, and incremental refinements. + + +{{ + "base_classes": [ + {{ + "file_path": "Path to the Python file where the base class code should live (string).", + "code": "Full Python source code for that file, including base class definitions (string).", + "scope": "'global' for repository-wide (L0) base class, or a specific subtree/functional area name (**NOT** directory name) for module-level (L1) base class (string, required).", + "subclasses": "Mapping from each base class name to its concrete subclass names (object, required). Example: {\"BaseNode\": [\"ItemNode\", \"FunctionNode\"], \"BaseConfig\": [\"RunConfig\", \"TestConfig\"]}. Each base class must have at least 2 subclasses." + }} + ], + "data_structures": [ + {{ + "code": "Python stub code: @dataclass skeleton with fields, type annotations, and docstrings (string).", + "subtree": "The functional area / subtree name this data structure belongs to (string, required). Must be one of the Functional Areas listed in the prompt. 
Do NOT use 'global'.", + "data_flow_types": "List of data_type names from the data flow that this definition covers (list of strings, required, at least 1). Example: [\"ParsedExpression\", \"TokenList\"]", + "file_path": "Path to the Python file where this data structure stub should live (string, optional). If not provided, the interface designer will assign it during integration." + }} + ] +}} + + +Constraints: +- Each base class must have at least 2 subclasses listed. +- data_structures subtree must be one of the Functional Areas listed in the prompt. +- data_structures file_path is optional; if not provided, the interface designer will assign it. +""" + +BASE_CLASS_REVIEW_PROMPT = """ +You are a senior software architect reviewing a set of functional base classes and global shared data structures for a Python repo. +These abstractions are foundational contracts for future modules and subtrees. + +Core constraint: +- The goal is to define custom shared data structures that replace or unify pandas-style tabular formats. +- Do not recommend or mimic pandas.DataFrame or other third-party tabular types. +- Prefer explicit, typed, schema-aware containers with clear fields and metadata. + +You must judge both what is present and what is missing. + +## Review Perspective + +You are reviewing from the perspective of a **repository maintainer** and **architecture owner**. Your review should prioritize: + +- Clarity and maintainability of the abstraction layer +- Whether reuse is **real and demonstrated**, not hypothetical +- Whether the abstraction scope is **too wide, too narrow, or just right** +- Whether any **obvious responsibilities or data formats have been omitted** +- Correct placement of abstractions: global (`General`) vs local (per-subtree) + +## Review Criteria +1) Design Quality +- Are the classes conceptually clean, internally consistent, and easy to reason about? +- Do they reflect real responsibilities instead of accidental structure or one-off needs? 
+- Are concerns separated appropriately (no mixing of unrelated roles)? +2) Reusability +- Can the base classes and data structures be meaningfully reused in at least two modules or subtrees? +- Do they actually reduce duplication and simplify implementations? +- Are there clear opportunities for reuse that were missed? +3) Abstraction Level +- Is the abstraction at the right level (not just a thin wrapper, not an over-general "god-interface")? +- Is it concrete enough to be practical, but general enough to be stable over time? +- Are there abstractions that are too speculative or too tightly coupled to a single use case? +4) Interface Clarity +- Is the intended role of each base class or data structure clear from its name, API, and docstring? +- Do methods have understandable signatures and concise docstrings (intent, args, returns)? +- Is it easy for a new contributor to know how to implement or use the abstraction correctly? + +## Output Format +Return **only** a valid JSON object in the following format: +{ + "review": { + "Design Quality": { + "feedback": "", + "pass": true/false + }, + "Reusability": { + "feedback": "", + "pass": true/false + }, + "Abstraction Level": { + "feedback": "", + "pass": true/false + }, + "Interface Clarity": { + "feedback": "", + "pass": true/false + } + }, + "final_pass": true/false +} + +Rules: +- `final_pass` should be `true` only if all four dimensions pass, or if remaining issues are minor and easily fixable. +- All `feedback` fields must provide concrete, actionable guidance. +- Do not add new fields or categories beyond the four listed. +""" diff --git a/RPG-Kit/scripts/func_design/data_flow_agent.py b/RPG-Kit/scripts/func_design/data_flow_agent.py new file mode 100644 index 0000000..8c45973 --- /dev/null +++ b/RPG-Kit/scripts/func_design/data_flow_agent.py @@ -0,0 +1,322 @@ +#!/usr/bin/env python3 +"""Data Flow Agent. 
+
+This module provides the DataFlowAgent for designing inter-component data flow
+as a directed acyclic graph (DAG).
+
+Key components:
+- DataFlowAgent: Orchestrates data flow generation with validation
+- Validation functions for DAG properties
+"""
+
+import json
+import logging
+from typing import Dict, List, Optional, Tuple, Any
+from collections import defaultdict, deque
+from pydantic import BaseModel, Field
+
+from .data_flow_prompts import (
+    DATA_FLOW_PROMPT,
+    DATA_FLOW_REVIEW_PROMPT,  # NOTE(review): not referenced in the visible code of this module
+    format_functional_areas
+)
+
+# Import common LLMClient with trajectory support
+import sys
+from pathlib import Path
+sys.path.insert(0, str(Path(__file__).parent.parent))  # make the sibling 'common' package importable when run as a script
+from common import LLMClient
+
+
+# ============================================================================
+# Data Models
+# ============================================================================
+
+class DataFlowEdge(BaseModel):
+    """Single data flow edge between components."""
+    source: str = Field(..., description="Name of the source component that produces this data")
+    target: str = Field(..., description="Name of the target component that consumes this data")
+    data_id: str = Field(..., description="Unique identifier for this data exchange")
+    data_type: str = Field(..., description="Logical type or schema of the data")
+    transformation: str = Field(default="", description="Description of how data is processed during transfer")
+
+
+class DataFlowOutput(BaseModel):
+    """Output from LLM for data flow design."""
+    data_flow: List[DataFlowEdge] = Field(..., min_length=1, description="List of data flow edges (must not be empty)")  # min_length=1 rejects empty lists at parse time (pydantic v2)
+
+
+# ============================================================================
+# Validation Functions
+# ============================================================================
+
+def validate_data_flow(
+    data_flow: List[Dict[str, Any]],
+    required_components: List[str]
+) -> Tuple[bool, str]:
+    """Validate data flow graph: 1.
All source/target are valid components 2. No self-loops 3. No cycles (must be a DAG) 4. All components are covered (appear at least once).
+
+    Returns: (is_valid, error_message_or_success)
+    """
+    errors = []
+    component_set = set(required_components)
+    used_components = set()
+
+    # Build graph for cycle detection
+    graph = defaultdict(list)
+    edge_indices: Dict[Tuple[str, str], List[int]] = defaultdict(list)  # NOTE(review): populated below but never read — dead-code candidate
+
+    for i, edge in enumerate(data_flow):
+        source = edge.get("source", "")
+        target = edge.get("target", "")
+
+        # Check source validity
+        if not source:
+            errors.append(f"Item {i}: 'source' is missing.")
+            continue  # without a source there is nothing further to check for this item
+        if source not in component_set:
+            errors.append(f"Item {i}: 'source' node '{source}' is not in required_keys.")
+
+        # Check target validity
+        if not target:
+            errors.append(f"Item {i}: 'target' is missing.")
+            continue
+        if target not in component_set:
+            errors.append(f"Item {i}: 'target' node '{target}' is not in required_keys.")
+
+        # Check self-loop
+        if source == target:
+            errors.append(f"Item {i}: self-loop detected ({source} -> {source})")
+
+        # Add to graph
+        if source and target and source != target:  # NOTE(review): names that failed the required_keys checks above still enter the graph here
+            graph[source].append(target)
+            used_components.add(source)
+            used_components.add(target)
+            edge_indices[(source, target)].append(i)
+
+    # Check for unused components
+    unused = component_set - used_components
+    if unused:
+        errors.append(
+            f"Unused nodes from required_keys (i.e., no data flow defined): {sorted(unused)}"
+        )
+
+    # Check for cycles using DFS if no basic errors
+    if not errors:
+        visited = set()
+        rec_stack = set()  # nodes on the current DFS path
+        cycle_found = False
+        cycle_path = []
+
+        def has_cycle(node: str, path: List[str]) -> bool:
+            nonlocal cycle_found, cycle_path
+            visited.add(node)
+            rec_stack.add(node)
+
+            for neighbor in graph.get(node, []):
+                if neighbor not in visited:
+                    if has_cycle(neighbor, path + [neighbor]):
+                        return True
+                elif neighbor in rec_stack:  # back edge onto the active DFS path => cycle
+                    cycle_path = path + [neighbor]
+                    cycle_found = True
+                    return True
+
+            
rec_stack.remove(node)
+            return False
+
+        for node in list(graph.keys()):  # iterate a snapshot of the adjacency keys
+            if node not in visited:
+                if has_cycle(node, [node]):
+                    break
+
+        if cycle_found:
+            errors.append(f"Cycle detected in data flow: {' -> '.join(cycle_path)}")
+
+    if errors:
+        return False, "\n".join(errors)
+    return True, "All data flow checks passed."
+
+
+def compute_topological_order(data_flow: List[Dict[str, Any]]) -> List[str]:
+    """Compute topological order of components based on data flow.
+
+    Returns components in dependency order (sources before targets).
+    """
+    graph = defaultdict(list)
+    in_degree = defaultdict(int)
+    all_nodes = set()
+
+    for edge in data_flow:
+        source = edge.get("source", "")
+        target = edge.get("target", "")
+        if source and target:  # silently skip malformed edges; validation happens elsewhere
+            graph[source].append(target)
+            in_degree[target] += 1
+            all_nodes.add(source)
+            all_nodes.add(target)
+
+    # Initialize in_degree for all nodes
+    for node in all_nodes:
+        if node not in in_degree:
+            in_degree[node] = 0
+
+    # Kahn's algorithm
+    queue = deque([n for n in all_nodes if in_degree[n] == 0])  # seed: nodes with no unmet dependencies
+    result = []
+
+    while queue:
+        node = queue.popleft()
+        result.append(node)
+        for neighbor in graph.get(node, []):
+            in_degree[neighbor] -= 1
+            if in_degree[neighbor] == 0:
+                queue.append(neighbor)
+
+    # Handle remaining nodes (cycle case - should not happen after validation)
+    remaining = [n for n in all_nodes if n not in result]
+    result.extend(remaining)
+
+    return result
+
+
+# ============================================================================
+# Data Flow Agent
+# ============================================================================
+
+class DataFlowAgent:
+    """Agent for designing inter-component data flow."""
+
+    def __init__(
+        self,
+        llm_client: Optional[LLMClient] = None,  # reuse an existing client, or None to create one below
+        max_iterations: int = 5,
+        logger: Optional[logging.Logger] = None,
+        trajectory: Optional[Any] = None,
+        step_id: Optional[int] = None  # trajectory step that this agent's LLM calls attach to
+    ):
+        # Create LLMClient with trajectory support if not provided
+        if llm_client is None:
+            
self.llm = LLMClient(trajectory=trajectory, step_id=step_id)
+        else:
+            self.llm = llm_client
+            # Update trajectory info on existing client
+            if trajectory is not None:
+                self.llm.set_trajectory(trajectory, step_id)
+        self.max_iterations = max_iterations
+        self.logger = logger or logging.getLogger(__name__)  # fall back to the module-level logger
+
+    def build_data_flow(
+        self,
+        repo_name: str,
+        repo_info: str,
+        functional_areas: List[str],
+        component_dirs: Optional[Dict[str, str]] = None,
+        skeleton_tree: Optional[str] = None
+    ) -> Dict[str, Any]:
+        """Build data flow between functional areas.
+
+        Args:
+            repo_name: Repository name
+            repo_info: Repository description
+            functional_areas: List of functional area names (components)
+            component_dirs: Optional mapping of component to directory
+            skeleton_tree: Optional tree string of skeleton
+
+        Returns:
+            Dict containing:
+            - data_flow: List of data flow edges
+            - subtree_order: Topological order of components
+            - success: Whether the operation succeeded
+        """
+        self.logger.info(f"[DataFlowAgent] Building data flow for {len(functional_areas)} components")
+
+        # Build system prompt (tool description is now integrated)
+        system_prompt = DATA_FLOW_PROMPT  # static prompt; the DAG rules live in the prompt text
+
+        # Build user prompt
+        areas_str = format_functional_areas(functional_areas, component_dirs)
+
+        user_prompt = f"""[Step 1 User Query]: Based on the repository structure and dependency relationships, generate data flow between components:
+Repository Name: {repo_name}
+Repository Info: {repo_info}
+Functional Graph Overview: {areas_str}
+Component Names: {', '.join(functional_areas)}
+Please use the generate_data_flow tool to create comprehensive data flow definitions.
+Focus on:
+1. What data flows between components
+2. Data types and formats
+3. Any transformations applied
+4.
Direction of data flow"""
+
+        if skeleton_tree:
+            user_prompt += f"\n\nRepository Skeleton:\n{skeleton_tree}"
+
+        # Iterate until valid or max iterations
+        last_error = ""  # validation feedback carried into the next attempt's prompt
+
+        for iteration in range(self.max_iterations):
+            self.logger.info(f"[DataFlowAgent] Iteration {iteration + 1}/{self.max_iterations}")
+
+            # Build prompt with error feedback if needed
+            current_user_prompt = user_prompt  # rebuild from the clean base prompt each round
+            if last_error:
+                current_user_prompt += f"\n\n[Validation Failed]\nError: {last_error}\nPlease fix the issues and try again."
+
+            # Call LLM with Pydantic validation
+            _, result_model, _ = self.llm.call_structured(
+                system_prompt=system_prompt,
+                user_prompt=current_user_prompt,
+                response_model=DataFlowOutput,
+                purpose=f"data_flow_design_{iteration + 1}",
+                max_retries=1  # Handle retries at this level
+            )
+
+            if not result_model:
+                last_error = "Failed to parse LLM response or Pydantic validation failed."
+                continue
+
+            # Convert to dict list for custom validation
+            data_flow = [edge.model_dump() for edge in result_model.data_flow]
+
+            # Custom DAG validation
+            is_valid, error_msg = validate_data_flow(data_flow, functional_areas)
+
+            if is_valid:
+                # Compute subtree order
+                subtree_order = compute_topological_order(data_flow)
+
+                self.logger.info(f"[DataFlowAgent] Data flow validated successfully with {len(data_flow)} edges")
+                return {
+                    "data_flow": data_flow,
+                    "subtree_order": subtree_order,
+                    "success": True,
+                    "iterations": iteration + 1
+                }
+            else:
+                self.logger.warning(f"[DataFlowAgent] Validation failed: {error_msg}")
+                last_error = error_msg
+
+        # Failed after all iterations
+        self.logger.error(f"[DataFlowAgent] Failed after {self.max_iterations} iterations")
+        return {
+            "data_flow": [],
+            "subtree_order": [],
+            "success": False,
+            "error": last_error,
+            "iterations": self.max_iterations
+        }
+
+
+if __name__ == "__main__":
+    # Test
+    logging.basicConfig(level=logging.INFO)
+
+    agent = DataFlowAgent()
+    result = agent.build_data_flow(
+        repo_name="test-repo",
+        
repo_info="A test repository", + functional_areas=["ComponentA", "ComponentB", "ComponentC"] + ) + print(json.dumps(result, indent=2)) diff --git a/RPG-Kit/scripts/func_design/data_flow_prompts.py b/RPG-Kit/scripts/func_design/data_flow_prompts.py new file mode 100644 index 0000000..3c9b926 --- /dev/null +++ b/RPG-Kit/scripts/func_design/data_flow_prompts.py @@ -0,0 +1,138 @@ +#!/usr/bin/env python3 +"""Data Flow Design Prompts. + +This module contains prompts for data flow design stage. +""" + +# ============================================================================ +# Data Flow Design Prompts +# ============================================================================ + +DATA_FLOW_PROMPT = """ +You are a system architect designing the **inter-subtree data flow** for a Python repository. + +Your goal is to describe **how data moves** between functional subtrees as a **directed acyclic graph (DAG)** of edges. +Each edge represents one data object passed from one subtree to another. + +## Hard Constraint: The Graph MUST Be Acyclic +- The final data flow **must be a strict DAG**: + There must be **no path** that starts from a subtree and eventually returns to the **same** subtree via one or more edges. +- In particular, you **must not** create: + - Direct 2-node cycles, e.g. `A โ†’ B` and `B โ†’ A` + - Longer cycles, e.g. `A โ†’ B โ†’ C โ†’ A` + - Any chain that, when followed, returns to an earlier subtree. +- If the natural design seems to require feedback or iteration, you **must instead**: + - Introduce explicit, one-directional stages (e.g. `Trainer โ†’ MetricsCollector โ†’ Reporting`), or + - Model the feedback as a new, downstream component instead of sending data "backwards". +- Before producing output, mentally verify that your proposed `data_flow` is a DAG with **no cycles of any length**. + +## Data Flow Guidelines +- Treat each edge as a meaningful data handoff between two **distinct** subtrees (no self-loops like `A โ†’ A`). 
+- Every subtree defined in the system must appear **at least once** as a producer or consumer. +- Reuse logical data types across edges when they represent the same structure. +- Prefer explicit, schema-aware data descriptions; do not use pandas-style tabular types. +- Ensure naming is consistent and domain-aware. + +## Output Format +Your response must contain exactly one block and exactly one block: + + +Architectural scratch work: +- Enumerate the subtrees and their responsibilities. +- Propose candidate edges and check whether they introduce any cycles. +- If you detect a potential cycle, refactor until the graph is acyclic. + + +{{ + "data_flow": [ + {{ + "source": "source_subtree_name", + "target": "target_subtree_name", + "data_id": "unique name or description of the data exchange", + "data_type": "logical type or structure of the data (e.g., 'FeatureBatch', 'InferenceResult')", + "transformation": "1โ€“2 sentences describing how the data is processed / validated / serialized / enriched during this transfer" + }} + ] +}} + + +Constraints: +- source != target (no self-loops) +- No cycles of any length in the overall graph +- Every required subtree should appear at least once as producer or consumer +- transformation must not be empty +""".strip() + + +DATA_FLOW_REVIEW_PROMPT = """ +You are reviewing the cross-subsystem data architecture of the repository. + +The submitted data flow graph defines how subtrees collaborate, what data contracts they expose, and how responsibilities are split. +If this graph is incorrect, vague, or overcomplicated, the entire system will suffer from tight coupling and unclear interfaces. + +Review this as a strategic decision about how information moves across architectural boundaries. + +## Constraints +- Every subtree must appear at least once as a producer ("source") or consumer ("target"). +- The graph must be a Directed Acyclic Graph (no cycles, no self-loops). 
+- Data edges should be semantically plausible (realistic producer โ†’ consumer relationships). +- Prefer clear, reusable data types over ad-hoc labels; avoid vague types like "object" or "any". + +## Review Dimensions +1. Data Integrity + - Are data types and contracts consistent and believable across edges? + - Are there obvious type mismatches or broken assumptions between producer and consumer? +2. Flow Logic + - Do the directions of edges make sense given each subtree's role? + - Is the graph acyclic and free of self-loops and obviously redundant or unjustified flows? +3. Transformation Clarity + - Is it clear what happens to data at each hop (transformation field)? + - Do transformations align with the roles of the involved subtrees, or are they vague/hand-wavy? +4. Coverage + - Are all subtrees from {trees_names} represented, with no missing or extraneous names? + - Are there isolated or under-connected subtrees that indicate gaps or unclear responsibilities? + +## Output Format +Return **only** a valid JSON object in the following format: +{ + "review": { + "Data Integrity": { + "feedback": "", + "pass": true/false + }, + "Flow Logic": { + "feedback": "", + "pass": true/false + }, + "Transformation Clarity": { + "feedback": "", + "pass": true/false + }, + "Coverage": { + "feedback": "", + "pass": true/false + } + }, + "final_pass": true/false +} + +Rules: +- `final_pass` should be `true` only if all four dimensions pass, or if remaining issues are minor and easily fixable. +- All `feedback` fields must provide concrete, actionable guidance. +- Do not add new fields or categories beyond the four listed. 
+""".strip() + + +# ============================================================================ +# Utility Functions for Prompt Building +# ============================================================================ + +def format_functional_areas(functional_areas: list, component_dirs: dict = None) -> str: + """Format functional areas for prompt display.""" + lines = [] + for area in functional_areas: + if component_dirs and area in component_dirs: + lines.append(f"- {area} [{component_dirs[area]}]") + else: + lines.append(f"- {area}") + return "\n".join(lines) diff --git a/RPG-Kit/scripts/func_design/func_designer.py b/RPG-Kit/scripts/func_design/func_designer.py new file mode 100644 index 0000000..18fd5e5 --- /dev/null +++ b/RPG-Kit/scripts/func_design/func_designer.py @@ -0,0 +1,497 @@ +#!/usr/bin/env python3 +"""Func Designer - Unified Entry Point. + +This module provides a unified interface for running the complete +func_design workflow: +1. Data Flow Design - Design inter-component data flow (DAG) +2. Base Class Design - Design shared base classes and data structures +3. Interface Design - Design function/class interfaces for each file + +Can be run as a complete pipeline or individual phases. 
+""" + +import json +import logging +import argparse +import sys +from pathlib import Path +from typing import Dict, Any, Optional + +# Add scripts directory to path for common module imports +_scripts_dir = Path(__file__).resolve().parent.parent +if str(_scripts_dir) not in sys.path: + sys.path.insert(0, str(_scripts_dir)) + +from common.trajectory import Trajectory, load_or_create_trajectory +from common import ( + get_skeleton_tree_string, + extract_functional_areas_from_skeleton, + format_functional_graph_overview, + extract_component_directories, +) + +# Import agents +from .data_flow_agent import DataFlowAgent +from .base_class_agent import BaseClassAgent +from .interface_agent import ( + InterfaceOrchestrator +) + +# Import centralized paths +import sys +sys.path.insert(0, str(Path(__file__).parent.parent)) +from common.paths import ( + SKELETON_FILE as INPUT_SKELETON, + DATA_FLOW_FILE as OUTPUT_DATA_FLOW, + BASE_CLASSES_FILE as OUTPUT_BASE_CLASSES, + INTERFACES_FILE as OUTPUT_INTERFACES, +) + + +# ============================================================================ +# Func Designer +# ============================================================================ + +class FuncDesigner: + """Main orchestrator for func_design workflow. + + Manages three phases: + 1. Data Flow: Design inter-component data flow as a DAG + 2. Base Classes: Design shared base classes and data structures + 3. 
Interfaces: Design function/class interfaces for each file + """ + + def __init__( + self, + max_data_flow_iterations: int = 5, + max_base_class_iterations: int = 5, + max_interface_iterations: int = 10, + trajectory: Optional[Trajectory] = None + ): + self.max_data_flow_iterations = max_data_flow_iterations + self.max_base_class_iterations = max_base_class_iterations + self.max_interface_iterations = max_interface_iterations + self.trajectory = trajectory + self.logger = logging.getLogger(__name__) + self._current_step_id: Optional[int] = None + + # State + self.skeleton = None + self.data_flow = None + self.base_classes = None + self.interfaces = None + self.repo_name = "" + self.repo_info = "" + + def load_skeleton(self, skeleton_path: Path = INPUT_SKELETON) -> bool: + """Load skeleton from file.""" + if not skeleton_path.exists(): + self.logger.error(f"Skeleton file not found: {skeleton_path}") + return False + + try: + with open(skeleton_path, "r", encoding="utf-8") as f: + self.skeleton = json.load(f) + + self.repo_name = self.skeleton.get("repository_name", "project") + self.repo_info = self.skeleton.get("repository_purpose", "") + + self.logger.info(f"Loaded skeleton for {self.repo_name}") + return True + except Exception as e: + self.logger.error(f"Failed to load skeleton: {e}") + return False + + def run_data_flow_phase(self) -> Dict[str, Any]: + """Run data flow design phase.""" + self.logger.info("=" * 70) + self.logger.info("PHASE 1: DATA FLOW DESIGN") + self.logger.info("=" * 70) + + if not self.skeleton: + return {"success": False, "error": "Skeleton not loaded"} + + # Extract functional areas + functional_areas = extract_functional_areas_from_skeleton(self.skeleton) + component_dirs = extract_component_directories(self.skeleton) + skeleton_tree = get_skeleton_tree_string(self.skeleton) + + self.logger.info(f"Found {len(functional_areas)} functional areas") + + if len(functional_areas) < 2: + self.logger.warning("Less than 2 components, skipping data 
flow") + self.data_flow = { + "data_flow": [], + "subtree_order": functional_areas, + "components": functional_areas, + "success": True + } + return self.data_flow + + # Add step to trajectory + step_id = None + if self.trajectory: + step = self.trajectory.add_step("data_flow_design", "Design inter-component data flow") + step_id = step.step_id + self._current_step_id = step_id + self.trajectory.start_step(step_id) + + # Initialize agent with trajectory + agent = DataFlowAgent( + max_iterations=self.max_data_flow_iterations, + logger=self.logger, + trajectory=self.trajectory, + step_id=step_id + ) + + # Run + result = agent.build_data_flow( + repo_name=self.repo_name, + repo_info=self.repo_info, + functional_areas=functional_areas, + component_dirs=component_dirs, + skeleton_tree=skeleton_tree + ) + + result["components"] = functional_areas + self.data_flow = result + + # Complete trajectory step + if self.trajectory and step_id: + if result.get("success"): + self.trajectory.complete_step(step_id, {"edge_count": len(result.get("data_flow", []))}) + else: + self.trajectory.fail_step(step_id, result.get("error", "Data flow design failed")) + + # Save + OUTPUT_DATA_FLOW.parent.mkdir(parents=True, exist_ok=True) + with open(OUTPUT_DATA_FLOW, "w", encoding="utf-8") as f: + json.dump(result, f, indent=2, ensure_ascii=False) + + self.logger.info(f"Data flow saved to {OUTPUT_DATA_FLOW}") + return result + + def run_base_class_phase(self) -> Dict[str, Any]: + """Run base class design phase.""" + self.logger.info("=" * 70) + self.logger.info("PHASE 2: BASE CLASS DESIGN") + self.logger.info("=" * 70) + + if not self.skeleton: + return {"success": False, "error": "Skeleton not loaded"} + + if not self.data_flow: + # Try to load from file + if OUTPUT_DATA_FLOW.exists(): + with open(OUTPUT_DATA_FLOW, "r", encoding="utf-8") as f: + self.data_flow = json.load(f) + else: + return {"success": False, "error": "Data flow not available"} + + # Extract info + functional_areas = 
extract_functional_areas_from_skeleton(self.skeleton) + functional_areas_overview = format_functional_graph_overview(self.skeleton) + skeleton_tree = get_skeleton_tree_string(self.skeleton) + data_flow_edges = self.data_flow.get("data_flow", []) + + # Add step to trajectory + step_id = None + if self.trajectory: + step = self.trajectory.add_step("base_class_design", "Design shared base classes") + step_id = step.step_id + self._current_step_id = step_id + self.trajectory.start_step(step_id) + + # Initialize agent with trajectory + agent = BaseClassAgent( + max_iterations=self.max_base_class_iterations, + logger=self.logger, + trajectory=self.trajectory, + step_id=step_id + ) + + # Run + result = agent.design_base_classes( + repo_name=self.repo_name, + repo_info=self.repo_info, + data_flow=data_flow_edges, + skeleton_tree=skeleton_tree, + functional_areas=functional_areas, + functional_areas_overview=functional_areas_overview + ) + + self.base_classes = result + + # Complete trajectory step + if self.trajectory and step_id: + if result.get("success"): + self.trajectory.complete_step(step_id, {"class_count": len(result.get("base_classes", []))}) + else: + self.trajectory.fail_step(step_id, result.get("error", "Base class design failed")) + + # Save + OUTPUT_BASE_CLASSES.parent.mkdir(parents=True, exist_ok=True) + with open(OUTPUT_BASE_CLASSES, "w", encoding="utf-8") as f: + json.dump(result, f, indent=2, ensure_ascii=False) + + self.logger.info(f"Base classes saved to {OUTPUT_BASE_CLASSES}") + return result + + def run_interface_phase(self) -> Dict[str, Any]: + """Run interface design phase.""" + self.logger.info("=" * 70) + self.logger.info("PHASE 3: INTERFACE DESIGN") + self.logger.info("=" * 70) + + if not self.skeleton: + return {"success": False, "error": "Skeleton not loaded"} + + # Load data flow if not available + if not self.data_flow: + if OUTPUT_DATA_FLOW.exists(): + with open(OUTPUT_DATA_FLOW, "r", encoding="utf-8") as f: + self.data_flow = json.load(f) + 
else: + return {"success": False, "error": "Data flow not available"} + + # Load base classes if not available + if not self.base_classes: + if OUTPUT_BASE_CLASSES.exists(): + with open(OUTPUT_BASE_CLASSES, "r", encoding="utf-8") as f: + self.base_classes = json.load(f) + else: + self.base_classes = {"base_classes": []} + + # Get base classes list + base_classes_list = self.base_classes.get("base_classes", []) + + # Add step to trajectory + step_id = None + if self.trajectory: + step = self.trajectory.add_step("interface_design", "Design function/class interfaces") + step_id = step.step_id + self._current_step_id = step_id + self.trajectory.start_step(step_id) + + # Initialize orchestrator with trajectory + orchestrator = InterfaceOrchestrator( + max_file_iterations=self.max_interface_iterations, + logger=self.logger, + trajectory=self.trajectory, + step_id=step_id + ) + + # Run + result = orchestrator.design_all_interfaces( + skeleton=self.skeleton, + data_flow=self.data_flow, + base_classes=base_classes_list, + repo_info=self.repo_info + ) + + self.interfaces = result + + # Complete trajectory step + if self.trajectory and step_id: + if result.get("success"): + self.trajectory.complete_step(step_id, {"interface_count": len(result.get("subtrees", {}))}) + else: + self.trajectory.fail_step(step_id, result.get("error", "Interface design failed")) + + # Save + OUTPUT_INTERFACES.parent.mkdir(parents=True, exist_ok=True) + with open(OUTPUT_INTERFACES, "w", encoding="utf-8") as f: + json.dump(result, f, indent=2, ensure_ascii=False) + + self.logger.info(f"Interfaces saved to {OUTPUT_INTERFACES}") + return result + + def run_full_pipeline(self) -> Dict[str, Any]: + """Run complete func_design pipeline.""" + self.logger.info("=" * 70) + self.logger.info("FUNC DESIGNER - FULL PIPELINE") + self.logger.info("=" * 70) + + results = { + "data_flow_phase": None, + "base_classes_phase": None, + "interfaces_phase": None, + "success": True + } + + # Phase 1: Data Flow + 
data_flow_result = self.run_data_flow_phase() + results["data_flow_phase"] = data_flow_result + + if not data_flow_result.get("success", False): + self.logger.warning("Data flow phase had issues, continuing...") + + # Phase 2: Base Classes + base_class_result = self.run_base_class_phase() + results["base_classes_phase"] = base_class_result + + if not base_class_result.get("success", False): + self.logger.warning("Base class phase had issues, continuing...") + + # Phase 3: Interfaces + interface_result = self.run_interface_phase() + results["interfaces_phase"] = interface_result + + if not interface_result.get("success", False): + self.logger.warning("Interface phase had issues") + results["success"] = False + + # Summary + self.print_summary(results) + + return results + + def print_summary(self, results: Dict[str, Any]) -> None: + """Print summary of all phases.""" + print("\n" + "=" * 70) + print("FUNC DESIGNER - SUMMARY") + print("=" * 70) + + # Data Flow + df = results.get("data_flow_phase", {}) + df_status = "[OK]" if df.get("success") else "[FAIL]" + df_edges = len(df.get("data_flow", [])) + print(f"\n[{df_status}] Data Flow: {df_edges} edges") + + # Base Classes + bc = results.get("base_classes_phase", {}) + bc_status = "[OK]" if bc.get("success") else "[FAIL]" + bc_count = len(bc.get("base_classes", [])) + print(f"[{bc_status}] Base Classes: {bc_count} files") + + # Interfaces + itf = results.get("interfaces_phase", {}) + itf_status = "[OK]" if itf.get("success") else "[FAIL]" + subtrees = itf.get("subtrees", {}) + # Support both "interfaces" (reference format) and "files" (old format) + total_files = sum( + len(st.get("interfaces", st.get("files", {}))) + for st in subtrees.values() + ) + success_files = sum( + sum(1 for f in st.get("interfaces", st.get("files", {})).values() if f.get("units")) + for st in subtrees.values() + ) + print(f"[{itf_status}] Interfaces: {success_files}/{total_files} files") + + print("\n" + "=" * 70) + + if 
results.get("success"): + print("[OK] All phases completed successfully!") + else: + print("[WARNING] Some phases had issues. Check logs for details.") + + print("=" * 70) + + +# ============================================================================ +# Main Entry Point +# ============================================================================ + +def main(): + """Main entry point.""" + parser = argparse.ArgumentParser( + description="Run func_design workflow (data flow, base classes, interfaces)" + ) + parser.add_argument( + "--phase", + choices=["all", "data_flow", "base_classes", "interfaces"], + default="all", + help="Which phase(s) to run (default: all)" + ) + parser.add_argument( + "--skeleton", "-s", + type=str, + default=str(INPUT_SKELETON), + help=f"Skeleton input file (default: {INPUT_SKELETON})" + ) + parser.add_argument( + "--max-iterations", "-m", + type=int, + default=5, + help="Max iterations per phase (default: 5)" + ) + parser.add_argument( + "--verbose", "-v", + action="store_true", + help="Enable verbose logging" + ) + parser.add_argument( + "--no-trajectory", + action="store_true", + help="Disable trajectory recording" + ) + + args = parser.parse_args() + + # Setup logging + log_level = logging.DEBUG if args.verbose else logging.INFO + logging.basicConfig( + level=log_level, + format="%(asctime)s - %(levelname)s - %(message)s" + ) + logger = logging.getLogger(__name__) + + # Initialize trajectory + trajectory = None + if not args.no_trajectory: + trajectory = load_or_create_trajectory("func_designer") + trajectory.start(metadata={ + "phase": args.phase, + "skeleton_file": args.skeleton, + "max_iterations": args.max_iterations + }) + + try: + # Initialize designer + designer = FuncDesigner( + max_data_flow_iterations=args.max_iterations, + max_base_class_iterations=args.max_iterations, + max_interface_iterations=args.max_iterations * 2, + trajectory=trajectory + ) + + # Load skeleton + if not designer.load_skeleton(Path(args.skeleton)): 
+ print(f"ERROR: Could not load skeleton from {args.skeleton}") + return 1 + + # Run appropriate phase(s) + if args.phase == "all": + result = designer.run_full_pipeline() + elif args.phase == "data_flow": + result = designer.run_data_flow_phase() + elif args.phase == "base_classes": + result = designer.run_base_class_phase() + elif args.phase == "interfaces": + result = designer.run_interface_phase() + else: + print(f"Unknown phase: {args.phase}") + return 1 + + # Check result + if not result.get("success", False): + if trajectory: + trajectory.fail(result.get("error", "Phase failed")) + return 1 + + # Mark trajectory as complete + if trajectory: + trajectory.complete(metadata={"phase": args.phase}) + + return 0 + + except Exception as e: + logger.error(f"Func designer failed: {e}") + if trajectory: + trajectory.fail(str(e)) + raise + + +if __name__ == "__main__": + exit(main()) diff --git a/RPG-Kit/scripts/func_design/interface_agent.py b/RPG-Kit/scripts/func_design/interface_agent.py new file mode 100644 index 0000000..7489612 --- /dev/null +++ b/RPG-Kit/scripts/func_design/interface_agent.py @@ -0,0 +1,2412 @@ +#!/usr/bin/env python3 +"""Interface Agent. + +This module provides the InterfaceAgent for designing function/class interfaces +for each file in the repository skeleton. 
+ +Key components: +- InterfaceAgent: Orchestrates interface design for a single file +- InterfaceOrchestrator: Manages the full interface design workflow across subtrees +- Validation functions for interface code +""" + +import json +import logging +import ast +import re +from typing import Dict, List, Optional, Tuple, Any, Set +from collections import defaultdict, deque +from pydantic import BaseModel, Field + +# Import ParsedFile and CodeUnit for code parsing +import sys +from pathlib import Path +sys.path.insert(0, str(Path(__file__).parent.parent)) +from rpg.code_unit import ParsedFile, CodeUnit + +# Import common LLMClient with trajectory support +from common import ( + LLMClient, + format_data_flow_edges, + format_base_classes, + format_base_classes_and_data_structures, +) + +from .interface_prompts import ( + INTERFACE_PROMPT, + PLAN_FILE_PROMPT, + SUBTREE_INTERFACE_PROMPT, +) +from common.import_normalizer import build_import_convention_snippet + + +# ============================================================================ +# Data Models +# ============================================================================ + +class InterfaceDependency(BaseModel): + """Dependency information for an interface.""" + inherits_from: List[str] = Field(default_factory=list, description="Base classes inherited") + calls: List[str] = Field(default_factory=list, description="Functions/methods expected to call") + uses_types: List[str] = Field(default_factory=list, description="Types used in parameters/returns") + + +class InterfaceDefinition(BaseModel): + """Definition of a single interface.""" + features: List[str] = Field(default_factory=list, description="List of feature paths this interface handles (both existing and new)") + code: str = Field(..., description="Python code for the interface") + dependencies: Optional[InterfaceDependency] = Field(default=None, description="Declared dependencies") + + +class InterfaceOutput(BaseModel): + """Output from LLM for 
interface design.""" + interfaces: List[InterfaceDefinition] = Field(..., min_length=1, description="List of interface definitions (must not be empty)") + + +class FileInterfaceBlock(BaseModel): + """Block of interface definitions for a single file within a subtree batch.""" + file_path: str = Field(..., description="Path to the file being designed") + interfaces: List[InterfaceDefinition] = Field(..., min_length=1, description="Interface definitions for this file") + + +class SubtreeInterfaceOutput(BaseModel): + """Output from LLM for subtree-level interface design (all files at once).""" + files: List[FileInterfaceBlock] = Field(..., min_length=1, description="Interface blocks organized by file, in implementation order") + + +class FileImplementationGraph(BaseModel): + """Graph of file implementation order.""" + file_implementation_graph: List[Dict[str, str]] = Field(default_factory=list) + + +# ============================================================================ +# Dependency Collector +# ============================================================================ + +class DependencyCollector: + """Collect fine-grained dependencies discovered during interface design. + + Dependencies are collected from two sources: + 1. Program analysis (AST parsing) - inheritance and type references from code + 2. LLM declarations - expected function calls declared by LLM + """ + + def __init__(self, known_base_classes: Set[str], known_types: Set[str]): + """Initialize the dependency collector. + + Args: + known_base_classes: Set of base class names from base_classes.json + known_types: Set of known type names (data structures, etc.) 
+ """ + self.known_base_classes = known_base_classes + self.known_types = known_types + self.original_edges: List[Dict[str, Any]] = [] + self.inheritance_edges: List[Dict[str, Any]] = [] + self.invocation_edges: List[Dict[str, Any]] = [] + self.reference_edges: List[Dict[str, Any]] = [] + + def set_original_edges(self, edges: List[Dict[str, Any]]): + """Store original coarse-grained data flow edges.""" + self.original_edges = edges.copy() if edges else [] + + def add_inheritance( + self, + child_class: str, + parent_class: str, + source_file: str, + parent_file: Optional[str] = None + ): + """Add an inheritance relationship (child extends parent).""" + self.inheritance_edges.append({ + "child": child_class, + "parent": parent_class, + "source_file": source_file, + "parent_file": parent_file, + "edge_type": "inherits", + "generator": "design_interfaces" + }) + + def add_invocation( + self, + caller: str, + callee: str, + caller_file: str, + callee_file: Optional[str] = None + ): + """Add an invocation relationship (caller calls callee). + + Self-calls (same bare name + same or unknown file) are silently skipped. 
+ """ + # --- self-call filter --- + bare_caller = caller.split(" ", 1)[-1] if " " in caller else caller + bare_callee = callee.split(" ", 1)[-1] if " " in callee else callee + if bare_caller == bare_callee and (callee_file is None or callee_file == caller_file): + return + + self.invocation_edges.append({ + "caller": caller, + "callee": callee, + "caller_file": caller_file, + "callee_file": callee_file, + "edge_type": "invokes", + "generator": "design_interfaces" + }) + + def add_reference( + self, + unit_name: str, + referenced_type: str, + source_file: str, + type_file: Optional[str] = None + ): + """Add a type reference relationship.""" + self.reference_edges.append({ + "unit": unit_name, + "referenced_type": referenced_type, + "source_file": source_file, + "type_file": type_file, + "edge_type": "references", + "generator": "design_interfaces" + }) + + def analyze_code_dependencies( + self, + code: str, + file_path: str, + base_class_files: Dict[str, str] + ): + """Analyze code to extract dependencies via AST parsing. 
+ + Extracts: + - Inheritance relationships (class X(BaseClass)) + - Type references in annotations + + Args: + code: Python source code to analyze + file_path: Path of the file containing this code + base_class_files: Mapping of class names to their file paths + """ + try: + tree = ast.parse(code) + except SyntaxError: + return + + for node in ast.walk(tree): + # Extract inheritance + if isinstance(node, ast.ClassDef): + child_class = node.name + for base in node.bases: + parent_name = _extract_name_from_node(base) + if parent_name and parent_name in self.known_base_classes: + parent_file = base_class_files.get(parent_name) + self.add_inheritance(child_class, parent_name, file_path, parent_file) + + # Extract type references from function annotations + if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)): + func_name = node.name + # Check parameter types + for arg in node.args.args: + if arg.annotation: + types = _extract_type_names(arg.annotation) + for t in types: + if t in self.known_types: + type_file = base_class_files.get(t) + self.add_reference(f"function {func_name}", t, file_path, type_file) + # Check return type + if node.returns: + types = _extract_type_names(node.returns) + for t in types: + if t in self.known_types: + type_file = base_class_files.get(t) + self.add_reference(f"function {func_name}", t, file_path, type_file) + + def process_llm_dependencies( + self, + unit_name: str, + dependencies: Optional[Dict[str, Any]], + file_path: str, + base_class_files: Dict[str, str] + ): + """Process dependencies declared by LLM. 
+
+        Args:
+            unit_name: Name of the interface (e.g., "class Foo" or "function bar")
+            dependencies: Dependencies dict from LLM with inherits_from, calls, uses_types
+            file_path: Path of the file containing this interface
+            base_class_files: Mapping of class/function names to their file paths
+        """
+        if not dependencies:
+            return
+
+        # Process calls (LLM-declared invocations)
+        for callee in dependencies.get("calls", []):
+            callee_file = base_class_files.get(callee)
+            self.add_invocation(unit_name, callee, file_path, callee_file)
+
+        # Note: inherits_from and uses_types are also analyzed from code,
+        # but LLM declarations can catch additional cases not in annotations
+        for parent in dependencies.get("inherits_from", []):
+            if parent in self.known_base_classes:
+                # Check if not already added by code analysis
+                # NOTE(review): this is a substring match -- "Foo" also matches
+                # "class FooBar", which could suppress a legitimate edge; an
+                # exact comparison on the bare class name may be intended.
+                existing = [e for e in self.inheritance_edges
+                            if e["child"] in unit_name and e["parent"] == parent]
+                if not existing:
+                    parent_file = base_class_files.get(parent)
+                    # Extract class name from unit_name like "class Foo"
+                    class_name = unit_name.replace("class ", "") if unit_name.startswith("class ") else unit_name
+                    self.add_inheritance(class_name, parent, file_path, parent_file)
+
+        for type_name in dependencies.get("uses_types", []):
+            if type_name in self.known_types:
+                # Check if not already added by code analysis
+                existing = [e for e in self.reference_edges
+                            if e["unit"] == unit_name and e["referenced_type"] == type_name]
+                if not existing:
+                    type_file = base_class_files.get(type_name)
+                    self.add_reference(unit_name, type_name, file_path, type_file)
+
+    def post_process_edges(self, global_registry: "GlobalInterfaceRegistry"):
+        """Normalise invocation edges after all subtrees have been designed.
+
+        For each invocation edge:
+        1. Resolve bare callee names to their full unit name
+           (``"function foo"`` / ``"class Bar"``).
+        2. Handle ``Class.method`` patterns -> resolve to ``"class Class"``.
+        3. Fill in missing ``callee_file`` via *global_registry*.
+ 4. Drop edges whose callee cannot be resolved at all. + """ + if not global_registry: + return + + cleaned: List[Dict[str, Any]] = [] + for edge in self.invocation_edges: + callee = edge["callee"] + callee_file = edge.get("callee_file") + + # --- 1. Handle "Class.method" patterns --- + if "." in callee: + class_name = callee.split(".")[0] + resolved_file = callee_file or global_registry.resolve_callee(class_name) + if resolved_file: + edge["callee"] = f"class {class_name}" + edge["callee_file"] = resolved_file + cleaned.append(edge) + continue # skip unresolvable Class.method + + # --- 2. Normalise bare name โ†’ "function X" / "class X" --- + if not callee.startswith("function ") and not callee.startswith("class "): + # Check registry for the canonical unit name + unit_info = global_registry.units.get(f"function {callee}") or \ + global_registry.units.get(f"class {callee}") + if unit_info: + edge["callee"] = f"{unit_info['unit_type']} {callee}" + if not callee_file: + edge["callee_file"] = unit_info["file_path"] + elif callee in global_registry.function_to_file: + edge["callee"] = f"function {callee}" + if not callee_file: + edge["callee_file"] = global_registry.function_to_file[callee] + elif callee in global_registry.class_to_file: + edge["callee"] = f"class {callee}" + if not callee_file: + edge["callee_file"] = global_registry.class_to_file[callee] + # else: keep bare name as-is (external or unresolvable) + + # --- 3. 
Fill missing callee_file --- + if not edge.get("callee_file"): + bare = edge["callee"] + if bare.startswith("function ") or bare.startswith("class "): + bare = bare.split(" ", 1)[1] + resolved = global_registry.resolve_callee(bare) + if resolved: + edge["callee_file"] = resolved + + cleaned.append(edge) + + self.invocation_edges = cleaned + + def to_dict(self) -> Dict[str, Any]: + """Convert collected dependencies to dictionary.""" + return { + "original_edges": self.original_edges, + "inheritance_edges": self.inheritance_edges, + "invocation_edges": self.invocation_edges, + "reference_edges": self.reference_edges + } + + def get_summary(self) -> Dict[str, int]: + """Get summary counts of collected dependencies.""" + return { + "original_edges": len(self.original_edges), + "inheritance_edges": len(self.inheritance_edges), + "invocation_edges": len(self.invocation_edges), + "reference_edges": len(self.reference_edges) + } + + +def _extract_name_from_node(node: ast.expr) -> Optional[str]: + """Extract name string from AST node.""" + if isinstance(node, ast.Name): + return node.id + elif isinstance(node, ast.Attribute): + return node.attr + return None + + +def _extract_type_names(node: ast.expr) -> List[str]: + """Extract all type names from a type annotation AST node.""" + names = [] + if isinstance(node, ast.Name): + names.append(node.id) + elif isinstance(node, ast.Attribute): + names.append(node.attr) + elif isinstance(node, ast.Subscript): + # Handle generic types like List[X], Optional[X], etc. 
+        # NOTE(review): node.slice is the subscript expression on Python 3.9+;
+        # on 3.8 it would be an ast.Index wrapper -- confirm the minimum
+        # supported interpreter version. String-literal annotations
+        # (ast.Constant) are not handled here.
+        names.extend(_extract_type_names(node.slice))
+        if isinstance(node.value, ast.Name):
+            names.append(node.value.id)
+    elif isinstance(node, ast.Tuple):
+        for elt in node.elts:
+            names.extend(_extract_type_names(elt))
+    elif isinstance(node, ast.BinOp):
+        # Handle Union types with | operator (Python 3.10+)
+        names.extend(_extract_type_names(node.left))
+        names.extend(_extract_type_names(node.right))
+    return names
+
+
+# ============================================================================
+# Global Interface Registry
+# ============================================================================
+
+class GlobalInterfaceRegistry:
+    """Track all designed interfaces across subtrees for cross-subtree dependency resolution.
+
+    As each subtree is designed, its interfaces are registered here.
+    Later subtrees can use this registry to resolve callee names to file paths,
+    enabling accurate cross-subtree dependency edges.
+    """
+
+    def __init__(self):
+        # unit_name -> {file_path, subtree_name, unit_type, signature_summary, features}
+        self.units: Dict[str, Dict[str, Any]] = {}
+        # class_name -> file_path (for quick lookup)
+        self.class_to_file: Dict[str, str] = {}
+        # function_name -> file_path
+        self.function_to_file: Dict[str, str] = {}
+        # file_path -> list of unit info dicts
+        self.file_units: Dict[str, List[Dict[str, Any]]] = {}
+
+    def register_from_subtree_result(
+        self,
+        subtree_name: str,
+        subtree_interfaces: Dict[str, Dict[str, Any]]
+    ):
+        """Register all designed interfaces from a completed subtree.
+ + Args: + subtree_name: Name of the subtree + subtree_interfaces: Dict mapping file_path -> file result dict + (with keys: units, units_to_features, units_to_code, file_code) + """ + for file_path, file_data in subtree_interfaces.items(): + units = file_data.get("units", []) + units_to_features = file_data.get("units_to_features", {}) + units_to_code = file_data.get("units_to_code", {}) + + file_unit_list = [] + + for unit_name in units: + features = units_to_features.get(unit_name, []) + code = units_to_code.get(unit_name, "") + + # Determine unit type and bare name + if unit_name.startswith("class "): + unit_type = "class" + bare_name = unit_name[len("class "):] + self.class_to_file[bare_name] = file_path + elif unit_name.startswith("function "): + unit_type = "function" + bare_name = unit_name[len("function "):] + self.function_to_file[bare_name] = file_path + else: + unit_type = "unknown" + bare_name = unit_name + + # Extract a signature summary from the code (first non-import, non-blank line) + signature_summary = self._extract_signature_summary(code, unit_type, bare_name) + + unit_info = { + "file_path": file_path, + "subtree_name": subtree_name, + "unit_type": unit_type, + "bare_name": bare_name, + "signature_summary": signature_summary, + "features": features, + } + + self.units[unit_name] = unit_info + file_unit_list.append(unit_info) + + if file_unit_list: + if file_path not in self.file_units: + self.file_units[file_path] = [] + self.file_units[file_path].extend(file_unit_list) + + def resolve_callee(self, callee_name: str) -> Optional[str]: + """Resolve a callee name to its file_path across all registered subtrees. + + Tries: + 1. Exact match in class_to_file + 2. Exact match in function_to_file + 3. 
Fuzzy match (case-insensitive) in both + + Returns: + file_path if found, None otherwise + """ + # Exact match + if callee_name in self.class_to_file: + return self.class_to_file[callee_name] + if callee_name in self.function_to_file: + return self.function_to_file[callee_name] + + # Try with "class " or "function " prefix stripped + stripped = callee_name + if callee_name.startswith("class "): + stripped = callee_name[len("class "):] + elif callee_name.startswith("function "): + stripped = callee_name[len("function "):] + + if stripped != callee_name: + if stripped in self.class_to_file: + return self.class_to_file[stripped] + if stripped in self.function_to_file: + return self.function_to_file[stripped] + + # Case-insensitive fallback + callee_lower = callee_name.lower() + for name, path in self.class_to_file.items(): + if name.lower() == callee_lower: + return path + for name, path in self.function_to_file.items(): + if name.lower() == callee_lower: + return path + + return None + + def get_all_public_symbols(self) -> Dict[str, str]: + """Return {symbol_name: file_path} for all registered public symbols. + + This can be merged into base_class_files to enable cross-subtree + dependency resolution. + """ + symbols = {} + symbols.update(self.class_to_file) + symbols.update(self.function_to_file) + return symbols + + def get_structured_interface_listing(self, subtree_name: str) -> str: + """Build a structured interface listing for a specific subtree, suitable for inclusion in upstream context prompts. + + Returns a formatted string like: + From "Physics Engine Core": + - src/physics/forces.py: + - function calculate_gravity(mass1: float, ...) -> Vector2D + - function calculate_drag(...) 
+ - src/physics/dynamics.py: + - class DynamicsEngine: + - method step(particles: List[Particle], dt: float) -> None + """ + parts = [] + + # Group file_units by file_path for this subtree + subtree_files: Dict[str, List[Dict[str, Any]]] = {} + for file_path, unit_list in self.file_units.items(): + for unit_info in unit_list: + if unit_info["subtree_name"] == subtree_name: + if file_path not in subtree_files: + subtree_files[file_path] = [] + subtree_files[file_path].append(unit_info) + + if not subtree_files: + return "" + + parts.append(f'From "{subtree_name}":') + for file_path in sorted(subtree_files.keys()): + parts.append(f" - {file_path}:") + for unit_info in subtree_files[file_path]: + sig = unit_info.get("signature_summary", unit_info["bare_name"]) + parts.append(f" - {unit_info['unit_type']} {sig}") + + return "\n".join(parts) + + def get_all_structured_listings_for_upstream( + self, + upstream_subtree_names: Set[str] + ) -> str: + """Build structured interface listings for all upstream subtrees. + + Args: + upstream_subtree_names: Set of subtree names to include + + Returns: + Formatted string with all upstream interface listings + """ + listings = [] + for subtree_name in sorted(upstream_subtree_names): + listing = self.get_structured_interface_listing(subtree_name) + if listing: + listings.append(listing) + + if not listings: + return "No upstream interfaces available." 
+ + return "\n\n".join(listings) + + @staticmethod + def _extract_signature_summary(code: str, unit_type: str, bare_name: str) -> str: + """Extract a concise signature summary from interface code.""" + if not code: + return bare_name + + try: + tree = ast.parse(code) + for node in ast.iter_child_nodes(tree): + if unit_type == "class" and isinstance(node, ast.ClassDef) and node.name == bare_name: + # For classes, list public methods with signatures + methods = [] + for item in node.body: + if isinstance(item, (ast.FunctionDef, ast.AsyncFunctionDef)): + if not item.name.startswith("_") or item.name == "__init__": + sig = GlobalInterfaceRegistry._format_func_signature(item) + methods.append(sig) + bases_str = "" + if node.bases: + bases = [_extract_name_from_node(b) for b in node.bases] + bases = [b for b in bases if b] + if bases: + bases_str = f"({', '.join(bases)})" + if methods: + return f"{bare_name}{bases_str} [{', '.join(methods[:5])}]" + return f"{bare_name}{bases_str}" + + elif unit_type == "function" and isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)) and node.name == bare_name: + return GlobalInterfaceRegistry._format_func_signature(node) + except SyntaxError: + pass + + return bare_name + + @staticmethod + def _format_func_signature(node) -> str: + """Format a function/method AST node into a concise signature string.""" + name = node.name + params = [] + for arg in node.args.args: + if arg.arg == "self": + continue + param_str = arg.arg + if arg.annotation: + type_str = ast.unparse(arg.annotation) if hasattr(ast, 'unparse') else "" + if type_str: + param_str = f"{arg.arg}: {type_str}" + params.append(param_str) + + ret_str = "" + if node.returns: + ret_type = ast.unparse(node.returns) if hasattr(ast, 'unparse') else "" + if ret_type: + ret_str = f" -> {ret_type}" + + # Truncate params if too many + if len(params) > 4: + params_str = ", ".join(params[:3]) + ", ..." 
+        else:
+            params_str = ", ".join(params)
+
+        return f"{name}({params_str}){ret_str}"
+
+
+# ============================================================================
+# Import Cross-Validation (A2)
+# ============================================================================
+
+def cross_validate_imports_vs_calls(
+    code: str,
+    file_path: str,
+    declared_calls: List[str],
+    global_registry: GlobalInterfaceRegistry
+) -> List[Dict[str, str]]:
+    """Parse import statements in interface code and cross-validate against declared calls.
+
+    Identifies symbols that are imported from modules in the global registry
+    but not declared as call dependencies.
+
+    This is an auxiliary validation -- results are warnings, not auto-added edges.
+
+    Args:
+        code: Interface source code (signatures + imports only)
+        file_path: Path of the file being validated
+        declared_calls: List of callee names from LLM's dependencies.calls
+        global_registry: Registry of all designed interfaces
+
+    Returns:
+        List of warning dicts: {imported_symbol, imported_from, resolved_file, file_path}
+    """
+    # NOTE(review): this local shadows the stdlib ``warnings`` module inside
+    # the function; harmless here, but rename if the module is ever needed.
+    warnings = []
+    declared_set = set(declared_calls)
+
+    # Unparsable code yields no warnings rather than an error; syntax
+    # problems are reported by the separate validation pass.
+    try:
+        tree = ast.parse(code)
+    except SyntaxError:
+        return warnings
+
+    for node in ast.walk(tree):
+        if isinstance(node, ast.ImportFrom):
+            module = node.module or ""
+            for alias in node.names:
+                # NOTE(review): aliased imports (``from m import X as Y``) are
+                # looked up by the original name X, while dependencies.calls
+                # may list the alias Y -- confirm whether asname matters here.
+                symbol = alias.name
+                # Check if this symbol is in the global registry
+                resolved_file = global_registry.resolve_callee(symbol)
+                if resolved_file and resolved_file != file_path:
+                    # Symbol is a known interface from another file
+                    if symbol not in declared_set:
+                        warnings.append({
+                            "imported_symbol": symbol,
+                            "imported_from": module,
+                            "resolved_file": resolved_file,
+                            "file_path": file_path,
+                            "message": (
+                                f"'{symbol}' is imported from '{module}' and is a known "
+                                f"interface in '{resolved_file}', but not declared in "
+                                f"dependencies.calls"
+                            )
+                        })
+        elif isinstance(node, ast.Import):
+            for alias in node.names:
+                # NOTE(review): ``import a.b`` binds the name ``a``, not ``b``;
+                # taking the last dotted component may resolve a symbol the
+                # code never references directly -- confirm intent.
+                symbol = alias.name.split(".")[-1] if "." in alias.name else alias.name
+                resolved_file = global_registry.resolve_callee(symbol)
+                if resolved_file and resolved_file != file_path:
+                    if symbol not in declared_set:
+                        warnings.append({
+                            "imported_symbol": symbol,
+                            "imported_from": alias.name,
+                            "resolved_file": resolved_file,
+                            "file_path": file_path,
+                            "message": (
+                                f"'{symbol}' is imported and is a known interface in "
+                                f"'{resolved_file}', but not declared in dependencies.calls"
+                            )
+                        })
+
+    return warnings
+
+
+# ============================================================================
+# Validation Functions
+# ============================================================================
+
+def extract_top_level_definitions(code: str) -> Tuple[List[str], List[str]]:
+    """Extract top-level function and class names from code.
+
+    Returns:
+        (functions, classes) name lists; both are empty when the code
+        does not parse.
+    """
+    functions = []
+    classes = []
+    try:
+        tree = ast.parse(code)
+        # iter_child_nodes: top level only -- nested defs/methods are ignored.
+        for node in ast.iter_child_nodes(tree):
+            if isinstance(node, ast.FunctionDef):
+                functions.append(node.name)
+            elif isinstance(node, ast.AsyncFunctionDef):
+                functions.append(node.name)
+            elif isinstance(node, ast.ClassDef):
+                classes.append(node.name)
+    except SyntaxError:
+        pass
+    return functions, classes
+
+
+def check_has_docstring(code: str) -> Tuple[bool, str]:
+    """Check if top-level functions/classes have docstrings.
+
+    Returns:
+        (True, "") when no missing docstrings are found (also when the
+        code does not parse, since the error list stays empty), otherwise
+        (False, "; "-joined descriptions of the offending definitions).
+    """
+    errors = []
+    try:
+        tree = ast.parse(code)
+        for node in ast.iter_child_nodes(tree):
+            if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)):
+                if not ast.get_docstring(node):
+                    errors.append(f"{type(node).__name__} '{node.name}' is missing a docstring")
+    except SyntaxError:
+        pass
+
+    if errors:
+        return False, "; ".join(errors)
+    return True, ""
+
+
+def validate_interface(
+    interface: Dict[str, Any],
+    target_features: Set[str],
+    covered_features: Set[str]
+) -> Tuple[bool, str, Dict[str, Any]]:
+    """Validate a single interface definition using ParsedFile.
+ + Returns: (is_valid, error_message, parsed_info) + """ + features = interface.get("features", []) + code = interface.get("code", "") + errors = [] + + # Check features + if not features: + errors.append("Interface must have at least one feature") + else: + feature_set = set(features) + + # Check for overlap with already covered features + overlap = feature_set & covered_features + if overlap: + errors.append(f"Features {list(overlap)} are already covered by another interface") + + # Check if features are in target features + if target_features: + invalid_features = feature_set - target_features + if invalid_features: + errors.append(f"Features {list(invalid_features)} are not in target features") + + # Auto-fix hyphenated module names in import statements + # (e.g., "from blog-system.security import ..." -> "from blog_system.security import ...") + code = re.sub( + r'^(\s*(?:from|import)\s+)([\w\-]+(?:\.[\w\-]+)*)', + lambda m: m.group(1) + m.group(2).replace('-', '_'), + code, + flags=re.MULTILINE, + ) + # Persist the fixed code back so downstream consumers get corrected imports + interface["code"] = code + + # Parse code with ParsedFile + parsed_file = ParsedFile(code=code, file_path="temp_interface.py") + + # Check for syntax errors + if parsed_file.has_error(): + error = parsed_file.error + errors.append(f"Syntax error: line {error.lineno}, column {error.offset}: {error.msg}") + return False, "; ".join(errors), {} + + # Extract only class and function units (not methods) + interface_units = [ + unit for unit in parsed_file.units + if unit.unit_type in ["function", "class"] + ] + + if not interface_units: + errors.append("No valid functions/classes found in code") + + # Check docstrings + for unit in interface_units: + if not unit.docstring and unit.unit_type in ["function", "class"]: + errors.append( + f"Missing docstring for {unit.unit_type} '{unit.name}' " + f"in features {features}" + ) + + if errors: + return False, "; ".join(errors), {} + + # Build 
parsed info with CodeUnit objects + functions = [u.name for u in interface_units if u.unit_type == "function"] + classes = [u.name for u in interface_units if u.unit_type == "class"] + + return True, "", { + "functions": functions, + "classes": classes, + "features": features, + "units": interface_units # Include CodeUnit objects + } + + +def validate_file_implementation_graph( + graph: List[Dict[str, str]], + file_names: List[str] +) -> Tuple[str, bool]: + """Validate file implementation graph. + + Returns: (feedback_message, is_valid) + """ + file_set = set(file_names) + feedbacks = [] + is_valid = True + + # Check all files are valid + for edge in graph: + from_f = edge.get("from", "") + to_f = edge.get("to", "") + + if from_f not in file_set: + feedbacks.append(f"Invalid file reference: `{from_f}` is not in the file list.") + is_valid = False + if to_f not in file_set: + feedbacks.append(f"Invalid file reference: `{to_f}` is not in the file list.") + is_valid = False + + if feedbacks: + feedbacks.append("Please ensure all file references are from the provided file list.") + + # Check for cycles + adj = defaultdict(list) + indegree = defaultdict(int) + for edge in graph: + f, t = edge.get("from", ""), edge.get("to", "") + adj[f].append(t) + indegree[t] += 1 + + queue = deque([f for f in file_set if indegree[f] == 0]) + visited = set() + + while queue: + node = queue.popleft() + visited.add(node) + for neighbor in adj.get(node, []): + indegree[neighbor] -= 1 + if indegree[neighbor] == 0: + queue.append(neighbor) + + if len(visited) != len(file_set): + feedbacks.append( + "Cycle detected or not all files are connected. " + "The graph must form a valid DAG (Directed Acyclic Graph)." + ) + is_valid = False + + # Check coverage + used_files = {e.get("from", "") for e in graph} | {e.get("to", "") for e in graph} + missing = file_set - used_files + if missing: + feedbacks.append( + f"Missing files: {sorted(missing)}. Please include all files in the graph." 
+ ) + is_valid = False + + return "\n".join(feedbacks) if feedbacks else "Valid graph", is_valid + + +def topo_sort_file_graph(graph: List[Dict[str, str]]) -> Optional[List[str]]: + """Topologically sort file graph. Returns None if cycle detected.""" + adj = defaultdict(list) + indegree = defaultdict(int) + nodes = set() + + for edge in graph: + from_f = edge.get("from", "") + to_f = edge.get("to", "") + adj[from_f].append(to_f) + indegree[to_f] += 1 + nodes.add(from_f) + nodes.add(to_f) + + # Initialize indegree for source nodes + for node in nodes: + if node not in indegree: + indegree[node] = 0 + + queue = deque([n for n in nodes if indegree[n] == 0]) + sorted_list = [] + + while queue: + node = queue.popleft() + sorted_list.append(node) + for neighbor in adj[node]: + indegree[neighbor] -= 1 + if indegree[neighbor] == 0: + queue.append(neighbor) + + if len(sorted_list) != len(nodes): + return None + + return sorted_list + + +# ============================================================================ +# Interface Agent (Single File) +# ============================================================================ + +class InterfaceAgent: + """Agent for designing interfaces for a single file.""" + + def __init__( + self, + llm_client: Optional[LLMClient] = None, + max_iterations: int = 10, + logger: Optional[logging.Logger] = None, + trajectory: Optional[Any] = None, + step_id: Optional[int] = None + ): + # Create LLMClient with trajectory support if not provided + if llm_client is None: + self.llm = LLMClient(trajectory=trajectory, step_id=step_id) + else: + self.llm = llm_client + # Update trajectory info on existing client + if trajectory is not None: + self.llm.set_trajectory(trajectory, step_id) + self.max_iterations = max_iterations + self.logger = logger or logging.getLogger(__name__) + + def design_file_interface( + self, + file_path: str, + file_features: List[str], + repo_info: str, + data_flow_str: str, + base_classes_str: str, + upstream_context: str, 
+ implemented_summary: str, + dependency_collector: Optional[DependencyCollector] = None, + base_class_files: Optional[Dict[str, str]] = None + ) -> Dict[str, Any]: + """Design interfaces for a single file. + + Args: + file_path: Path to the file being designed + file_features: List of feature paths for this file + repo_info: Repository description + data_flow_str: Formatted data flow string + base_classes_str: Formatted base classes string + upstream_context: Context from upstream modules + implemented_summary: Summary of already implemented files + dependency_collector: Optional collector for fine-grained dependencies + base_class_files: Optional mapping of class/type names to file paths + + Returns: + Dict containing interfaces, code, feature_map, success + """ + self.logger.info(f"[InterfaceAgent] Designing interfaces for {file_path}") + + target_features = set(file_features) + covered_features = set() + all_interfaces = [] + all_code_blocks = [] + feature_interface_map = {} + + # Build system prompt (tool description is now integrated) + system_prompt = INTERFACE_PROMPT + + # Build user prompt + features_str = "\n".join([f"- {f}" for f in file_features]) + + user_prompt = f"""[Begin Iteration] +Design interfaces for file: `{file_path}`. + +Requirements: +- ONLY cover the following feature paths: +{features_str} +- When calling `design_itfs_for_feature`, ONLY use feature paths listed above. +- Do NOT introduce new/unspecified feature paths. +- Define interfaces only (imports + signature + docstring + `pass`). +- Prefer one function/class per feature or a small group of closely related features. +- Keep each interface focused and with narrow responsibility. +- You MAY import and reuse symbols from upstream context and base classes. 
+ +Global context you can use: +=== Repository Info === +{repo_info} + +=== Data Flow Graph === +{data_flow_str} + +=== Upstream Context === +{upstream_context} + +=== Implemented Summary === +{implemented_summary} + +=== Available Base Classes === +{base_classes_str} +""" + + combined_prompt = f"{system_prompt}\n\n{user_prompt}" + last_error = "" + + for iteration in range(self.max_iterations): + # Check if all features covered + if covered_features >= target_features: + self.logger.info(f"[InterfaceAgent] All features covered for {file_path}") + break + + self.logger.info(f"[InterfaceAgent] Iteration {iteration + 1}/{self.max_iterations} for {file_path}") + + current_prompt = combined_prompt + if last_error: + current_prompt += f"\n\n[Tool Execution Feedback]: {last_error}" + + # Add progress info + remaining = target_features - covered_features + if covered_features: + current_prompt += f"\n\n[Progress]: Covered {len(covered_features)}/{len(target_features)} features. Remaining: {list(remaining)}" + + try: + # Use call_structured for Pydantic validation + _, result_model, _ = self.llm.call_structured( + system_prompt="", # Already included in current_prompt + user_prompt=current_prompt, + response_model=InterfaceOutput, + purpose=f"interface_design_{file_path}_{iteration + 1}", + max_retries=1 # Handle retries at this level + ) + + if not result_model: + last_error = "Failed to parse result_json or Pydantic validation failed. Please use tags with valid JSON." 
+ continue + + # Convert Pydantic models to dicts for existing validation logic + interfaces = [iface.model_dump() for iface in result_model.interfaces] + + # Validate each interface + valid_interfaces = [] + for interface in interfaces: + is_valid, error, info = validate_interface( + interface, target_features, covered_features + ) + + if is_valid: + # Add name field from parsed info + if info.get("classes"): + interface["name"] = f"class {info['classes'][0]}" + elif info.get("functions"): + interface["name"] = f"function {info['functions'][0]}" + + # Store CodeUnit objects + interface["parsed_units"] = info.get("units", []) + + valid_interfaces.append(interface) + # Update covered features + for feat in interface.get("features", []): + covered_features.add(feat) + + # Update feature map + for func in info.get("functions", []): + feature_interface_map[f"function {func}"] = interface.get("features", []) + for cls in info.get("classes", []): + feature_interface_map[f"class {cls}"] = interface.get("features", []) + + # Collect code + all_code_blocks.append(interface.get("code", "")) + + # Collect dependencies if collector is provided + if dependency_collector and base_class_files: + code = interface.get("code", "") + unit_name = interface.get("name", "") + + # Analyze code for inheritance and type references + dependency_collector.analyze_code_dependencies( + code=code, + file_path=file_path, + base_class_files=base_class_files + ) + + # Process LLM-declared dependencies + llm_deps = interface.get("dependencies") + if llm_deps: + dependency_collector.process_llm_dependencies( + unit_name=unit_name, + dependencies=llm_deps, + file_path=file_path, + base_class_files=base_class_files + ) + else: + self.logger.warning(f"Interface validation failed: {error}") + last_error = error + + if valid_interfaces: + all_interfaces.extend(valid_interfaces) + last_error = "" # Clear error on success + + except Exception as e: + self.logger.error(f"[InterfaceAgent] Error: {e}") + 
last_error = str(e) + + # Merge all code blocks + final_code = "\n\n".join(all_code_blocks) if all_code_blocks else "" + + success = covered_features >= target_features + + # Build units list and mappings in the reference format (ZeroRepo compatible) + units = [] + units_to_features = {} + units_to_code = {} + designed_interfaces = {} # For storing CodeUnit objects + + for interface in all_interfaces: + interface_name = interface.get("name", "") + if not interface_name: + continue + + features = interface.get("features", []) + parsed_units = interface.get("parsed_units", []) + + if parsed_units: + # Each parsed unit gets its own entry keyed by its actual name + for unit in parsed_units: + unit_key = f"{unit.unit_type} {unit.name}" + if unit_key not in units: + units.append(unit_key) + units_to_features[unit_key] = features + try: + # Use count_lines to get the unit code (ZeroRepo compatible) + _, unit_code = unit.count_lines(original=True, return_code=True) + units_to_code[unit_key] = unit_code + except Exception: + # Fallback to full interface code + units_to_code[unit_key] = interface.get("code", "") + # Store the CodeUnit object + designed_interfaces[unit_key] = { + "unit": unit, + "features": features + } + else: + # No parsed units โ€” use the interface name as-is + if interface_name not in units: + units.append(interface_name) + units_to_features[interface_name] = features + units_to_code[interface_name] = interface.get("code", "") + + return { + "file_path": file_path, + "file_code": final_code, + "units": units, + "units_to_features": units_to_features, + "units_to_code": units_to_code, + "designed_interfaces": designed_interfaces, + "success": success, + "iterations": iteration + 1 + } + + +# ============================================================================ +# Subtree Interface Agent (All Files in One Subtree) +# ============================================================================ + +class SubtreeInterfaceAgent: + """Agent for designing 
interfaces for ALL files in a subtree in a single LLM session. + + Instead of making one LLM call per file, this agent batches all files in a subtree + into a single prompt, instructing the LLM to design interfaces for each file + sequentially (following file implementation order). This saves LLM calls and avoids + redundant context loading. + + The agent supports iteration: if some files' features are not fully covered after + the first call, it retries with feedback, including already-accepted interfaces + as context. + """ + + def __init__( + self, + llm_client: Optional[LLMClient] = None, + max_iterations: int = 10, + logger: Optional[logging.Logger] = None, + trajectory: Optional[Any] = None, + step_id: Optional[int] = None + ): + if llm_client is None: + self.llm = LLMClient(trajectory=trajectory, step_id=step_id) + else: + self.llm = llm_client + if trajectory is not None: + self.llm.set_trajectory(trajectory, step_id) + self.max_iterations = max_iterations + self.logger = logger or logging.getLogger(__name__) + + def design_subtree_interfaces( + self, + file_nodes: List[Dict[str, Any]], + file_order: List[str], + repo_info: str, + data_flow_str: str, + base_classes_str: str, + upstream_context: str, + dependency_collector: Optional[DependencyCollector] = None, + base_class_files: Optional[Dict[str, str]] = None, + subtree_name: str = "", + ) -> Dict[str, Dict[str, Any]]: + """Design interfaces for all files in a subtree in batched LLM calls. 
+ + Args: + file_nodes: List of file dicts with 'path' and 'feature_paths' + file_order: Ordered list of file paths (implementation dependency order) + repo_info: Repository description + data_flow_str: Formatted data flow string for this subtree + base_classes_str: Formatted base classes and data structures + upstream_context: Context from upstream subtrees + dependency_collector: Optional collector for fine-grained dependencies + base_class_files: Optional mapping of class/type names to file paths + + Returns: + Dict mapping file_path -> result dict with keys: + file_code, units, units_to_features, units_to_code, success + """ + # Build file info lookup + file_info_map = {f["path"]: f for f in file_nodes} + + # Per-file state tracking + # file_path -> {target_features, covered_features, all_interfaces, all_code_blocks} + file_states: Dict[str, Dict[str, Any]] = {} + for file_path in file_order: + if file_path not in file_info_map: + continue + features = file_info_map[file_path].get("feature_paths", []) + if not features: + continue + file_states[file_path] = { + "target_features": set(features), + "covered_features": set(), + "all_interfaces": [], + "all_code_blocks": [], + } + + if not file_states: + self.logger.warning("[SubtreeInterfaceAgent] No files with features to design") + return {} + + # Build system prompt (tool description is now integrated) + system_prompt = SUBTREE_INTERFACE_PROMPT + + last_error = "" + + for iteration in range(self.max_iterations): + # Determine which files still need work + remaining_files = [ + fp for fp in file_order + if fp in file_states + and file_states[fp]["covered_features"] < file_states[fp]["target_features"] + ] + + if not remaining_files: + self.logger.info("[SubtreeInterfaceAgent] All files fully covered") + break + + self.logger.info( + f"[SubtreeInterfaceAgent] Iteration {iteration + 1}/{self.max_iterations}, " + f"{len(remaining_files)} files remaining" + ) + + # Build user prompt + user_prompt = 
self._build_subtree_user_prompt( + remaining_files=remaining_files, + file_states=file_states, + file_info_map=file_info_map, + repo_info=repo_info, + data_flow_str=data_flow_str, + base_classes_str=base_classes_str, + upstream_context=upstream_context, + last_error=last_error, + ) + + combined_prompt = f"{system_prompt}\n\n{user_prompt}" + + try: + _, result_model, _ = self.llm.call_structured( + system_prompt="", + user_prompt=combined_prompt, + response_model=SubtreeInterfaceOutput, + purpose=f"subtree_interface_design_{subtree_name}_{iteration + 1}", + max_retries=1, + ) + + if not result_model: + last_error = ( + "Failed to parse result_json or Pydantic validation failed. " + "Please use tags with valid JSON " + "matching the design_subtree_interfaces schema." + ) + continue + + # Process each file block from LLM response + last_error = "" + file_errors = [] + + for file_block in result_model.files: + file_path = file_block.file_path + + if file_path not in file_states: + self.logger.warning( + f"[SubtreeInterfaceAgent] Unknown file path from LLM: {file_path}" + ) + continue + + state = file_states[file_path] + target_features = state["target_features"] + covered_features = state["covered_features"] + + # Validate each interface in this file block + for interface in file_block.interfaces: + iface_dict = interface.model_dump() + is_valid, error, info = validate_interface( + iface_dict, target_features, covered_features + ) + + if is_valid: + # Add name from parsed info + if info.get("classes"): + iface_dict["name"] = f"class {info['classes'][0]}" + elif info.get("functions"): + iface_dict["name"] = f"function {info['functions'][0]}" + + iface_dict["parsed_units"] = info.get("units", []) + + state["all_interfaces"].append(iface_dict) + state["all_code_blocks"].append(iface_dict.get("code", "")) + + for feat in iface_dict.get("features", []): + covered_features.add(feat) + + # Collect dependencies + if dependency_collector and base_class_files: + code = 
iface_dict.get("code", "") + unit_name = iface_dict.get("name", "") + + dependency_collector.analyze_code_dependencies( + code=code, + file_path=file_path, + base_class_files=base_class_files + ) + + llm_deps = iface_dict.get("dependencies") + if llm_deps: + dependency_collector.process_llm_dependencies( + unit_name=unit_name, + dependencies=llm_deps, + file_path=file_path, + base_class_files=base_class_files + ) + + # Update base_class_files so later files can reference + if base_class_files is not None: + name = iface_dict.get("name", "") + parts = name.split(" ", 1) + if len(parts) == 2: + base_class_files[parts[1]] = file_path + else: + self.logger.warning( + f"[SubtreeInterfaceAgent] Validation failed for " + f"{file_path}: {error}" + ) + file_errors.append(f"{file_path}: {error}") + + if file_errors: + last_error = "[Validation Errors]\n" + "\n".join(file_errors) + + except Exception as e: + self.logger.error(f"[SubtreeInterfaceAgent] Error: {e}") + last_error = str(e) + + # Build final results for each file + results: Dict[str, Dict[str, Any]] = {} + all_new_features: List[Dict[str, str]] = [] + + for file_path in file_order: + if file_path not in file_states: + continue + + state = file_states[file_path] + file_result, new_features = self._build_file_result( + file_path=file_path, + all_interfaces=state["all_interfaces"], + all_code_blocks=state["all_code_blocks"], + target_features=state["target_features"], + covered_features=state["covered_features"], + ) + results[file_path] = file_result + all_new_features.extend(new_features) + + # Attach new features to results for caller to process + if all_new_features: + # Store in a special key that will be extracted by the orchestrator + results["__new_features__"] = all_new_features + + return results + + def _build_subtree_user_prompt( + self, + remaining_files: List[str], + file_states: Dict[str, Dict[str, Any]], + file_info_map: Dict[str, Dict[str, Any]], + repo_info: str, + data_flow_str: str, + 
base_classes_str: str, + upstream_context: str, + last_error: str, + ) -> str: + """Build the user prompt for subtree interface design.""" + # Build file list section + files_section_parts = [] + for i, file_path in enumerate(remaining_files, 1): + state = file_states.get(file_path, {}) + target_features = state.get("target_features", set()) + covered_features = state.get("covered_features", set()) + remaining_features = target_features - covered_features + + features_str = "\n".join([f" - {f}" for f in sorted(remaining_features)]) + files_section_parts.append( + f" {i}. `{file_path}`\n" + f" Features to design:\n{features_str}" + ) + + files_section = "\n\n".join(files_section_parts) + + # Build already-completed context (from files fully or partially done) + completed_parts = [] + for file_path, state in file_states.items(): + if file_path in remaining_files and not state["all_code_blocks"]: + continue # Skip files with nothing completed yet in remaining list + if not state["all_code_blocks"]: + continue + + code_preview = "\n\n".join(state["all_code_blocks"]) + # Truncate if very long + code_lines = code_preview.split("\n") + if len(code_lines) > 40: + code_preview = "\n".join(code_lines[:40]) + "\n# ... (truncated)" + + completed_parts.append( + f"File: `{file_path}` (already designed)\n" + f"```python\n{code_preview}\n```" + ) + + completed_context = ( + "\n\n".join(completed_parts) if completed_parts + else "No files designed yet in this subtree." 
+ ) + + # Assemble user prompt + # Detect import convention from file paths + import_convention = "" + if remaining_files: + # Infer prefix from file paths in this subtree + sample_path = remaining_files[0] + parts = sample_path.replace("\\", "/").split("/") + if len(parts) >= 2 and parts[0] == "src": + prefix = f"src.{parts[1]}" + import_convention = build_import_convention_snippet(prefix=prefix) + + prompt = f"""[Begin Subtree Interface Design] + +Design interfaces for ALL of the following files, in the listed order. +Each file's features must be fully covered. Later files may import from earlier ones. + +{import_convention} +=== Files to Design (in implementation order) === +{files_section} + +=== Global Context === + +--- Repository Info --- +{repo_info} + +--- Data Flow Graph --- +{data_flow_str} + +--- Upstream Context (from other subtrees) --- +{upstream_context} + +--- Already Designed in This Subtree --- +{completed_context} + +--- Available Base Classes & Data Structures --- +{base_classes_str} +""" + + if last_error: + prompt += f"\n\n[Previous Iteration Feedback]: {last_error}" + + # Add overall progress + total_target = sum( + len(file_states[fp]["target_features"]) + for fp in remaining_files if fp in file_states + ) + total_covered = sum( + len(file_states[fp]["covered_features"]) + for fp in remaining_files if fp in file_states + ) + if total_covered > 0: + prompt += ( + f"\n\n[Progress]: {total_covered}/{total_target + total_covered} features " + f"covered across remaining files. " + f"Please cover all remaining features." + ) + + return prompt + + @staticmethod + def _build_file_result( + file_path: str, + all_interfaces: List[Dict[str, Any]], + all_code_blocks: List[str], + target_features: Set[str], + covered_features: Set[str], + ) -> Tuple[Dict[str, Any], List[Dict[str, str]]]: + """Build the result dict for a single file (compatible with InterfaceAgent output). 
+ + Returns: + Tuple of (file_result_dict, new_features_list) + where new_features_list contains dicts with feature_path, unit_name, file_path + """ + final_code = "\n\n".join(all_code_blocks) if all_code_blocks else "" + success = covered_features >= target_features + + units = [] + units_to_features = {} + units_to_code = {} + new_features_list = [] # Collect new features for top-level reporting + + for interface in all_interfaces: + interface_name = interface.get("name", "") + if not interface_name: + continue + + features = interface.get("features", []) + parsed_units = interface.get("parsed_units", []) + + # Identify new features (those not in target_features) + new_features = [f for f in features if f not in target_features] + + if parsed_units: + for unit in parsed_units: + unit_key = f"{unit.unit_type} {unit.name}" + if unit_key not in units: + units.append(unit_key) + units_to_features[unit_key] = features + # Track new features + for nf in new_features: + new_features_list.append({ + "feature_path": nf, + "unit_name": unit_key, + "file_path": file_path, + }) + try: + _, unit_code = unit.count_lines(original=True, return_code=True) + units_to_code[unit_key] = unit_code + except Exception: + units_to_code[unit_key] = interface.get("code", "") + else: + if interface_name not in units: + units.append(interface_name) + units_to_features[interface_name] = features + # Track new features + for nf in new_features: + new_features_list.append({ + "feature_path": nf, + "unit_name": interface_name, + "file_path": file_path, + }) + units_to_code[interface_name] = interface.get("code", "") + + result = { + "file_path": file_path, + "file_code": final_code, + "units": units, + "units_to_features": units_to_features, + "units_to_code": units_to_code, + "success": success, + } + + return result, new_features_list + + +# ============================================================================ +# Interface Orchestrator (Full Workflow) +# 
============================================================================ + +class InterfaceOrchestrator: + """Orchestrates interface design across all subtrees and files.""" + + def __init__( + self, + llm_client: Optional[LLMClient] = None, + max_file_iterations: int = 10, + max_planning_retries: int = 3, + logger: Optional[logging.Logger] = None, + trajectory: Optional[Any] = None, + step_id: Optional[int] = None, + output_path: Optional[str] = None + ): + # Create LLMClient with trajectory support if not provided + if llm_client is None: + self.llm = LLMClient(trajectory=trajectory, step_id=step_id) + else: + self.llm = llm_client + # Update trajectory info on existing client + if trajectory is not None: + self.llm.set_trajectory(trajectory, step_id) + self.max_file_iterations = max_file_iterations + self.max_planning_retries = max_planning_retries + self.logger = logger or logging.getLogger(__name__) + self.trajectory = trajectory + self.step_id = step_id + self.output_path = output_path + + def design_all_interfaces( + self, + skeleton: Dict[str, Any], + data_flow: Dict[str, Any], + base_classes: List[Dict[str, Any]], + repo_info: str, + dependency_collector: Optional[DependencyCollector] = None, + data_structures: Optional[List[Dict[str, Any]]] = None + ) -> Dict[str, Any]: + """Design interfaces for all files in the skeleton. 
+ + Args: + skeleton: The skeleton.json data + data_flow: The data_flow.json data + base_classes: List of base class definitions + repo_info: Repository description + dependency_collector: Optional collector for fine-grained dependencies + data_structures: Optional list of data flow data structure definitions + + Returns: + Dict with all interfaces organized by subtree + """ + # Get subtree order from data flow + subtree_order = data_flow.get("subtree_order", []) + data_flow_edges = data_flow.get("data_flow", []) + + # If no subtree order, extract from skeleton + if not subtree_order: + subtree_order = self._extract_subtree_names(skeleton) + + self.logger.info(f"[InterfaceOrchestrator] Processing {len(subtree_order)} subtrees") + self.logger.info(f"[InterfaceOrchestrator] Subtree order: {subtree_order}") + + # Format base classes and data structures together for prompt context + base_classes_str = format_base_classes_and_data_structures( + base_classes, data_structures or [] + ) + + # Build base_class_files mapping for dependency analysis + # Include both base_classes and data_structures + base_class_files = self._build_base_class_files_mapping( + base_classes, data_structures=data_structures + ) + + # --- Initialize GlobalInterfaceRegistry --- + global_registry = GlobalInterfaceRegistry() + + # Track state across subtrees + all_interfaces = {} + implemented_subtrees = {} # subtree -> list of implemented file info + all_import_warnings = [] # collect import cross-validation warnings + all_new_features = [] # collect new features created across all subtrees + + # Process each subtree + for subtree_name in subtree_order: + self.logger.info(f"[InterfaceOrchestrator] Processing subtree: {subtree_name}") + + # Find files for this subtree + file_nodes = self._find_files_for_subtree(skeleton, subtree_name) + if not file_nodes: + self.logger.warning(f"No files found for subtree: {subtree_name}") + continue + + self.logger.info(f"[InterfaceOrchestrator] Found 
{len(file_nodes)} files for {subtree_name}") + + # --- Merge global registry symbols into base_class_files --- + # This allows DependencyCollector to resolve cross-subtree callees + global_symbols = global_registry.get_all_public_symbols() + for symbol_name, symbol_file in global_symbols.items(): + if symbol_name not in base_class_files: + base_class_files[symbol_name] = symbol_file + + # Plan file order + file_order = self._plan_file_order(file_nodes, repo_info, subtree_name=subtree_name) + + # Build context once for the whole subtree + filtered_data_flow_str = self._filter_data_flow_for_subtree( + data_flow_edges, subtree_name + ) + + # --- Enhanced upstream context with structured interface listings --- + upstream_context = self._build_upstream_context_for_subtree( + data_flow_edges, subtree_name, implemented_subtrees, + global_registry=global_registry + ) + + # Design all files in this subtree in a single LLM session + agent = SubtreeInterfaceAgent( + llm_client=self.llm, + max_iterations=self.max_file_iterations, + logger=self.logger + ) + + file_results = agent.design_subtree_interfaces( + file_nodes=file_nodes, + file_order=file_order, + repo_info=repo_info, + data_flow_str=filtered_data_flow_str, + base_classes_str=base_classes_str, + upstream_context=upstream_context, + dependency_collector=dependency_collector, + base_class_files=base_class_files, + subtree_name=subtree_name, + ) + + # Extract new features from this subtree + subtree_new_features = file_results.pop("__new_features__", []) + for nf in subtree_new_features: + nf["subtree"] = subtree_name + all_new_features.extend(subtree_new_features) + + # Process results for each file + subtree_implemented = [] + subtree_interfaces = {} + + for file_path in file_order: + result = file_results.get(file_path) + if not result: + continue + + # Store interface data + subtree_interfaces[file_path] = { + "file_code": result.get("file_code", ""), + "units": result.get("units", []), + "units_to_features": 
result.get("units_to_features", {}), + "units_to_code": result.get("units_to_code", {}) + } + + file_node = next((f for f in file_nodes if f["path"] == file_path), None) + file_features = file_node.get("feature_paths", []) if file_node else [] + + if result.get("success"): + subtree_implemented.append({ + "path": file_path, + "features": file_features, + "code": result.get("file_code", ""), + "units": result.get("units", []), + "units_to_features": result.get("units_to_features", {}) + }) + self.logger.info(f"[InterfaceOrchestrator] [OK] Completed {file_path}") + else: + self.logger.warning(f"[InterfaceOrchestrator] [FAIL] Failed {file_path}") + + # --- A1: Register completed subtree interfaces to GlobalInterfaceRegistry --- + global_registry.register_from_subtree_result(subtree_name, subtree_interfaces) + self.logger.info( + f"[InterfaceOrchestrator] Registered {len(subtree_interfaces)} files " + f"from '{subtree_name}' to GlobalInterfaceRegistry " + f"(total symbols: {len(global_registry.get_all_public_symbols())})" + ) + + # --- A2: Import cross-validation for this subtree --- + for file_path, file_data in subtree_interfaces.items(): + file_code = file_data.get("file_code", "") + # Collect declared calls from dependency_collector for this file + declared_calls = set() + if dependency_collector: + for edge in dependency_collector.invocation_edges: + if edge.get("caller_file") == file_path: + declared_calls.add(edge.get("callee", "")) + + warnings = cross_validate_imports_vs_calls( + code=file_code, + file_path=file_path, + declared_calls=list(declared_calls), + global_registry=global_registry + ) + if warnings: + all_import_warnings.extend(warnings) + for w in warnings: + self.logger.info( + f"[ImportValidation] {w['message']}" + ) + + # Store subtree results + all_interfaces[subtree_name] = { + "files_order": file_order, + "interfaces": subtree_interfaces + } + implemented_subtrees[subtree_name] = subtree_implemented + + # Save after each subtree + 
self._save_interfaces( + self._build_result(all_interfaces, subtree_order, implemented_subtrees) + ) + + # Compile final result + final_result = self._build_result(all_interfaces, subtree_order, implemented_subtrees) + + # Store import warnings and global registry in result for downstream use + final_result["_import_warnings"] = all_import_warnings + final_result["_global_registry"] = global_registry + + # Store new features for output and RPG update + if all_new_features: + final_result["new_features"] = all_new_features + self.logger.info( + f"[InterfaceOrchestrator] Created {len(all_new_features)} new features " + f"for glue/orchestration code" + ) + + self._save_interfaces(final_result) + return final_result + + def _build_result( + self, + all_interfaces: Dict[str, Any], + subtree_order: List[str], + implemented_subtrees: Dict[str, List[Dict[str, Any]]] + ) -> Dict[str, Any]: + """Build the result dict from current state.""" + return { + "subtrees": all_interfaces, + "subtree_order": subtree_order, + "implemented_subtrees": { + st: [f["path"] for f in files] + for st, files in implemented_subtrees.items() + }, + "success": True + } + + def _save_interfaces(self, result: Dict[str, Any]) -> None: + """Save current interfaces result to output_path (if configured). + + Strips internal keys (prefixed with '_') that contain non-serializable + objects before writing to JSON. 
+ """ + if not self.output_path: + return + try: + output = Path(self.output_path) + output.parent.mkdir(parents=True, exist_ok=True) + # Filter out non-serializable internal keys + serializable = { + k: v for k, v in result.items() + if not k.startswith("_") + } + with open(output, "w", encoding="utf-8") as f: + json.dump(serializable, f, indent=2, ensure_ascii=False) + self.logger.info(f"[InterfaceOrchestrator] Saved interfaces to {output}") + except Exception as e: + self.logger.warning(f"[InterfaceOrchestrator] Failed to save interfaces: {e}") + + def _build_base_class_files_mapping( + self, + base_classes: List[Dict[str, Any]], + data_structures: Optional[List[Dict[str, Any]]] = None + ) -> Dict[str, str]: + """Build a mapping from class/type names to their file paths. + + Args: + base_classes: List of base class definitions from base_classes.json + data_structures: Optional list of data structure definitions + (only entries with file_path assigned are included) + + Returns: + Dict mapping class/type names to file paths + """ + mapping = {} + + # Process base classes + for bc in base_classes: + file_path = bc.get("file_path", "") + code = bc.get("code", "") + + if not file_path or not code: + continue + + # Parse code to extract class and type names + try: + tree = ast.parse(code) + for node in ast.walk(tree): + if isinstance(node, ast.ClassDef): + mapping[node.name] = file_path + elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)): + # Top-level functions might be utilities + mapping[node.name] = file_path + except SyntaxError: + continue + + # Process data structures (only those with file_path already assigned) + if data_structures: + for ds in data_structures: + file_path = ds.get("file_path", "") + code = ds.get("code", "") + + if not file_path or not code: + continue + + try: + tree = ast.parse(code) + for node in ast.walk(tree): + if isinstance(node, ast.ClassDef): + mapping[node.name] = file_path + except SyntaxError: + continue + + # Also 
map data_flow_types names to file paths + for dt_name in ds.get("data_flow_types", []): + if dt_name not in mapping: + mapping[dt_name] = file_path + + return mapping + + def _extract_subtree_names(self, skeleton: Dict[str, Any]) -> List[str]: + """Extract subtree/component names from skeleton.""" + components = set() + + def traverse(node): + if node.get("type") == "file": + for fp in node.get("feature_paths", []): + if "/" in fp: + components.add(fp.split("/")[0]) + elif node.get("type") == "directory": + for child in node.get("children", []): + traverse(child) + + root = skeleton.get("root", skeleton) + traverse(root) + return sorted(list(components)) + + def _find_files_for_subtree( + self, + skeleton: Dict[str, Any], + subtree_name: str + ) -> List[Dict[str, Any]]: + """Find all files belonging to a subtree.""" + files = [] + + def traverse(node): + if node.get("type") == "file": + feature_paths = node.get("feature_paths", []) + # Check if any feature path belongs to this subtree + for fp in feature_paths: + if fp.startswith(subtree_name + "/") or fp == subtree_name: + files.append({ + "path": node.get("path", ""), + "feature_paths": feature_paths + }) + break + elif node.get("type") == "directory": + for child in node.get("children", []): + traverse(child) + + root = skeleton.get("root", skeleton) + traverse(root) + return files + + def _plan_file_order( + self, + file_nodes: List[Dict[str, Any]], + repo_info: str, + subtree_name: str, + ) -> List[str]: + """Plan the implementation order for files.""" + file_paths = [f["path"] for f in file_nodes] + + if len(file_paths) <= 1: + return file_paths + + # Build files info for planning prompt + files_info = {} + for node in file_nodes: + files_info[node["path"]] = node.get("feature_paths", []) + + files_to_planned = "" + for path, features in files_info.items(): + feature_str = "\n - ".join(features) if features else "(no features)" + files_to_planned += f"- {path}:\n - {feature_str}\n\n" + + # Build planning 
prompt + prompt = PLAN_FILE_PROMPT.format( + repo_info=repo_info, + trees_info="(Feature tree omitted for brevity)", + files_to_planned=files_to_planned + ) + + # Try to get valid graph + for attempt in range(self.max_planning_retries): + try: + response = self.llm.generate(prompt, purpose=f"plan_file_order_{subtree_name}_{attempt + 1}") + parsed = self.llm.parse_json_block(response) + + if not parsed: + continue + + graph = parsed.get("file_implementation_graph", []) + feedback, is_valid = validate_file_implementation_graph(graph, file_paths) + + if is_valid: + order = topo_sort_file_graph(graph) + if order: + return order + + except Exception as e: + self.logger.warning(f"File planning attempt {attempt + 1} failed: {e}") + + # Fallback: return files in original order + self.logger.warning("Using fallback file order (no planning)") + return file_paths + + def _filter_data_flow_for_file( + self, + data_flow_edges: List[Dict[str, Any]], + current_subtree: str, + file_path: str + ) -> str: + """Filter data flow edges to only include those related to current file. + + Includes edges where: + - source or target is the current subtree + - edges directly connected to the current subtree's neighbors + """ + if not data_flow_edges: + return "No data flow defined." + + # Find subtrees directly connected to current subtree + connected_subtrees = {current_subtree} + for edge in data_flow_edges: + source = edge.get("source", "") + target = edge.get("target", "") + if source == current_subtree: + connected_subtrees.add(target) + if target == current_subtree: + connected_subtrees.add(source) + + # Filter edges that involve connected subtrees + filtered_edges = [] + for edge in data_flow_edges: + source = edge.get("source", "") + target = edge.get("target", "") + if source in connected_subtrees or target in connected_subtrees: + filtered_edges.append(edge) + + if not filtered_edges: + return "No related data flow for this file." 
+ + return format_data_flow_edges(filtered_edges) + + def _filter_data_flow_for_subtree( + self, + data_flow_edges: List[Dict[str, Any]], + current_subtree: str + ) -> str: + """Filter data flow edges to include those related to the current subtree. + + Includes edges where: + - source or target is the current subtree + - edges between subtrees directly connected to current subtree + """ + if not data_flow_edges: + return "No data flow defined." + + # Find subtrees directly connected to current subtree + connected_subtrees = {current_subtree} + for edge in data_flow_edges: + source = edge.get("source", "") + target = edge.get("target", "") + if source == current_subtree: + connected_subtrees.add(target) + if target == current_subtree: + connected_subtrees.add(source) + + # Filter edges that involve connected subtrees + filtered_edges = [] + for edge in data_flow_edges: + source = edge.get("source", "") + target = edge.get("target", "") + if source in connected_subtrees or target in connected_subtrees: + filtered_edges.append(edge) + + if not filtered_edges: + return "No related data flow for this subtree." + + return format_data_flow_edges(filtered_edges) + + def _build_upstream_context_for_subtree( + self, + data_flow_edges: List[Dict[str, Any]], + current_subtree: str, + implemented_subtrees: Dict[str, List[Dict[str, Any]]], + top_n: int = 30, + global_registry: Optional[GlobalInterfaceRegistry] = None + ) -> str: + """Build context from upstream subtrees for an entire subtree. + + Unlike _build_upstream_context (per-file), this does NOT include + within-subtree files since all files are being designed together. + Only includes files from upstream (connected) subtrees. + + When global_registry is provided (A3 enhancement), prepends a + structured interface listing before the code snippets, giving the + LLM a clear, concise view of all callable interfaces. 
+ """ + # Find connected subtrees + upstream_names = set() + for edge in data_flow_edges: + if edge.get("target") == current_subtree: + upstream_names.add(edge.get("source", "")) + if edge.get("source") == current_subtree: + upstream_names.add(edge.get("target", "")) + + context_parts = [] + + # --- A3: Structured interface listing (prepended) --- + if global_registry and upstream_names: + structured_listing = global_registry.get_all_structured_listings_for_upstream( + upstream_names + ) + if structured_listing and structured_listing != "No upstream interfaces available.": + context_parts.append( + "=== Available Interfaces from Upstream Subtrees ===\n" + "(You can import and call these interfaces in your designs)\n\n" + f"{structured_listing}\n" + ) + + # --- Integration directives from data flow --- + inbound_edges = [ + e for e in data_flow_edges if e.get("target") == current_subtree + ] + outbound_edges = [ + e for e in data_flow_edges if e.get("source") == current_subtree + ] + + if inbound_edges or outbound_edges: + directive_parts = [ + f'=== Integration Contracts for "{current_subtree}" ===', + "Your subtree has the following data flow contracts.", + "Design your interfaces to fulfill these contracts.\n", + ] + + if inbound_edges: + directive_parts.append("INBOUND (your subtree must consume):") + for edge in inbound_edges: + source = edge.get("source", "?") + data_type = edge.get("data_type", "?") + transformation = edge.get("transformation", "") + line = f' - {data_type} from "{source}"' + # Try to find the producing interface in global_registry + if global_registry: + for fp, unit_list in global_registry.file_units.items(): + found = False + for ui in unit_list: + if (ui.get("subtree_name") == source + and data_type + in ui.get("signature_summary", "")): + line += ( + f"\n Produced by: {ui['unit_type']} " + f"{ui['bare_name']} in {fp}" + ) + found = True + break + if found: + break + if transformation: + line += f"\n Context: {transformation}" + 
directive_parts.append(line) + directive_parts.append( + " \u2192 Design at least one interface that " + "accepts/imports the above data.\n" + ) + + if outbound_edges: + directive_parts.append("OUTBOUND (your subtree must produce):") + for edge in outbound_edges: + target = edge.get("target", "?") + data_type = edge.get("data_type", "?") + transformation = edge.get("transformation", "") + line = f' - {data_type} to "{target}"' + if transformation: + line += f"\n Context: {transformation}" + directive_parts.append(line) + directive_parts.append( + " \u2192 Design at least one interface that " + "produces/returns the above data.\n" + ) + + context_parts.append("\n".join(directive_parts) + "\n") + + # --- Original code snippet context --- + included_paths = set() + code_parts = [] + + for upstream in sorted(upstream_names): + impl_files = implemented_subtrees.get(upstream, []) + if not impl_files: + continue + + for file_info in impl_files: + if len(code_parts) >= top_n: + break + + path = file_info.get("path", "") + if path in included_paths: + continue + + included_paths.add(path) + features = ", ".join(file_info.get("features", [])[:5]) + code = file_info.get("code", "") + code_lines = code.split("\n")[:30] + code_skeleton = "\n".join(code_lines) + + code_parts.append( + f"### From module: `{upstream}`\n" + f"File: `{path}`\n" + f"Features: {features}\n" + f"```python\n{code_skeleton}\n```\n" + ) + + if code_parts: + context_parts.extend(code_parts[:top_n]) + + if not context_parts: + return "No upstream modules connected to this subtree." + + return "\n".join(context_parts) + + def _build_upstream_context( + self, + data_flow_edges: List[Dict[str, Any]], + current_subtree: str, + implemented_subtrees: Dict[str, List[Dict[str, Any]]], + top_n: int = 20, + file_path: Optional[str] = None, + subtree_implemented: Optional[List[Dict[str, Any]]] = None + ) -> str: + """Build context from upstream subtrees, filtered for relevance. + + Includes: + 1. 
Files in the same directory as current file + 2. Files with edges connected to current file's units + """ + # Get current file's directory + current_dir = str(Path(file_path).parent) if file_path else "" + + # Find upstream subtrees + upstream_names = set() + for edge in data_flow_edges: + if edge.get("target") == current_subtree: + upstream_names.add(edge.get("source", "")) + if edge.get("source") == current_subtree: + upstream_names.add(edge.get("target", "")) + + context_parts = [] + included_paths = set() + + # 1. First add files from same directory in current subtree + if subtree_implemented and current_dir: + for file_info in subtree_implemented: + path = file_info.get("path", "") + if path in included_paths: + continue + file_dir = str(Path(path).parent) + if file_dir == current_dir: + included_paths.add(path) + features = ", ".join(file_info.get("features", [])[:5]) + code = file_info.get("code", "") + code_lines = code.split("\n")[:30] + code_skeleton = "\n".join(code_lines) + + context_parts.append( + f"### Same directory: `{current_dir}`\n" + f"File: `{path}`\n" + f"Features: {features}\n" + f"```python\n{code_skeleton}\n```\n" + ) + if len(context_parts) >= top_n: + break + + # 2. 
Add files from upstream subtrees + if upstream_names and len(context_parts) < top_n: + for upstream in sorted(upstream_names): + impl_files = implemented_subtrees.get(upstream, []) + if not impl_files: + continue + + for file_info in impl_files: + if len(context_parts) >= top_n: + break + + path = file_info.get("path", "") + if path in included_paths: + continue + + included_paths.add(path) + features = ", ".join(file_info.get("features", [])[:5]) + code = file_info.get("code", "") + code_lines = code.split("\n")[:30] + code_skeleton = "\n".join(code_lines) + + context_parts.append( + f"### From module: `{upstream}`\n" + f"File: `{path}`\n" + f"Features: {features}\n" + f"```python\n{code_skeleton}\n```\n" + ) + + if not context_parts: + return "No upstream modules connected to this subtree." + + return "\n".join(context_parts[:top_n]) + + def _build_implemented_summary( + self, + implemented_files: List[Dict[str, Any]], + file_path: Optional[str] = None, + top_n: int = 20 + ) -> str: + """Build summary of implemented files in current subtree. + + Prioritizes files in the same directory as the current file. + """ + if not implemented_files: + return "No files implemented yet in this subtree." 
+ + # Get current file's directory + current_dir = str(Path(file_path).parent) if file_path else "" + + # Separate files: same directory first, then others + same_dir_files = [] + other_files = [] + + for file_info in implemented_files: + path = file_info.get("path", "") + file_dir = str(Path(path).parent) + if current_dir and file_dir == current_dir: + same_dir_files.append(file_info) + else: + other_files.append(file_info) + + # Prioritize same directory files, then add others up to top_n + prioritized_files = same_dir_files + other_files + selected_files = prioritized_files[-top_n:] # Take last top_n (most recent) + + parts = [] + for file_info in selected_files: + path = file_info.get("path", "") + features = file_info.get("features", [])[:5] + features_str = ", ".join(features) + + code = file_info.get("code", "") + code_lines = code.split("\n")[:20] + code_skeleton = "\n".join(code_lines) + + # Mark if same directory + dir_marker = " (same dir)" if current_dir and str(Path(path).parent) == current_dir else "" + + parts.append( + f"#### Implemented File: `{path}`{dir_marker}\n" + f"Features: {features_str}\n" + f"```python\n{code_skeleton}\n```\n" + ) + + return "\n".join(parts) + + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + + # Test + agent = InterfaceAgent() + result = agent.design_file_interface( + file_path="src/module/file.py", + file_features=["module/feature1", "module/feature2"], + repo_info="A test repository", + data_flow_str="A -> B: Data", + base_classes_str="No base classes", + upstream_context="No upstream context", + implemented_summary="No implemented files" + ) + print(json.dumps({k: v for k, v in result.items() if k != "code"}, indent=2)) diff --git a/RPG-Kit/scripts/func_design/interface_prompts.py b/RPG-Kit/scripts/func_design/interface_prompts.py new file mode 100644 index 0000000..a057002 --- /dev/null +++ b/RPG-Kit/scripts/func_design/interface_prompts.py @@ -0,0 +1,370 @@ +#!/usr/bin/env python3 
+"""Interface Design Prompts. + +This module contains prompts for interface design stage. +""" + +# ============================================================================ +# Interface Design Prompts +# ============================================================================ + +INTERFACE_PROMPT = """ +You are designing interfaces (functions or classes) for a large, production-oriented Python repository. +The goal is not to write arbitrary APIs, but to define interfaces that integrate cleanly into the repository's architecture, respect existing data flows, and follow established conventions for modules, base classes, and shared data structures. + +## Objective +For each invocation: +1. Select exactly one assigned feature, or a small group of closely related features. +2. Define exactly one public interface for it (either a function or a class). +3. Provide the following elements: + - All required imports: + - standard library imports + - external dependency imports + - internal project imports + - The interface definition: + - function or class signature only + - no implementation logic (function and method bodies must contain only `pass`) + - A precise docstring documenting: + - purpose and intended usage context within the repository + - parameters, including names, types, and semantics + - return type and meaning + - assumptions, constraints, error conditions, and edge cases +4. Do not generate placeholder logic or pseudo-implementation. Only define signatures and `pass`. +5. Interface design is incremental. Each round may define one or a small number of interfaces, but each must be self-contained and justified. + +## Repository Context and Constraints +All interfaces must: +1. Align with the repository's data flow patterns. +2. Use existing shared data structures or typed DTOs where applicable, instead of inventing new ad hoc structures. +3. Inherit from existing base classes when the feature conceptually fits into existing extensibility hierarchies. 
+4. Call or integrate with internal utility components when appropriate rather than duplicating behavior. +5. Avoid speculative abstractions that are unrelated to the repository's direction. + +Interfaces should feel like natural extensions of the repository, not isolated standalone utilities. +## Function vs Class Decision Rules +A function is appropriate when: +- the operation is conceptually a single computation or transformation, +- the logic is stateless, +- configuration is provided entirely by parameters, +- the operation does not manage lifecycle or persistent state. +- Helper functions are permitted, but only when they clearly support higher-level components rather than replacing them. + +A class is appropriate when: +- configuration persists across multiple calls, +- internal state influences behavior, +- multiple related operations belong together, +- subclassing, strategy replacement, or pluggable behavior is expected. + +## Cohesion, Scope, and Grouping +1. Interfaces must correspond to a single coherent responsibility. +2. Do not merge unrelated features merely to reduce the number of interfaces. +3. Group features only when they share state, configuration, contract expectations, or lifecycle. +4. If an interface cannot be described in one clear sentence, it likely needs to be split. + +## Type Requirements +1. Type annotations are mandatory for all parameters and return values. +2. The type `Any` is strictly prohibited. +3. Use one of the following instead when uncertainty exists: + - concrete application data structures already defined in the repository, + - generic types such as `Optional[T]`, `Union[T1, T2]`, `Mapping[...]`, `Iterable[...]`, + - well-defined protocol or typed base class. + +Types must be meaningful, stable, and reflect real usage. + +## Architectural Fit +Design each interface so that it: +1. can be extended without breaking current callers, +2. avoids unnecessary coupling between unrelated modules, +3. 
does not bypass existing architectural layers,
4. uses names consistent with repository modules, packages, and conventions.

Prefer explicit, predictable contracts over overly generic APIs.

## Integration & Wiring Requirements
Every interface must have a clear integration story within the repository's call graph.

1. **Caller/callee awareness**: For each interface, consider:
   - WHO will call this interface? (another interface in this file, an upstream/downstream module, or external invocation)
   - WHAT does this interface call? (other interfaces from upstream context or base classes)
2. **No islands**: An interface that is never called by anyone AND calls nothing is dead code. The only exceptions are top-level interfaces — units not expected to be called by other internal modules (e.g., application entry points, standalone submodules, externally-invoked APIs, framework callbacks).
3. **Upstream integration**: When upstream context provides interfaces that produce data you need, import and use them (or accept their output types as parameters). Don't redefine what already exists upstream.
4. **Execution path test**: For each interface, ask: "Can I trace a plausible execution path from a top-level orchestrator to this code?" If not, the interface is likely orphaned.

## Action Space
For each interface, you MUST declare its dependencies:
- **inherits_from**: List of base class names this class inherits from (from base_classes or upstream modules). Empty list [] if none.
- **calls**: List of function/method names this interface expects to call during execution. Empty list [] if none.
- **uses_types**: List of type names used in parameters, return values, or internally (from base_classes or data structures). Empty list [] if none.

## Output Format
Your response must contain exactly one reasoning block and exactly one JSON block:

For each interface you design, reason through:
1.
Evaluate alternative interface shapes and justify your decision against the repository architecture. +2. WHO will call this interface? Name the specific caller, or explain why it is a top-level interface with no internal caller. +3. WHAT does it call from upstream context or base classes? Name specific interfaces you will import. +4. Do the input/output types align with the data flow contracts? + + +{{ + "interfaces": [ + {{ + "features": [ + "fully/qualified/feature/path_1", + "fully/qualified/feature/path_2" + ], + "code": "Python code string with imports, class/function signature, docstring, and pass body", + "dependencies": {{ + "inherits_from": ["BaseClassName"], + "calls": ["function_or_method_name"], + "uses_types": ["TypeName"] + }} + }} + ] +}} + + +Constraints: +- One interface per code string, covering one feature or a tight group of related features. +- The code must define either one top-level function OR one top-level class (with zero or more methods). +- All function/method bodies must use `pass`. +- Public functions and classes must have docstrings. +- Prefer explicit, custom containers and typed structures; do not use pandas.DataFrame or other third-party tabular types. +""".strip() + + +# ============================================================================ +# Subtree-Level Interface Design Prompts +# ============================================================================ + +SUBTREE_INTERFACE_PROMPT = """ +You are designing interfaces (functions or classes) for a large, production-oriented Python repository. +The goal is to define interfaces that integrate cleanly into the repository's architecture, respect existing data flows, and follow established conventions. + +## Objective +You are given files within the same subtree (functional area), listed in implementation dependency order. +Design interfaces for **ALL** files, processing them **sequentially** in the given order. 
+Later files may depend on and reference interfaces from earlier files in this batch. + +For each file: +1. Cover ALL assigned feature paths โ€” no feature left uncovered. +2. Each interface covers one feature or a small group of closely related features. +3. For each interface, provide: + - Required imports (standard library, external, internal project) + - The interface definition: function or class signature only, with `pass` bodies (no implementation logic) + - A docstring covering: purpose, parameters with types and semantics, return type, and notable constraints or edge cases +4. You MAY import and reuse symbols from upstream context, base classes, and earlier files in this batch. +5. **Glue/Orchestration Code**: If you need to create orchestrator classes, manager facades, or data structures that integrate multiple features but don't map to any assigned feature, you MAY create NEW feature paths for them. Simply include these new feature paths in the `features` field. New feature paths should follow the same naming convention as existing ones (e.g., "Subtree Name/category/feature name"). + +## Design Guidelines +### Function vs Class +Use a **function** for stateless, single-operation computations where all configuration is provided by parameters. +Use a **class** when state persists across calls, multiple related operations belong together, or subclassing/pluggable behavior is expected. + +### Cohesion and Grouping +- Each interface must correspond to a single coherent responsibility. +- Group features only when they share state, configuration, or lifecycle โ€” not merely to reduce count. +- If an interface cannot be described in one sentence, it likely needs to be split. + +### Type Annotations +- Type annotations are mandatory for all parameters and return values. +- `Any` is strictly prohibited. Use concrete project-defined types, generics (`Optional[T]`, `Union[T1, T2]`, `Mapping[...]`, etc.), or protocol/base classes instead. 
+ +## Repository Constraints +All interfaces must: +1. Align with the repository's data flow patterns and use existing shared data structures or typed DTOs. +2. Inherit from existing base classes and integrate with internal utilities when appropriate. +3. Be extensible without breaking callers, avoid unnecessary coupling, and respect architectural layers. +4. Use names consistent with repository conventions. + +## Integration & Wiring Requirements +Every interface must have a clear integration story within the repository's call graph. + +1. **Caller/callee awareness**: For each interface, identify: + - WHO will call it? (a specific interface in this subtree, an upstream caller, or external invocation) + - WHAT does it call? (other interfaces in this subtree, or upstream interfaces shown in context) +2. **No islands**: An interface that is never called by anyone AND calls nothing is dead code. The only exceptions are top-level interfaces โ€” units not expected to be called by other internal modules (e.g., application entry points, standalone submodules, externally-invoked APIs, framework callbacks). +3. **Explicit call chains**: Later files SHOULD import and call interfaces from earlier files. Files should form a connected call graph, not independent modules. +4. **Upstream integration**: When upstream context provides interfaces producing data your subtree needs, import and use them. Don't redefine what already exists upstream. +5. **Execution path test**: For each interface, ask: "Can I trace a plausible execution path from a top-level orchestrator to this code?" If not, the interface is likely orphaned. + +## Dependencies Field (Required) +For each interface, you MUST declare its dependencies: +- **inherits_from**: List of base class names this class inherits from (from base_classes, upstream modules, or earlier files in this batch). Empty list [] if none. +- **calls**: List of function/method names this interface expects to call during execution. 
Empty list [] if none. +- **uses_types**: List of type names used in parameters, return values, or internally (from base_classes, data structures, or earlier files). Empty list [] if none. + +## Output Format +Your response must contain exactly one block and exactly one block: + +For each file in order, reason through: +1. What interfaces are needed to cover all assigned features? +2. For EACH interface, explicitly identify: + a. WHO calls it? Name the specific caller (file + class/function), or explain why it is a top-level interface with no internal caller. + b. WHAT upstream or sibling interfaces does it call? Name them by file and name. + c. What data types flow in and out? Do they match the data flow contracts? +3. How does this file connect to earlier files in this batch? Describe the call chain. +4. If you cannot identify a caller for an interface, reconsider whether it should be standalone or merged into another interface that already has a clear caller. + + +{{ + "files": [ + {{ + "file_path": "src/module/file1.py", + "interfaces": [ + {{ + "features": ["fully/qualified/feature/path_1", "fully/qualified/feature/path_2"], + "code": "Python code string with imports, class/function signature, docstring, and pass body", + "dependencies": {{ + "inherits_from": ["BaseClassName"], + "calls": ["function_or_method_name"], + "uses_types": ["TypeName"] + }} + }} + ] + }} + ] +}} + + +Constraints: +- file_path must match exactly one of the file paths specified in the task. +- One interface per code string: either one top-level function OR one top-level class. +- All function/method bodies must use `pass`. +- Public functions and classes must have docstrings. +- For most interfaces, use the assigned feature paths from the task. +- For glue/orchestration code that doesn't map to any assigned feature, you may create NEW feature paths following the naming convention: "Subtree Name/category/feature name". 
+""".strip() + + +# ============================================================================ +# File Order Planning Prompt +# ============================================================================ + +PLAN_FILE_PROMPT = """ +You are an expert software architect assisting in planning feature implementation within a Python codebase. + +Your task is to construct an **implementation dependency graph** across a set of files that collectively realize a functional subtree of the system. +Each file corresponds to one or more feature paths. These features may have logical dependencies derived from the feature hierarchy and standard software layering principles. + +## Repository Context +### High-Level Repository Description +{repo_info} + +### Abstract Feature Tree (Omitting Low-Level Detail) +{trees_info} + +### Files to be planned +{files_to_planned} + +## Planning Guidelines +You must output a **directed acyclic graph (DAG)** over the given file paths, where: +- Each node represents a file (specified as a file path string). +- An edge from A to B means **file A must be implemented before file B**. +- The graph must include **all provided file paths** โ€” do not invent or omit file names. +- The graph must **not contain cycles**. +- Favor bottom-up ordering, respecting typical architecture layering + (e.g., utilities before logic, logic before interface layers). + +## Output Format (Strict Requirement) +You must output **only** the graph in the following exact JSON structure โ€” no explanations, no commentary, no formatting text: +{{ + "file_implementation_graph": [ + {{"from": "path/to/file1.py", "to": "path/to/file2.py"}}, + {{"from": "path/to/file2.py", "to": "path/to/file3.py"}} + ] +}} + +### Strict structural rules: +1. The top-level object must contain **exactly one key**: `"file_implementation_graph"`. +2. `"file_implementation_graph"` must be a JSON array. +3. 
Each element of the array must be an object with **exactly two fields**: + - `"from"` : a string equal to one of the provided file paths + - `"to"` : a string equal to one of the provided file paths +4. No other keys or fields are permitted. +5. No file path may appear that was not provided in the input. +6. The JSON must be valid and parseable โ€” **no trailing commas**, no comments, no text outside the JSON. +7. The graph must be a **DAG**: no cycles, no self-loops (`"from": X, "to": X"`), no implicit cycles. + +## Notice +- Your output must be **only** the JSON object matching the required structure. +- If dependencies are unclear, choose the most reasonable bottom-up ordering โ€” but still obey DAG constraints. +- Do not wrap the JSON in markdown (no ```json or ```). +""" + + +# ============================================================================ +# Orphan Unit Review Prompt +# ============================================================================ + +ORPHAN_REVIEW_PROMPT = """ +You are reviewing interface units that appear to be "orphaned" โ€” they have no incoming or outgoing call edges in the dependency graph. + +Your task: Determine whether each orphan unit is truly unnecessary, or whether it should be retained. + +## Review Criteria + +A unit should be **RETAINED** (not pruned) if: +1. It is a top-level entry point (main function, CLI handler, API endpoint, framework callback) +2. It is a data structure or configuration class that other code will instantiate directly +3. It implements a feature that is explicitly required by the project specification +4. It provides utility functions that are intended to be imported and used externally +5. It is part of a plugin/extension system where registration happens at runtime +6. The lack of edges is due to incomplete interface design (callers/callees not yet defined) + +A unit should be **PRUNED** (removed) if: +1. It duplicates functionality already provided by another unit +2. 
It was created speculatively but doesn't serve any concrete requirement +3. It is an internal helper that nothing actually needs +4. It is dead code that was superseded by a better design + +## Context + +You will be given: +- The orphan unit's code (interface definition) +- The features it claims to implement +- The subtree/module it belongs to +- Other units in the same subtree (for understanding relationships) + +## Output Format + +Return a JSON object: +{{ + "reviews": [ + {{ + "unit_key": "file_path::unit_name", + "decision": "retain" | "prune", + "reason": "Brief explanation of why this unit should be retained or pruned", + "edges": {{ + "inheritance_edges": [ + {{"child": "ChildClass", "parent": "ParentClass", "source_file": "path/to/child.py", "parent_file": "path/to/parent.py"}} + ], + "invocation_edges": [ + {{"caller": "function caller_func", "callee": "function callee_func", "caller_file": "path/to/caller.py", "callee_file": "path/to/callee.py"}} + ], + "reference_edges": [ + {{"unit": "function user_func", "referenced_type": "DataType", "source_file": "path/to/user.py", "type_file": "path/to/type.py"}} + ] + }} + }} + ] +}} + +## Edge Field Rules + +The `edges` field is **optional** but should be provided when: +- decision is "retain" AND +- the reason is that the interface design is incomplete (missing edges) + +Notes: +- For class names: use bare name like "Parser", not "class Parser" +- For function/method names in invocation: use full unit name like "function parse" or "class Parser" +- Only include edges that should exist based on the interface design + +If decision is "prune" or the unit is retained for other reasons (e.g., it's an entry point), omit the `edges` field or set it to null. + +Constraints: +- Every orphan unit provided must appear exactly once in the reviews list. +- decision must be exactly "retain" or "prune". +- reason should be concise (1-2 sentences) but specific. 
+""" diff --git a/RPG-Kit/scripts/func_design/interface_review.py b/RPG-Kit/scripts/func_design/interface_review.py new file mode 100644 index 0000000..e9f6511 --- /dev/null +++ b/RPG-Kit/scripts/func_design/interface_review.py @@ -0,0 +1,1273 @@ +#!/usr/bin/env python3 +"""Global Interface Review Module. + +Implements the Global Review phase for interface design, including: +- Entry point identification via LLM semantic reasoning +- Wiring completeness / call graph connectivity checks +- Cross-module type compatibility validation +- Automatic fix suggestions and application + +This module is invoked AFTER all per-subtree interface designs are complete, +but BEFORE the final interfaces.json is saved. +""" + +import json +import logging +import ast +from collections import defaultdict, deque +from dataclasses import dataclass, field +from typing import Dict, List, Optional, Tuple, Any, Set + +import sys +from pathlib import Path +sys.path.insert(0, str(Path(__file__).parent.parent)) + +from common import LLMClient + +from .interface_agent import ( + GlobalInterfaceRegistry, + DependencyCollector, + cross_validate_imports_vs_calls, +) +from .interface_prompts import ORPHAN_REVIEW_PROMPT + +logger = logging.getLogger(__name__) + + +# ============================================================================ +# Global Review Prompt +# ============================================================================ + +GLOBAL_INTERFACE_REVIEW_PROMPT = """ +You are a senior software engineer reviewing the COMPLETE set of interfaces for an entire Python repository. + +All subtrees have been designed. Your task is to review the interfaces holistically, +focusing on CROSS-MODULE integration โ€” not individual interface quality. 
+ +## Input +- All designed interfaces (grouped by subtree) +- Data flow DAG (subtree-level dependencies) +- Import cross-validation warnings (symbols imported but not declared as calls) + +## Review Tasks + +### Task 1: Identify Top-Level Interfaces +Identify which units (classes/functions) are **top-level interfaces** โ€” those that +are not expected to be called by other internal modules within this repository. + +Top-level interfaces are NOT limited to "files named main.py". They are units whose +role in the architecture means they don't need an internal caller. This includes: +- Application entry points: a `MainLoop` class, a CLI `main()` function, an `Application` class +- Standalone submodules: components that can function independently (e.g., a `TestRunner`, a `Benchmark` harness) +- Externally-invoked APIs: interfaces designed to be called by external code, plugins, or frameworks +- Framework callbacks: handlers registered with an event system or framework + +Use semantic judgment based on the module's role and the project's architecture. + +### Task 2: Wiring Completeness +- Does every non-top-level module's output have at least one consumer? +- Are there "island" modules that neither call nor are called by anyone? +- Do the identified top-level interfaces actually invoke the key subsystems? +- Are there missing orchestration layers? + +### Task 3: Call Chain Realism +- Can you trace a realistic execution path from each top-level interface to leaf modules? +- Are the parameter/return types compatible across call boundaries? + +### Task 4: Dependency Direction Consistency +- Do dependencies flow in the direction specified by the data_flow DAG? +- Are there undeclared reverse dependencies? + +## Output +You must return ONLY a valid JSON object with the following structure (no other text): +{ + "entry_points": [ + { + "file_path": "...", + "unit_name": "...", + "rationale": "..." 
+ } + ], + "orphan_modules": [ + { + "file_path": "...", + "unit_name": "...", + "reason": "..." + } + ], + "missing_wiring": [ + { + "from_unit": "...", + "from_file": "...", + "to_unit": "...", + "to_file": "...", + "description": "..." + } + ], + "type_mismatches": [ + { + "file_path": "...", + "unit_name": "...", + "description": "..." + } + ], + "orchestration_gaps": [ + { + "description": "...", + "suggested_location": "..." + } + ], + "recommended_fixes": [ + { + "action": "add_dependency", + "file_path": "...", + "unit_name": "...", + "description": "...", + "calls_to_add": [ + {"callee": "...", "callee_file": "...", "purpose": "..."} + ] + } + ], + "pass": true +} + +Important: +- "pass" should be true only if there are no orphan modules, no missing wiring, + and no orchestration gaps. +- recommended_fixes should contain concrete, actionable fixes. +- Each fix action must be one of: "add_dependency", "add_interface", "modify_interface" +- For "add_dependency" fixes, include "calls_to_add" with callee name and file. +""".strip() + + +# ============================================================================ +# Code-Based Structural Checks +# ============================================================================ + +def build_call_graph( + interfaces_data: Dict[str, Any], + enhanced_data_flow: Dict[str, Any] +) -> Tuple[Dict[str, Set[str]], Dict[str, Set[str]], Dict[str, str]]: + """Build a directed call graph from interfaces and enhanced_data_flow. 
def build_call_graph(
    interfaces_data: Dict[str, Any],
    enhanced_data_flow: Dict[str, Any]
) -> Tuple[Dict[str, Set[str]], Dict[str, Set[str]], Dict[str, str]]:
    """Construct the directed dependency graph over interface units.

    Nodes are unit keys of the form ``"<file_path>::<unit name>"``. Edges
    come from the invocation, inheritance, and reference edge lists in
    ``enhanced_data_flow``; endpoint names are resolved against the units
    declared in ``interfaces_data``.

    Returns:
        - outgoing: {unit_key -> set of callee unit_keys}
        - incoming: {unit_key -> set of caller unit_keys}
        - unit_to_file: {unit_key -> file_path}
    """
    outgoing: Dict[str, Set[str]] = defaultdict(set)
    incoming: Dict[str, Set[str]] = defaultdict(set)
    unit_to_file: Dict[str, str] = {}

    # Register every declared unit under its canonical key.
    for subtree_data in interfaces_data.get("subtrees", {}).values():
        per_file = subtree_data.get("interfaces", subtree_data.get("files", {}))
        for file_path, file_data in per_file.items():
            for unit_name in file_data.get("units", []):
                unit_to_file[f"{file_path}::{unit_name}"] = file_path

    # Name index: every key is reachable by its full unit name
    # (e.g. "class Foo") and by its bare name (e.g. "Foo").
    name_to_keys: Dict[str, List[str]] = defaultdict(list)
    for unit_key in unit_to_file:
        _prefix, sep, unit_name = unit_key.partition("::")
        if sep:
            _kind, has_kind, rest = unit_name.partition(" ")
            bare_name = rest if has_kind else unit_name
            name_to_keys[bare_name].append(unit_key)
            name_to_keys[unit_name].append(unit_key)

    def first_key(name: str) -> Optional[str]:
        # Fallback resolution: first registered unit with this name, if any.
        matches = name_to_keys.get(name, [])
        return matches[0] if matches else None

    # --- Invocation edges ---
    for edge in enhanced_data_flow.get("invocation_edges", []):
        caller = edge.get("caller", "")
        callee = edge.get("callee", "")
        caller_file = edge.get("caller_file", "")
        callee_file = edge.get("callee_file", "")

        # Caller: exact file-qualified key first, then name-based fallback.
        caller_key: Optional[str] = None
        if caller_file:
            caller_key = f"{caller_file}::{caller}"
            if caller_key not in unit_to_file:
                caller_key = first_key(caller)

        # Callee: exact key, then same-file name match, then any name match.
        callee_key: Optional[str]
        if callee_file:
            callee_key = f"{callee_file}::{callee}"
            if callee_key not in unit_to_file:
                in_file = [
                    k for k in name_to_keys.get(callee, [])
                    if unit_to_file.get(k) == callee_file
                ]
                callee_key = in_file[0] if in_file else first_key(callee)
        else:
            callee_key = first_key(callee)

        if caller_key and callee_key:
            outgoing[caller_key].add(callee_key)
            incoming[callee_key].add(caller_key)

    # --- Inheritance edges (child depends on parent) ---
    for edge in enhanced_data_flow.get("inheritance_edges", []):
        child_key = first_key(edge.get("child", ""))
        parent_key = first_key(edge.get("parent", ""))
        if child_key and parent_key:
            outgoing[child_key].add(parent_key)
            incoming[parent_key].add(child_key)

    # --- Reference edges (unit depends on referenced type) ---
    for edge in enhanced_data_flow.get("reference_edges", []):
        user_key = first_key(edge.get("unit", ""))
        type_key = first_key(edge.get("referenced_type", ""))
        if user_key and type_key:
            outgoing[user_key].add(type_key)
            incoming[type_key].add(user_key)

    return dict(outgoing), dict(incoming), unit_to_file


def check_call_graph_connectivity(
    interfaces_data: Dict[str, Any],
    enhanced_data_flow: Dict[str, Any],
    entry_points: List[Dict[str, Any]]
) -> Dict[str, Any]:
    """Check call-graph connectivity and report orphaned units.

    An orphan is a non-entry-point unit with no incoming edges (nothing
    calls, inherits from, or references it).

    Returns:
        Dict with keys: orphan_units, total_units, entry_point_count
    """
    _outgoing, incoming, unit_to_file = build_call_graph(
        interfaces_data, enhanced_data_flow
    )

    all_units = set(unit_to_file)

    # Resolve declared entry points to known unit keys: exact match first,
    # then a fuzzy suffix match on "::<unit_name>".
    entry_point_keys: Set[str] = set()
    for ep in entry_points:
        exact = f"{ep.get('file_path', '')}::{ep.get('unit_name', '')}"
        if exact in all_units:
            entry_point_keys.add(exact)
            continue
        suffix = f"::{ep.get('unit_name', '')}"
        for candidate in all_units:
            if candidate.endswith(suffix):
                entry_point_keys.add(candidate)
                break

    # Orphans: non-entry units nothing points at.
    orphan_units = [
        {"unit_key": uk, "file_path": unit_to_file.get(uk, "")}
        for uk in all_units - entry_point_keys
        if not incoming.get(uk)
    ]

    return {
        "orphan_units": orphan_units,
        "total_units": len(all_units),
        "entry_point_count": len(entry_point_keys),
    }
+ + Returns: list of orphan features (feature paths without incoming edges + and not in entry points) + """ + _, incoming, unit_to_file = build_call_graph(interfaces_data, enhanced_data_flow) + + # Build entry point key set + entry_point_keys = set() + for ep in entry_points: + ep_file = ep.get("file_path", "") + ep_unit = ep.get("unit_name", "") + ep_key = f"{ep_file}::{ep_unit}" + entry_point_keys.add(ep_key) + # Also add bare match + for uk in unit_to_file: + if uk.endswith(f"::{ep_unit}"): + entry_point_keys.add(uk) + + orphan_features = [] + subtrees = interfaces_data.get("subtrees", {}) + + for subtree_name, subtree_data in subtrees.items(): + file_interfaces = subtree_data.get("interfaces", subtree_data.get("files", {})) + for file_path, file_data in file_interfaces.items(): + units_to_features = file_data.get("units_to_features", {}) + for unit_name, features in units_to_features.items(): + unit_key = f"{file_path}::{unit_name}" + + # Skip entry points + if unit_key in entry_point_keys: + continue + + # Check if has any incoming edge + if unit_key not in incoming or len(incoming[unit_key]) == 0: + orphan_features.append({ + "file_path": file_path, + "unit_name": unit_name, + "features": features, + "subtree": subtree_name, + }) + + return orphan_features + + +# ============================================================================ +# Interface Reviewer +# ============================================================================ + +class InterfaceReviewer: + """Global interface reviewer that performs holistic review after all subtrees are designed. + + Combines: + 1. LLM-based semantic review (entry point identification, wiring, consistency) + 2. Code-based structural checks (call graph connectivity, feature coverage) + 3. 
Automatic fix application (add missing dependencies / interfaces) + """ + + def __init__( + self, + llm_client: Optional[LLMClient] = None, + trajectory: Optional[Any] = None, + step_id: Optional[int] = None, + ): + if llm_client is None: + self.llm = LLMClient(trajectory=trajectory, step_id=step_id) + else: + self.llm = llm_client + self.logger = logging.getLogger(__name__) + + def review_and_fix( + self, + interfaces_data: Dict[str, Any], + enhanced_data_flow: Dict[str, Any], + global_registry: GlobalInterfaceRegistry, + import_warnings: List[Dict[str, str]], + data_flow_edges: List[Dict[str, Any]], + dependency_collector: Optional[DependencyCollector] = None, + max_fix_iterations: int = 2, + ) -> Dict[str, Any]: + """Run the full global review and fix cycle. + + Steps: + 1. LLM global review (entry point identification + wiring + consistency) + 2. Code-based checks using LLM-identified entry points + 3. If issues found: apply recommended_fixes from LLM + 4. Re-run code checks + 5. Repeat until pass or max iterations + + Args: + interfaces_data: The full interfaces result dict + enhanced_data_flow: The enhanced_data_flow dict from DependencyCollector + global_registry: The GlobalInterfaceRegistry with all designed interfaces + import_warnings: List of import cross-validation warnings + data_flow_edges: Original data flow DAG edges + dependency_collector: DependencyCollector for adding new edges + max_fix_iterations: Maximum number of review-fix cycles + + Returns: + Dict with review results, applied fixes, and updated interfaces_data + """ + self.logger.info("[InterfaceReviewer] Starting global interface review") + + review_history = [] + + for iteration in range(max_fix_iterations): + self.logger.info(f"[InterfaceReviewer] Review iteration {iteration + 1}/{max_fix_iterations}") + + # Step 1: LLM global review + llm_review = self._run_llm_review( + interfaces_data=interfaces_data, + enhanced_data_flow=enhanced_data_flow, + global_registry=global_registry, + 
import_warnings=import_warnings, + data_flow_edges=data_flow_edges, + iteration=iteration, + previous_reviews=review_history, + ) + + if not llm_review: + self.logger.warning("[InterfaceReviewer] LLM review returned empty result") + break + + entry_points = llm_review.get("entry_points", []) + self.logger.info( + f"[InterfaceReviewer] LLM identified {len(entry_points)} entry points" + ) + for ep in entry_points: + self.logger.info( + f" Entry point: {ep.get('unit_name', '?')} in {ep.get('file_path', '?')} " + f"โ€” {ep.get('rationale', '')}" + ) + + # Step 2: Code-based structural checks + connectivity = check_call_graph_connectivity( + interfaces_data, enhanced_data_flow, entry_points + ) + feature_orphans = check_feature_dependency_coverage( + interfaces_data, enhanced_data_flow, entry_points + ) + + self.logger.info( + f"[InterfaceReviewer] Connectivity: " + f"{connectivity['total_units']} total units, " + f"{connectivity['entry_point_count']} entry points, " + f"{len(connectivity['orphan_units'])} orphan units" + ) + self.logger.info( + f"[InterfaceReviewer] Feature coverage: {len(feature_orphans)} orphan features" + ) + + review_result = { + "iteration": iteration + 1, + "llm_review": llm_review, + "orphan_units": connectivity["orphan_units"], + "feature_orphans": feature_orphans, + "entry_points": entry_points, + } + review_history.append(review_result) + + # Step 3: Check if passed + llm_passed = llm_review.get("pass", False) + code_passed = ( + len(connectivity["orphan_units"]) == 0 + and len(feature_orphans) == 0 + ) + + if llm_passed and code_passed: + self.logger.info("[InterfaceReviewer] [OK] Global review PASSED") + break + + # Step 4: Apply fixes + recommended_fixes = llm_review.get("recommended_fixes", []) + if recommended_fixes: + applied_count = self._apply_fixes( + fixes=recommended_fixes, + interfaces_data=interfaces_data, + enhanced_data_flow=enhanced_data_flow, + global_registry=global_registry, + dependency_collector=dependency_collector, + ) 
+ self.logger.info( + f"[InterfaceReviewer] Applied {applied_count}/{len(recommended_fixes)} fixes" + ) + else: + self.logger.info("[InterfaceReviewer] No fixes recommended, stopping iteration") + break + + # Compile final summary + final_result = { + "review_history": review_history, + "final_entry_points": review_history[-1]["entry_points"] if review_history else [], + "final_feature_orphans": review_history[-1]["feature_orphans"] if review_history else [], + "iterations_run": len(review_history), + "passed": ( + review_history[-1]["llm_review"].get("pass", False) + if review_history else False + ), + } + + return final_result + + def _run_llm_review( + self, + interfaces_data: Dict[str, Any], + enhanced_data_flow: Dict[str, Any], + global_registry: GlobalInterfaceRegistry, + import_warnings: List[Dict[str, str]], + data_flow_edges: List[Dict[str, Any]], + iteration: int = 0, + previous_reviews: Optional[List[Dict[str, Any]]] = None, + ) -> Optional[Dict[str, Any]]: + """Run the LLM global review. + + Builds a comprehensive prompt with all designed interfaces and asks + LLM to identify entry points, orphan modules, missing wiring, etc. 
+ """ + # Build the interface summary for the prompt + interface_summary = self._build_interface_summary(interfaces_data, global_registry) + + # Build data flow summary + data_flow_summary = self._build_data_flow_summary(data_flow_edges) + + # Build import warnings summary + import_warnings_summary = self._build_import_warnings_summary(import_warnings) + + # Build dependency summary + dep_summary = self._build_dependency_summary(enhanced_data_flow) + + # Build previous review context (for iteration > 0) + prev_context = "" + if previous_reviews: + last_review = previous_reviews[-1] + prev_llm = last_review.get("llm_review", {}) + prev_orphan_units = last_review.get("orphan_units", []) + prev_orphan_count = len(last_review.get("feature_orphans", [])) + + prev_context = f""" +## Previous Review Results (iteration {last_review.get('iteration', '?')}) +- Entry points identified: {len(prev_llm.get('entry_points', []))} +- Orphan modules from LLM: {len(prev_llm.get('orphan_modules', []))} +- Orphan units (no incoming edges): {len(prev_orphan_units)} +- Orphan features: {prev_orphan_count} +- Fixes applied: {len(prev_llm.get('recommended_fixes', []))} + +Please review the CURRENT state after fixes were applied and provide updated analysis. +""" + + user_prompt = f""" +## All Designed Interfaces (grouped by subtree) +{interface_summary} + +## Data Flow DAG +{data_flow_summary} + +## Current Dependency Edges +{dep_summary} + +## Import Cross-Validation Warnings +{import_warnings_summary} +{prev_context} + +Please perform the review tasks and return the JSON result. 
+""".strip() + + combined_prompt = f"{GLOBAL_INTERFACE_REVIEW_PROMPT}\n\n{user_prompt}" + + try: + response = self.llm.generate( + combined_prompt, + purpose=f"global_interface_review_{iteration + 1}" + ) + + # Parse JSON from response + result = self.llm.parse_json_block(response) + + if result: + return result + + # Try to extract JSON directly + try: + # Find JSON in the response + start = response.find("{") + end = response.rfind("}") + 1 + if start >= 0 and end > start: + result = json.loads(response[start:end]) + return result + except json.JSONDecodeError: + pass + + self.logger.warning("[InterfaceReviewer] Failed to parse LLM review response") + return None + + except Exception as e: + self.logger.error(f"[InterfaceReviewer] LLM review failed: {e}") + return None + + def _apply_fixes( + self, + fixes: List[Dict[str, Any]], + interfaces_data: Dict[str, Any], + enhanced_data_flow: Dict[str, Any], + global_registry: GlobalInterfaceRegistry, + dependency_collector: Optional[DependencyCollector] = None, + ) -> int: + """Apply recommended fixes from the LLM review. 
+ + Supported actions: + - add_dependency: Add a call dependency edge + - add_interface: (logged as warning โ€” requires manual or future LLM action) + - modify_interface: (logged as warning โ€” requires manual or future LLM action) + + Returns: + Number of fixes successfully applied + """ + applied = 0 + + for fix in fixes: + action = fix.get("action", "") + file_path = fix.get("file_path", "") + unit_name = fix.get("unit_name", "") + description = fix.get("description", "") + + if action == "add_dependency": + calls_to_add = fix.get("calls_to_add", []) + for call_info in calls_to_add: + callee = call_info.get("callee", "") + callee_file = call_info.get("callee_file", "") + + if not callee: + continue + + # Resolve callee_file from global registry if not provided + if not callee_file: + callee_file = global_registry.resolve_callee(callee) + + if not callee_file: + self.logger.warning( + f"[InterfaceReviewer] Cannot resolve callee '{callee}' " + f"for fix on {file_path}::{unit_name}" + ) + continue + + # Add to enhanced_data_flow + inv_edges = enhanced_data_flow.get("invocation_edges", []) + + # Check if edge already exists + exists = any( + e.get("caller") == unit_name + and e.get("callee") == callee + and e.get("caller_file") == file_path + for e in inv_edges + ) + + if not exists: + new_edge = { + "caller": unit_name, + "callee": callee, + "caller_file": file_path, + "callee_file": callee_file, + "edge_type": "invokes", + "generator": "global_review", + } + inv_edges.append(new_edge) + enhanced_data_flow["invocation_edges"] = inv_edges + + # Also add to dependency_collector if available + if dependency_collector: + dependency_collector.add_invocation( + caller=unit_name, + callee=callee, + caller_file=file_path, + callee_file=callee_file, + ) + + self.logger.info( + f"[InterfaceReviewer] Added dependency: " + f"{unit_name} ({file_path}) -> {callee} ({callee_file})" + ) + applied += 1 + + elif action == "add_interface": + self.logger.warning( + 
f"[InterfaceReviewer] add_interface fix requested but not auto-applied: " + f"{description} (file: {file_path})" + ) + + elif action == "modify_interface": + self.logger.warning( + f"[InterfaceReviewer] modify_interface fix requested but not auto-applied: " + f"{description} (file: {file_path}, unit: {unit_name})" + ) + + else: + self.logger.warning( + f"[InterfaceReviewer] Unknown fix action: {action}" + ) + + return applied + + def _build_interface_summary( + self, + interfaces_data: Dict[str, Any], + global_registry: GlobalInterfaceRegistry, + ) -> str: + """Build a comprehensive interface summary for the LLM review prompt.""" + parts = [] + subtrees = interfaces_data.get("subtrees", {}) + subtree_order = interfaces_data.get("subtree_order", []) + + for subtree_name in subtree_order: + subtree_data = subtrees.get(subtree_name, {}) + file_interfaces = subtree_data.get("interfaces", subtree_data.get("files", {})) + + if not file_interfaces: + continue + + parts.append(f"\n### Subtree: {subtree_name}") + + for file_path, file_data in file_interfaces.items(): + units = file_data.get("units", []) + units_to_features = file_data.get("units_to_features", {}) + file_code = file_data.get("file_code", "") + + if not units: + continue + + parts.append(f"\n**{file_path}**") + + for unit_name in units: + features = units_to_features.get(unit_name, []) + features_str = ", ".join(features[:5]) + if len(features) > 5: + features_str += f" (+{len(features) - 5} more)" + parts.append(f" - `{unit_name}` โ†’ features: {features_str}") + + # Include abbreviated code (first 20 lines) + if file_code: + code_lines = file_code.split("\n") + if len(code_lines) > 25: + code_preview = "\n".join(code_lines[:25]) + "\n # ... (truncated)" + else: + code_preview = file_code + parts.append(f" ```python\n{code_preview}\n ```") + + return "\n".join(parts) if parts else "No interfaces designed." 
+ + def _build_data_flow_summary(self, data_flow_edges: List[Dict[str, Any]]) -> str: + """Build a data flow summary for the prompt.""" + if not data_flow_edges: + return "No data flow edges." + + parts = [] + for edge in data_flow_edges: + source = edge.get("source", "?") + target = edge.get("target", "?") + desc = edge.get("description", "") + parts.append(f" {source} โ†’ {target}" + (f": {desc}" if desc else "")) + + return "\n".join(parts) + + def _build_import_warnings_summary(self, warnings: List[Dict[str, str]]) -> str: + """Build import warnings summary for the prompt.""" + if not warnings: + return "No import cross-validation warnings." + + parts = [f"Found {len(warnings)} potential issues:"] + for w in warnings[:20]: # Limit to 20 + parts.append(f" - {w.get('message', '?')}") + + if len(warnings) > 20: + parts.append(f" ... and {len(warnings) - 20} more warnings") + + return "\n".join(parts) + + def _build_dependency_summary(self, enhanced_data_flow: Dict[str, Any]) -> str: + """Build a dependency edge summary for the prompt.""" + if not enhanced_data_flow: + return "No dependency edges collected." 
+ + parts = [] + + inv_edges = enhanced_data_flow.get("invocation_edges", []) + inh_edges = enhanced_data_flow.get("inheritance_edges", []) + ref_edges = enhanced_data_flow.get("reference_edges", []) + + parts.append( + f"Total: {len(inv_edges)} invocation, {len(inh_edges)} inheritance, " + f"{len(ref_edges)} reference edges" + ) + + # Cross-file invocations + cross_file = [e for e in inv_edges if e.get("caller_file") != e.get("callee_file")] + same_file = [e for e in inv_edges if e.get("caller_file") == e.get("callee_file")] + no_callee = [e for e in inv_edges if not e.get("callee_file")] + + parts.append( + f"Invocations: {len(cross_file)} cross-file, {len(same_file)} same-file, " + f"{len(no_callee)} unresolved callee" + ) + + # Show cross-file edges + if cross_file: + parts.append("\nCross-file invocations:") + for e in cross_file[:30]: + parts.append( + f" {e.get('caller', '?')} ({e.get('caller_file', '?')}) " + f"โ†’ {e.get('callee', '?')} ({e.get('callee_file', '?')})" + ) + if len(cross_file) > 30: + parts.append(f" ... and {len(cross_file) - 30} more") + + return "\n".join(parts) + + +# ============================================================================ +# Orphan Pruning +# ============================================================================ + +def prune_orphan_interfaces( + interfaces_data: Dict[str, Any], + review_result: Dict[str, Any], + enhanced_data_flow: Dict[str, Any], + logger: Optional[logging.Logger] = None, +) -> Dict[str, Any]: + """Remove orphan interfaces from interfaces_data after global review. + + An interface unit is considered a **true orphan** when it has **no incoming + edges AND no outgoing edges** in the call graph and is not an entry point. + Units that participate in any edge (caller or callee) are preserved even if + they are not reachable from entry points โ€” their connected components are + valid code that just lacks proper wiring to the top-level entry flow. 
+ + For each pruned unit the function: + - Removes it from ``units``, ``units_to_features``, ``units_to_code`` + - Regenerates ``file_code`` from the remaining units + - If all units in a file are removed, removes the entire file entry + - Removes related edges from ``enhanced_data_flow`` + + Returns a summary dict:: + + { + "pruned_units": [...], + "pruned_files": [...], + "orphan_feature_paths": set of feature paths whose ALL implementing + units were pruned, + "surviving_feature_paths": set of feature paths that still have at + least one surviving unit, + } + """ + if logger is None: + logger = logging.getLogger(__name__) + + # ---- 0. Build call graph to find truly isolated units ---- + entry_points = review_result.get("final_entry_points", []) + entry_point_keys: Set[str] = set() + for ep in entry_points: + ep_file = ep.get("file_path", "") + ep_unit = ep.get("unit_name", "") + if ep_file and ep_unit: + entry_point_keys.add(f"{ep_file}::{ep_unit}") + + outgoing, incoming, unit_to_file = build_call_graph( + interfaces_data, enhanced_data_flow + ) + all_units = set(unit_to_file.keys()) + + # Truly isolated: no incoming AND no outgoing AND not entry point + isolated_keys: Set[str] = set() + for u in all_units: + if u in entry_point_keys: + continue + has_in = u in incoming and len(incoming[u]) > 0 + has_out = u in outgoing and len(outgoing[u]) > 0 + if not has_in and not has_out: + isolated_keys.add(u) + + if not isolated_keys: + logger.info("[prune_orphan_interfaces] No truly isolated units โ€” nothing to prune") + # Still compute surviving features for RPG pruning + surviving = _collect_surviving_features(interfaces_data) + return { + "pruned_units": [], + "pruned_files": [], + "orphan_feature_paths": set(), + "surviving_feature_paths": surviving, + } + + logger.info( + f"[prune_orphan_interfaces] {len(isolated_keys)} truly isolated units " + f"(out of {len(all_units)} total) to prune" + ) + + pruned_units: List[Dict[str, Any]] = [] + pruned_files: 
List[Dict[str, str]] = [] + + # ---- 1. Build a global map: feature_path โ†’ set of unit_keys that implement it ---- + feature_to_all_unit_keys: Dict[str, Set[str]] = defaultdict(set) + subtrees = interfaces_data.get("subtrees", {}) + for subtree_name, subtree_data in subtrees.items(): + file_interfaces = subtree_data.get("interfaces", subtree_data.get("files", {})) + for file_path, file_data in file_interfaces.items(): + for unit_name, features in file_data.get("units_to_features", {}).items(): + unit_key = f"{file_path}::{unit_name}" + for fp in features: + feature_to_all_unit_keys[fp].add(unit_key) + + # ---- 2. Prune units from interfaces_data ---- + for subtree_name, subtree_data in subtrees.items(): + file_interfaces = subtree_data.get("interfaces", subtree_data.get("files", {})) + files_to_remove: List[str] = [] + + for file_path in list(file_interfaces.keys()): + file_data = file_interfaces[file_path] + units: List[str] = file_data.get("units", []) + units_to_features: Dict[str, List[str]] = file_data.get("units_to_features", {}) + units_to_code: Dict[str, str] = file_data.get("units_to_code", {}) + + units_to_remove: List[str] = [] + for unit_name in units: + unit_key = f"{file_path}::{unit_name}" + if unit_key in isolated_keys: + units_to_remove.append(unit_name) + pruned_units.append({ + "file_path": file_path, + "unit_name": unit_name, + "subtree": subtree_name, + "features": units_to_features.get(unit_name, []), + }) + + if not units_to_remove: + continue + + # Remove the units + for uname in units_to_remove: + if uname in units: + units.remove(uname) + units_to_features.pop(uname, None) + units_to_code.pop(uname, None) + logger.info(f"[prune_orphan_interfaces] Pruned unit: {file_path}::{uname}") + + file_data["units"] = units + file_data["units_to_features"] = units_to_features + file_data["units_to_code"] = units_to_code + + if not units: + # All units pruned โ†’ remove the entire file entry + files_to_remove.append(file_path) + else: + # Regenerate 
file_code from surviving units + code_parts = [] + for uname in units: + code = units_to_code.get(uname, "") + if code: + code_parts.append(code) + file_data["file_code"] = "\n\n".join(code_parts) + + for fp in files_to_remove: + del file_interfaces[fp] + pruned_files.append({"file_path": fp, "subtree": subtree_name}) + logger.info(f"[prune_orphan_interfaces] Pruned entire file: {fp} (all units removed)") + + # ---- 3. Remove edges for pruned units from enhanced_data_flow ---- + pruned_unit_names = {pu["unit_name"] for pu in pruned_units} + pruned_file_paths = {pu["file_path"] for pu in pruned_units} + + def _edge_involves_pruned_unit(edge: Dict[str, Any]) -> bool: + """Return True if the edge references a pruned unit.""" + for role_name, role_file in [ + ("caller", "caller_file"), ("callee", "callee_file"), + ("child", "child_file"), ("parent", "parent_file"), + ("unit", "unit_file"), + ]: + name_val = edge.get(role_name, "") + file_val = edge.get(role_file, "") + if name_val in pruned_unit_names: + # Double-check file to avoid false positives on common names + if not file_val or file_val in pruned_file_paths: + return True + return False + + for edge_list_key in ("invocation_edges", "inheritance_edges", "reference_edges"): + edges = enhanced_data_flow.get(edge_list_key, []) + before = len(edges) + edges[:] = [e for e in edges if not _edge_involves_pruned_unit(e)] + after = len(edges) + if before != after: + logger.info( + f"[prune_orphan_interfaces] Removed {before - after} edges from {edge_list_key}" + ) + + # ---- 4. 
Identify features that are now fully orphaned ---- + pruned_key_set = {f"{pu['file_path']}::{pu['unit_name']}" for pu in pruned_units} + orphan_feature_paths: Set[str] = set() + for feature_path, all_keys in feature_to_all_unit_keys.items(): + if all_keys and all_keys.issubset(pruned_key_set): + orphan_feature_paths.add(feature_path) + + if orphan_feature_paths: + logger.info( + f"[prune_orphan_interfaces] {len(orphan_feature_paths)} features fully orphaned: " + + ", ".join(sorted(orphan_feature_paths)[:10]) + ) + + # ---- 5. Collect surviving feature paths for RPG pruning ---- + surviving = _collect_surviving_features(interfaces_data) + + return { + "pruned_units": pruned_units, + "pruned_files": pruned_files, + "orphan_feature_paths": orphan_feature_paths, + "surviving_feature_paths": surviving, + } + + +def _collect_surviving_features(interfaces_data: Dict[str, Any]) -> Set[str]: + """Collect all feature paths that still have at least one interface unit.""" + surviving: Set[str] = set() + for st_data in interfaces_data.get("subtrees", {}).values(): + file_interfaces = st_data.get("interfaces", st_data.get("files", {})) + for file_data in file_interfaces.values(): + for features in file_data.get("units_to_features", {}).values(): + surviving.update(features) + return surviving + + +def print_review_summary(review_result: Dict[str, Any]): + """Print a human-readable summary of the global review results.""" + print("\n" + "=" * 60) + print("GLOBAL INTERFACE REVIEW SUMMARY") + print("=" * 60) + + iterations = review_result.get("iterations_run", 0) + passed = review_result.get("passed", False) + + print(f"Iterations: {iterations}") + print(f"Final Status: {'[OK] PASSED' if passed else '[FAIL] NEEDS ATTENTION'}") + + # Entry points + entry_points = review_result.get("final_entry_points", []) + if entry_points: + print(f"\nEntry Points ({len(entry_points)}):") + for ep in entry_points: + print(f" - {ep.get('unit_name', '?')} in {ep.get('file_path', '?')}") + if 
ep.get("rationale"): + print(f" Reason: {ep['rationale']}") + + # Feature orphans + feature_orphans = review_result.get("final_feature_orphans", []) + if feature_orphans: + print(f"\nOrphan Features ({len(feature_orphans)}):") + for fo in feature_orphans[:10]: + print( + f" - {fo.get('unit_name', '?')} in {fo.get('file_path', '?')} " + f"({fo.get('subtree', '?')})" + ) + if len(feature_orphans) > 10: + print(f" ... and {len(feature_orphans) - 10} more") + + print("=" * 60) + + +# ============================================================================ +# Orphan Unit Review +# ============================================================================ + + +@dataclass +class OrphanReviewResult: + """Result of orphan unit review.""" + decisions: Dict[str, str] = field(default_factory=dict) # unit_key -> "retain" | "prune" + completed_edges: Dict[str, Dict[str, List[Dict]]] = field(default_factory=dict) # unit_key -> edges dict + + @property + def keys_to_prune(self) -> List[str]: + return [k for k, d in self.decisions.items() if d == "prune"] + + @property + def keys_to_retain(self) -> List[str]: + return [k for k, d in self.decisions.items() if d == "retain"] + + def get_all_edges(self) -> Dict[str, List[Dict]]: + """Aggregate all completed edges by type.""" + result: Dict[str, List[Dict]] = { + "inheritance_edges": [], + "invocation_edges": [], + "reference_edges": [], + } + for edges_dict in self.completed_edges.values(): + for edge_type, edges in edges_dict.items(): + if edge_type in result and edges: + result[edge_type].extend(edges) + return result + + +def review_orphan_units( + orphan_details: List[Dict[str, Any]], + repo_info: str, + subtree_interfaces: Optional[Dict[str, Any]] = None, + llm_client: Optional[LLMClient] = None, +) -> OrphanReviewResult: + """Review orphan units using LLM to determine which should be retained or pruned. + + Units are grouped by subtree for better context during review. 
+ + Args: + orphan_details: List of orphan unit details from InterfacesStore.get_orphan_unit_details() + repo_info: Repository description for context + subtree_interfaces: Optional dict mapping subtree -> interfaces data for context + llm_client: LLM client to use (creates new one if not provided) + + Returns: + OrphanReviewResult with decisions and completed edges + """ + if not orphan_details: + logger.info("[review_orphan_units] No orphan units to review") + return OrphanReviewResult() + + llm = llm_client or LLMClient() + result = OrphanReviewResult() + + # Group orphans by subtree + orphans_by_subtree: Dict[str, List[Dict[str, Any]]] = {} + for detail in orphan_details: + subtree = detail.get("subtree", "unknown") + orphans_by_subtree.setdefault(subtree, []).append(detail) + + # Review each subtree's orphans together + for subtree, subtree_orphans in orphans_by_subtree.items(): + # Get subtree context if available + subtree_context = None + if subtree_interfaces and subtree in subtree_interfaces: + subtree_context = subtree_interfaces[subtree] + + batch_result = _review_orphan_batch( + subtree_orphans, repo_info, subtree, subtree_context, llm + ) + result.decisions.update(batch_result.decisions) + result.completed_edges.update(batch_result.completed_edges) + + logger.info( + f"[review_orphan_units] Reviewed {len(orphan_details)} orphan units across " + f"{len(orphans_by_subtree)} subtrees: " + f"{len(result.keys_to_retain)} retain, " + f"{len(result.keys_to_prune)} prune, " + f"{len(result.completed_edges)} with completed edges" + ) + + return result + + +def _review_orphan_batch( + batch: List[Dict[str, Any]], + repo_info: str, + subtree_name: str, + subtree_context: Optional[Dict[str, Any]], + llm: LLMClient, +) -> OrphanReviewResult: + """Review orphan units from a single subtree.""" + # Build user prompt with orphan details + orphan_summaries = [] + for detail in batch: + summary = f""" +### Unit: {detail['unit_key']} +- File: {detail['file_path']} +- 
Features: {', '.join(detail['features']) if detail['features'] else '(none)'} + +Code: +```python +{detail['code']} +``` +""" + orphan_summaries.append(summary) + + user_prompt = f"""## Repository Context +{repo_info} + +## Subtree: {subtree_name} + +## Orphan Units to Review +The following {len(batch)} interface units in subtree "{subtree_name}" have no incoming or outgoing call edges. +Determine whether each should be retained or pruned. + +{''.join(orphan_summaries)} +""" + + if subtree_context: + # Extract other unit names in the subtree for context + other_units = [] + interfaces = subtree_context.get("interfaces", {}) + for file_path, file_data in interfaces.items(): + units_to_code = file_data.get("units_to_code", {}) + for unit_name in units_to_code.keys(): + unit_key = f"{file_path}::{unit_name}" + # Exclude current orphans from context + if not any(d["unit_key"] == unit_key for d in batch): + other_units.append(unit_key) + + if other_units: + user_prompt += f""" +## Other Units in This Subtree (for context) +{', '.join(other_units[:20])}{'...' 
if len(other_units) > 20 else ''} +""" + + combined_prompt = f"{ORPHAN_REVIEW_PROMPT}\n\n{user_prompt}" + + result = OrphanReviewResult() + + try: + response = llm.generate(combined_prompt, purpose="orphan_review") + + # Parse JSON response using LLMClient's built-in method + parsed = llm.parse_json_block(response) + if not parsed: + logger.error("[orphan_review] Failed to parse LLM response as JSON") + for detail in batch: + result.decisions[detail["unit_key"]] = "retain" + return result + + reviews = parsed.get("reviews", []) + + for review in reviews: + unit_key = review.get("unit_key", "") + decision = review.get("decision", "retain").lower() + reason = review.get("reason", "") + edges = review.get("edges") + + if decision not in ("retain", "prune"): + decision = "retain" # Default to retain if unclear + + result.decisions[unit_key] = decision + + # Collect completed edges if provided + if edges and isinstance(edges, dict): + valid_edges = {} + for edge_type in ("inheritance_edges", "invocation_edges", "reference_edges"): + if edge_type in edges and edges[edge_type]: + valid_edges[edge_type] = edges[edge_type] + if valid_edges: + result.completed_edges[unit_key] = valid_edges + logger.info( + f"[orphan_review] {unit_key}: {decision} - {reason} " + f"(+{sum(len(e) for e in valid_edges.values())} edges)" + ) + else: + logger.info(f"[orphan_review] {unit_key}: {decision} - {reason}") + else: + logger.info(f"[orphan_review] {unit_key}: {decision} - {reason}") + + # Ensure all units in batch have a decision (default to retain) + for detail in batch: + if detail["unit_key"] not in result.decisions: + result.decisions[detail["unit_key"]] = "retain" + logger.warning( + f"[orphan_review] {detail['unit_key']}: defaulting to retain (missing from LLM response)" + ) + + return result + + except Exception as e: + logger.error(f"[orphan_review] Error during review: {e}") + # Default all to retain on error + for detail in batch: + result.decisions[detail["unit_key"]] = 
"retain" + return result diff --git a/RPG-Kit/scripts/func_design/interfaces_store.py b/RPG-Kit/scripts/func_design/interfaces_store.py new file mode 100644 index 0000000..e5e954a --- /dev/null +++ b/RPG-Kit/scripts/func_design/interfaces_store.py @@ -0,0 +1,1289 @@ +#!/usr/bin/env python3 +"""Interfaces Store - Unified Data Structure for Interface Design. + +This module provides a unified data store for managing all interface-related data +during the design_interfaces workflow. It replaces scattered dict structures with +a single source of truth. + +Key components: +- InterfaceUnit: Single interface unit (class/function) +- InheritanceEdge, InvocationEdge, ReferenceEdge: Dependency edge types +- InterfacesStore: Central store managing all units and edges with auto-maintained indexes +""" + +import logging +from collections import defaultdict +from dataclasses import dataclass, field, asdict +from pathlib import Path +from typing import Dict, List, Optional, Set, Any, Union, Tuple + +logger = logging.getLogger(__name__) + + +# ============================================================================ +# Edge Data Classes +# ============================================================================ + +@dataclass +class InheritanceEdge: + """Inheritance relationship (child extends parent).""" + child: str # e.g., "ChildClass" + parent: str # e.g., "BaseClass" + child_file: str # source file path + parent_file: Optional[str] = None + generator: str = "design_interfaces" + + def to_dict(self) -> Dict[str, Any]: + """Convert to legacy dict format.""" + return { + "child": self.child, + "parent": self.parent, + "source_file": self.child_file, + "parent_file": self.parent_file, + "edge_type": "inherits", + "generator": self.generator, + } + + +@dataclass +class InvocationEdge: + """Invocation relationship (caller calls callee).""" + caller: str # e.g., "function parse", "class Parser" + callee: str # e.g., "function tokenize" + caller_file: str + callee_file: 
Optional[str] = None + generator: str = "design_interfaces" + + def to_dict(self) -> Dict[str, Any]: + """Convert to legacy dict format.""" + return { + "caller": self.caller, + "callee": self.callee, + "caller_file": self.caller_file, + "callee_file": self.callee_file, + "edge_type": "invokes", + "generator": self.generator, + } + + +@dataclass +class ReferenceEdge: + """Type reference relationship (unit references type).""" + unit: str # e.g., "function process" + referenced_type: str # e.g., "Config" + source_file: str + type_file: Optional[str] = None + generator: str = "design_interfaces" + + def to_dict(self) -> Dict[str, Any]: + """Convert to legacy dict format.""" + return { + "unit": self.unit, + "referenced_type": self.referenced_type, + "source_file": self.source_file, + "type_file": self.type_file, + "edge_type": "references", + "generator": self.generator, + } + + +# ============================================================================ +# Interface Unit +# ============================================================================ + +@dataclass +class InterfaceUnit: + """A single interface unit (function or class).""" + name: str # e.g., "class Parser", "function parse" + file_path: str + subtree_name: str + features: List[str] # feature paths this unit implements (existing + new combined) + code: str # interface source code + + @property + def key(self) -> str: + """Unique key for this unit: file_path::name.""" + return f"{self.file_path}::{self.name}" + + @property + def unit_type(self) -> str: + """Return 'class' or 'function'.""" + if self.name.startswith("class "): + return "class" + elif self.name.startswith("function "): + return "function" + return "unknown" + + @property + def bare_name(self) -> str: + """Return name without 'class ' or 'function ' prefix.""" + parts = self.name.split(" ", 1) + return parts[1] if len(parts) == 2 else self.name + + +# ============================================================================ +# 
Summary Data Classes +# ============================================================================ + +@dataclass +class OrphanFeature: + """A feature whose implementing unit was pruned.""" + feature_path: str + unit_key: str # format: "file_path::unit_name" + + def to_dict(self) -> Dict[str, str]: + return { + "feature_path": self.feature_path, + "unit_key": self.unit_key, + } + + +@dataclass +class PruneSummary: + """Summary of orphan pruning operation.""" + pruned_units: List["InterfaceUnit"] = field(default_factory=list) + pruned_files: List[str] = field(default_factory=list) + orphan_features: List[OrphanFeature] = field(default_factory=list) + surviving_feature_paths: Set[str] = field(default_factory=set) + + def get_orphan_features_list(self) -> List[Dict[str, str]]: + """Get orphan features as list of dicts for JSON output.""" + return [of.to_dict() for of in self.orphan_features] + + +@dataclass +class RPGUpdateSummary: + """Summary of RPG update operation.""" + updated_features: int = 0 + created_new_features: int = 0 + added_same_unit_edges: int = 0 + added_dependency_edges: int = 0 + marked_entry_points: int = 0 + skipped_features: int = 0 + pruned_feature_nodes: int = 0 + pruned_parent_nodes: int = 0 + pruned_edges: int = 0 + + +# ============================================================================ +# Interfaces Store +# ============================================================================ + +class InterfacesStore: + """Unified store for all interface data. 
+ + Provides: + - Single source of truth for units and edges + - Type-safe CRUD operations + - Auto-maintained indexes for efficient lookups + - Pruning and RPG update as unified methods + """ + + def __init__(self): + # Primary data: unit_key -> InterfaceUnit + self._units: Dict[str, InterfaceUnit] = {} + + # Edge lists + self._inheritance_edges: List[InheritanceEdge] = [] + self._invocation_edges: List[InvocationEdge] = [] + self._reference_edges: List[ReferenceEdge] = [] + # Preserved original coarse-grained data flow edges + self._original_data_flow_edges: List[Dict[str, Any]] = [] + + # Auto-maintained indexes + self._file_to_units: Dict[str, List[str]] = defaultdict(list) # file -> [unit_keys] + self._subtree_to_files: Dict[str, Set[str]] = defaultdict(set) # subtree -> {files} + self._feature_to_units: Dict[str, Set[str]] = defaultdict(set) # feature -> {unit_keys} + self._class_to_file: Dict[str, str] = {} # bare_class_name -> file_path + self._function_to_file: Dict[str, str] = {} # bare_function_name -> file_path + + # Entry points (set after global review) + self._entry_point_keys: Set[str] = set() + + # Subtree ordering + self.subtree_order: List[str] = [] + + # Global review metadata + self._global_review: Dict[str, Any] = {} + + # New features created during interface design + self._new_features: Dict[str, str] = {} # feature_path -> unit_key that created it + + # ======================================================================== + # Unit CRUD Operations + # ======================================================================== + + def add_unit(self, unit: InterfaceUnit) -> None: + """Add a unit and update all indexes. 
+ + Args: + unit: The InterfaceUnit to add + """ + key = unit.key + self._units[key] = unit + + # Update file index + if key not in self._file_to_units[unit.file_path]: + self._file_to_units[unit.file_path].append(key) + + # Update subtree index + self._subtree_to_files[unit.subtree_name].add(unit.file_path) + + # Update feature index + for feature_path in unit.features: + self._feature_to_units[feature_path].add(key) + + # Update symbol resolution indexes + if unit.unit_type == "class": + self._class_to_file[unit.bare_name] = unit.file_path + elif unit.unit_type == "function": + self._function_to_file[unit.bare_name] = unit.file_path + + def remove_unit(self, key: str) -> Optional[InterfaceUnit]: + """Remove a unit and clean up all related data. + + Args: + key: Unit key in format "file_path::unit_name" + + Returns: + The removed InterfaceUnit, or None if not found + """ + unit = self._units.pop(key, None) + if not unit: + return None + + # Clean file index + if key in self._file_to_units[unit.file_path]: + self._file_to_units[unit.file_path].remove(key) + + # Clean feature index + for feature_path in unit.features: + self._feature_to_units[feature_path].discard(key) + + # Clean new features that this unit created + features_to_remove = [fp for fp, uk in self._new_features.items() if uk == key] + for fp in features_to_remove: + del self._new_features[fp] + + # Clean symbol index + if unit.unit_type == "class": + if self._class_to_file.get(unit.bare_name) == unit.file_path: + del self._class_to_file[unit.bare_name] + elif unit.unit_type == "function": + if self._function_to_file.get(unit.bare_name) == unit.file_path: + del self._function_to_file[unit.bare_name] + + # Remove related edges + self._remove_edges_involving_unit(unit) + + # Clean empty file entries + if not self._file_to_units[unit.file_path]: + del self._file_to_units[unit.file_path] + self._subtree_to_files[unit.subtree_name].discard(unit.file_path) + + return unit + + def get_unit(self, key: str) -> 
Optional[InterfaceUnit]: + """Get a unit by its key.""" + return self._units.get(key) + + def get_units_for_file(self, file_path: str) -> List[InterfaceUnit]: + """Get all units in a file.""" + keys = self._file_to_units.get(file_path, []) + return [self._units[k] for k in keys if k in self._units] + + def get_units_for_subtree(self, subtree_name: str) -> List[InterfaceUnit]: + """Get all units in a subtree.""" + files = self._subtree_to_files.get(subtree_name, set()) + units = [] + for file_path in files: + units.extend(self.get_units_for_file(file_path)) + return units + + @property + def all_units(self) -> Dict[str, InterfaceUnit]: + """Return all units.""" + return self._units.copy() + + @property + def all_unit_keys(self) -> Set[str]: + """Return all unit keys.""" + return set(self._units.keys()) + + @property + def new_features(self) -> Dict[str, str]: + """Return all new features created during interface design. + + Returns: + Dict mapping feature_path -> unit_key that created it + """ + return self._new_features.copy() + + def get_new_features_summary(self) -> List[Dict[str, Any]]: + """Get a summary of all new features for reporting. + + Returns: + List of dicts with feature info: path, unit_name, file_path, subtree + """ + summary = [] + for feature_path, unit_key in self._new_features.items(): + unit = self._units.get(unit_key) + if unit: + summary.append({ + "feature_path": feature_path, + "unit_name": unit.name, + "file_path": unit.file_path, + "subtree": unit.subtree_name, + }) + return summary + + def register_new_feature(self, feature_path: str, unit_key: str) -> None: + """Register a new feature created during interface design. 
+ + Args: + feature_path: The new feature path + unit_key: The unit key that implements this feature + """ + self._new_features[feature_path] = unit_key + # Also add to feature index + self._feature_to_units[feature_path].add(unit_key) + + # ======================================================================== + # Edge Operations + # ======================================================================== + + def add_inheritance_edge(self, edge: InheritanceEdge) -> None: + """Add an inheritance edge.""" + self._inheritance_edges.append(edge) + + def add_invocation_edge(self, edge: InvocationEdge) -> None: + """Add an invocation edge (self-calls are filtered).""" + # Self-call filter + bare_caller = edge.caller.split(" ", 1)[-1] if " " in edge.caller else edge.caller + bare_callee = edge.callee.split(" ", 1)[-1] if " " in edge.callee else edge.callee + if bare_caller == bare_callee and (edge.callee_file is None or edge.callee_file == edge.caller_file): + return + self._invocation_edges.append(edge) + + def add_reference_edge(self, edge: ReferenceEdge) -> None: + """Add a type reference edge.""" + self._reference_edges.append(edge) + + def add_edge(self, edge: Union[InheritanceEdge, InvocationEdge, ReferenceEdge]) -> None: + """Add any edge type.""" + if isinstance(edge, InheritanceEdge): + self.add_inheritance_edge(edge) + elif isinstance(edge, InvocationEdge): + self.add_invocation_edge(edge) + elif isinstance(edge, ReferenceEdge): + self.add_reference_edge(edge) + + def _remove_edges_involving_unit(self, unit: InterfaceUnit) -> int: + """Remove edges that reference the given unit. 
+ + Returns: + Number of edges removed + """ + removed = 0 + unit_name = unit.name + file_path = unit.file_path + + # Filter inheritance edges + orig_len = len(self._inheritance_edges) + self._inheritance_edges = [ + e for e in self._inheritance_edges + if not self._edge_involves_unit(e, unit_name, file_path, "inheritance") + ] + removed += orig_len - len(self._inheritance_edges) + + # Filter invocation edges + orig_len = len(self._invocation_edges) + self._invocation_edges = [ + e for e in self._invocation_edges + if not self._edge_involves_unit(e, unit_name, file_path, "invocation") + ] + removed += orig_len - len(self._invocation_edges) + + # Filter reference edges + orig_len = len(self._reference_edges) + self._reference_edges = [ + e for e in self._reference_edges + if not self._edge_involves_unit(e, unit_name, file_path, "reference") + ] + removed += orig_len - len(self._reference_edges) + + return removed + + def _edge_involves_unit( + self, + edge: Union[InheritanceEdge, InvocationEdge, ReferenceEdge], + unit_name: str, + file_path: str, + edge_type: str + ) -> bool: + """Check if an edge involves the specified unit.""" + if edge_type == "inheritance": + e = edge # type: InheritanceEdge + # Child matches + if e.child == unit_name or (e.child in unit_name): + if not e.child_file or e.child_file == file_path: + return True + # Parent matches + if e.parent == unit_name or (e.parent in unit_name): + if not e.parent_file or e.parent_file == file_path: + return True + elif edge_type == "invocation": + e = edge # type: InvocationEdge + if e.caller == unit_name: + if not e.caller_file or e.caller_file == file_path: + return True + if e.callee == unit_name: + if not e.callee_file or e.callee_file == file_path: + return True + elif edge_type == "reference": + e = edge # type: ReferenceEdge + if e.unit == unit_name: + if not e.source_file or e.source_file == file_path: + return True + return False + + # 
======================================================================== + # Entry Points + # ======================================================================== + + def set_entry_points(self, entry_points: List[Dict[str, Any]]) -> None: + """Set entry point keys from global review result. + + Args: + entry_points: List of dicts with 'file_path' and 'unit_name' keys + """ + self._entry_point_keys.clear() + for ep in entry_points: + ep_file = ep.get("file_path", "") + ep_unit = ep.get("unit_name", "") + if ep_file and ep_unit: + self._entry_point_keys.add(f"{ep_file}::{ep_unit}") + + def is_entry_point(self, key: str) -> bool: + """Check if a unit is an entry point.""" + return key in self._entry_point_keys + + # ======================================================================== + # Call Graph Construction + # ======================================================================== + + def build_adjacency(self) -> Tuple[Dict[str, Set[str]], Dict[str, Set[str]]]: + """Build outgoing and incoming adjacency sets from edges. 
+ + Returns: + Tuple of (outgoing, incoming) where: + - outgoing: {unit_key -> set of callee unit_keys} + - incoming: {unit_key -> set of caller unit_keys} + """ + outgoing: Dict[str, Set[str]] = defaultdict(set) + incoming: Dict[str, Set[str]] = defaultdict(set) + + # Build name-to-keys lookup + name_to_keys: Dict[str, List[str]] = defaultdict(list) + for key, unit in self._units.items(): + name_to_keys[unit.name].append(key) + name_to_keys[unit.bare_name].append(key) + + # Process invocation edges + for edge in self._invocation_edges: + caller_key = self._resolve_edge_unit(edge.caller, edge.caller_file, name_to_keys) + callee_key = self._resolve_edge_unit(edge.callee, edge.callee_file, name_to_keys) + if caller_key and callee_key: + outgoing[caller_key].add(callee_key) + incoming[callee_key].add(caller_key) + + # Process inheritance edges + for edge in self._inheritance_edges: + child_key = self._resolve_edge_unit(f"class {edge.child}", edge.child_file, name_to_keys) + parent_key = self._resolve_edge_unit(f"class {edge.parent}", edge.parent_file, name_to_keys) + if child_key and parent_key: + outgoing[child_key].add(parent_key) + incoming[parent_key].add(child_key) + + # Process reference edges + for edge in self._reference_edges: + unit_key = self._resolve_edge_unit(edge.unit, edge.source_file, name_to_keys) + type_key = self._resolve_edge_unit(f"class {edge.referenced_type}", edge.type_file, name_to_keys) + if unit_key and type_key: + outgoing[unit_key].add(type_key) + incoming[type_key].add(unit_key) + + return dict(outgoing), dict(incoming) + + def _resolve_edge_unit( + self, + name: str, + file_path: Optional[str], + name_to_keys: Dict[str, List[str]] + ) -> Optional[str]: + """Resolve a unit name to its key.""" + # Try direct key match + if file_path: + direct_key = f"{file_path}::{name}" + if direct_key in self._units: + return direct_key + + # Try name lookup + candidates = name_to_keys.get(name, []) + if candidates: + if file_path: + # Prefer matching 
file + for key in candidates: + if self._units[key].file_path == file_path: + return key + return candidates[0] + + # Try bare name + if " " in name: + bare_name = name.split(" ", 1)[1] + candidates = name_to_keys.get(bare_name, []) + if candidates: + return candidates[0] + + return None + + # ======================================================================== + # Pruning + # ======================================================================== + + def find_orphan_units(self) -> List[str]: + """Find isolated units (no incoming/outgoing edges, not entry point). + + Returns: + List of unit keys that are candidates for pruning + """ + outgoing, incoming = self.build_adjacency() + + isolated_keys: List[str] = [] + for key in list(self._units.keys()): + if key in self._entry_point_keys: + continue + has_outgoing = key in outgoing and len(outgoing[key]) > 0 + has_incoming = key in incoming and len(incoming[key]) > 0 + if not has_outgoing and not has_incoming: + isolated_keys.append(key) + + return isolated_keys + + def get_orphan_unit_details(self, orphan_keys: List[str]) -> List[Dict[str, Any]]: + """Get detailed information about orphan units for review. + + Args: + orphan_keys: List of unit keys to get details for + + Returns: + List of dicts with unit details (key, code, features, subtree, file_path) + """ + details = [] + for key in orphan_keys: + unit = self._units.get(key) + if unit: + details.append({ + "unit_key": key, + "unit_name": unit.name, + "file_path": unit.file_path, + "subtree": unit.subtree_name, + "code": unit.code, + "features": unit.features, + }) + return details + + def prune_units(self, keys_to_prune: List[str]) -> PruneSummary: + """Remove specified units from the store. 
+ + Args: + keys_to_prune: List of unit keys to remove + + Returns: + PruneSummary with details of what was pruned + """ + if not keys_to_prune: + logger.info("[InterfacesStore.prune_units] No units to prune") + return PruneSummary( + surviving_feature_paths=set(self._feature_to_units.keys()) + ) + + logger.info( + f"[InterfacesStore.prune_units] Pruning {len(keys_to_prune)} units " + f"out of {len(self._units)} total" + ) + + # Track feature -> unit_key mapping before removal + feature_to_unit_key: Dict[str, str] = {} + for fp, keys in self._feature_to_units.items(): + if keys: + feature_to_unit_key[fp] = next(iter(keys)) + + # Remove specified units + pruned_units: List[InterfaceUnit] = [] + pruned_files: Set[str] = set() + for key in keys_to_prune: + unit = self.remove_unit(key) + if unit: + pruned_units.append(unit) + logger.info(f"[InterfacesStore.prune_units] Pruned: {key}") + + # Identify files that were completely emptied + for unit in pruned_units: + if unit.file_path not in self._file_to_units: + pruned_files.add(unit.file_path) + logger.info( + f"[InterfacesStore.prune_units] File fully pruned: {unit.file_path}" + ) + + # Find orphan features + pruned_key_set = {u.key for u in pruned_units} + orphan_features: List[OrphanFeature] = [] + for feature_path, unit_key in feature_to_unit_key.items(): + if unit_key in pruned_key_set: + orphan_features.append(OrphanFeature( + feature_path=feature_path, + unit_key=unit_key, + )) + + if orphan_features: + logger.info( + f"[InterfacesStore.prune_units] {len(orphan_features)} features orphaned" + ) + + return PruneSummary( + pruned_units=pruned_units, + pruned_files=list(pruned_files), + orphan_features=orphan_features, + surviving_feature_paths=set(self._feature_to_units.keys()), + ) + + def prune_orphans(self) -> PruneSummary: + """Remove truly isolated units (no incoming/outgoing edges, not entry point). + + This is a convenience method that finds orphans and prunes them all. 
+ + Returns: + PruneSummary with details of what was pruned + """ + isolated_keys = self.find_orphan_units() + return self.prune_units(isolated_keys) + + def add_edges(self, edges_by_type: Dict[str, List[Dict]]) -> int: + """Add edges to the store (e.g., from orphan review completion). + + Args: + edges_by_type: Dict with keys "inheritance_edges", "invocation_edges", "reference_edges" + Format matches interfaces.json: + - inheritance_edges: {child, parent, source_file, parent_file} + - invocation_edges: {caller, callee, caller_file, callee_file} + - reference_edges: {unit, referenced_type, source_file, type_file} + + Returns: + Number of edges added + """ + added = 0 + + # Process inheritance edges + for edge in edges_by_type.get("inheritance_edges", []): + child = edge.get("child", "") + parent = edge.get("parent", "") + child_file = edge.get("source_file", "") + parent_file = edge.get("parent_file") + if child and parent and child_file: + new_edge = InheritanceEdge( + child=child, + parent=parent, + child_file=child_file, + parent_file=parent_file, + ) + if new_edge not in self._inheritance_edges: + self._inheritance_edges.append(new_edge) + added += 1 + logger.info(f"[InterfacesStore.add_edges] Added inheritance: {child} -> {parent}") + + # Process invocation edges + for edge in edges_by_type.get("invocation_edges", []): + caller = edge.get("caller", "") + callee = edge.get("callee", "") + caller_file = edge.get("caller_file", "") + callee_file = edge.get("callee_file") + if caller and callee and caller_file: + new_edge = InvocationEdge( + caller=caller, + callee=callee, + caller_file=caller_file, + callee_file=callee_file, + ) + if new_edge not in self._invocation_edges: + self._invocation_edges.append(new_edge) + added += 1 + logger.info(f"[InterfacesStore.add_edges] Added invocation: {caller} -> {callee}") + + # Process reference edges + for edge in edges_by_type.get("reference_edges", []): + unit = edge.get("unit", "") + referenced_type = 
edge.get("referenced_type", "") + source_file = edge.get("source_file", "") + type_file = edge.get("type_file") + if unit and referenced_type and source_file: + new_edge = ReferenceEdge( + unit=unit, + referenced_type=referenced_type, + source_file=source_file, + type_file=type_file, + ) + if new_edge not in self._reference_edges: + self._reference_edges.append(new_edge) + added += 1 + logger.info(f"[InterfacesStore.add_edges] Added reference: {unit} -> {referenced_type}") + + return added + + # ======================================================================== + # RPG Update + # ======================================================================== + + def update_rpg(self, rpg_path: Path) -> RPGUpdateSummary: + """Update RPG with interface design results. + + This method: + 1. Updates existing feature nodes' meta.path with implementation location + 2. Adds SAME_UNIT edges when multiple features share the same unit + 3. Adds dependency edges (INHERITS, INVOKES, REFERENCES) + 4. Marks entry points + 5. 
Prunes orphan features from RPG + + Args: + rpg_path: Path to repo_rpg.json + + Returns: + RPGUpdateSummary with operation counts + """ + from rpg.service import RPGService + from rpg.models import Node, NodeType, EdgeType + + summary = RPGUpdateSummary() + + if not rpg_path.exists(): + logger.warning(f"RPG file not found: {rpg_path}") + return summary + + try: + svc = RPGService.load(rpg_path) + except Exception as e: + logger.error(f"Failed to load RPG: {e}") + return summary + + rpg = svc.rpg + + # Remove old edges by generator + svc.refresh_stage_edges("design_interfaces") + + # Build feature name -> node mapping + feature_nodes: Dict[str, Node] = {} + for node in rpg.nodes.values(): + if node.node_type == "feature" or node.level == rpg.MAX_FEATURE_LEVEL: + feature_nodes[node.name] = node + feature_path = node.feature_path() + if feature_path: + feature_nodes[feature_path] = node + + # Track unit -> list of feature nodes for SAME_UNIT edges + unit_to_features: Dict[str, List[Node]] = defaultdict(list) + + # Update feature nodes with implementation paths + for unit in self._units.values(): + impl_path = f"{unit.file_path}::{unit.name}" + + for feature_path in unit.features: + feature_node = feature_nodes.get(feature_path) + if not feature_node: + # Try by name + feature_name = feature_path.split("/")[-1] if "/" in feature_path else feature_path + feature_node = feature_nodes.get(feature_name) + + if not feature_node: + logger.debug(f"Feature node not found: {feature_path}") + summary.skipped_features += 1 + continue + + # Infer type_name + inferred_type: Optional[NodeType] = None + if unit.unit_type == "class": + inferred_type = NodeType.CLASS + elif unit.unit_type == "function": + inferred_type = NodeType.FUNCTION + + # Update via service for consistency + svc.update_feature_mapping(feature_node, impl_path, inferred_type) + + summary.updated_features += 1 + unit_to_features[impl_path].append(feature_node) + + # Create new feature nodes for glue/orchestration 
code + summary.created_new_features = self._create_new_feature_nodes( + svc, rpg, feature_nodes, unit_to_features + ) + + # Add SAME_UNIT edges + for impl_path, feature_list in unit_to_features.items(): + if len(feature_list) < 2: + continue + + for i in range(len(feature_list)): + for j in range(i + 1, len(feature_list)): + if svc.add_dependency_edge( + feature_list[i], feature_list[j], + EdgeType.SAME_UNIT, "design_interfaces", + description=f"Share implementation: {impl_path}", + bidirectional_dedup=True, + ): + summary.added_same_unit_edges += 1 + + # Add dependency edges + summary.added_dependency_edges += self._add_rpg_dependency_edges(svc) + + # Mark entry points + summary.marked_entry_points = self._mark_rpg_entry_points(svc, rpg) + + # Prune orphan features + prune_result = svc.prune_orphan_features(self.surviving_feature_paths) + summary.pruned_feature_nodes = prune_result[0] + summary.pruned_parent_nodes = prune_result[1] + summary.pruned_edges = prune_result[2] + + # Save RPG + svc.save(rpg_path) + + total_changes = ( + summary.updated_features + summary.created_new_features + + summary.added_same_unit_edges + summary.added_dependency_edges + + summary.marked_entry_points + ) + if total_changes > 0: + parts = [f"{summary.updated_features} features updated"] + if summary.created_new_features > 0: + parts.append(f"{summary.created_new_features} new features created") + parts.append(f"{summary.added_same_unit_edges} SAME_UNIT") + parts.append(f"{summary.added_dependency_edges} dependency edges") + parts.append(f"{summary.marked_entry_points} entry points") + print(f" RPG updated: {', '.join(parts)}") + if summary.pruned_feature_nodes > 0: + print( + f" RPG pruned: {summary.pruned_feature_nodes} feature nodes, " + f"{summary.pruned_parent_nodes} parent nodes, " + f"{summary.pruned_edges} edges" + ) + + return summary + + def _create_new_feature_nodes( + self, + svc, + rpg, + feature_nodes: Dict[str, Any], + unit_to_features: Dict[str, List] + ) -> int: + 
"""Create new feature nodes in RPG for glue/orchestration code. + + Args: + svc: RPGService instance for node creation + rpg: The RPG object (for read-only node lookups) + feature_nodes: Existing feature name -> node mapping (will be updated) + unit_to_features: Mapping of impl_path -> feature nodes (will be updated) + + Returns: + Number of new feature nodes created + """ + from rpg.models import NodeType + + created = 0 + + for feature_path, unit_key in self._new_features.items(): + unit = self._units.get(unit_key) + if not unit: + logger.warning(f"Unit not found for new feature: {feature_path} -> {unit_key}") + continue + + # Skip if already exists + if feature_path in feature_nodes: + logger.debug(f"Feature already exists: {feature_path}") + continue + + impl_path = f"{unit.file_path}::{unit.name}" + + # Parse feature path to determine parent + # Format: "Subtree Name/category/subcategory/feature name" + path_parts = feature_path.split("/") + if len(path_parts) < 2: + logger.warning(f"Invalid new feature path format: {feature_path}") + continue + + feature_name = path_parts[-1] + subtree_name = path_parts[0] + + # Find parent node - try to find the closest existing parent + # Prefer full path match at each level; fall back to bare name. + parent_node = None + for i in range(len(path_parts) - 1, 0, -1): + parent_path = "/".join(path_parts[:i]) + if parent_path in feature_nodes: + parent_node = feature_nodes[parent_path] + break + parent_name = path_parts[i - 1] + # Skip name lookup when it equals path (i.e., i == 1, single-segment) + if parent_name != parent_path and parent_name in feature_nodes: + parent_node = feature_nodes[parent_name] + break + + # Last-resort: scan all nodes (not only features) for subtree_name + if not parent_node: + for node in rpg.nodes.values(): + if node.name == subtree_name: + parent_node = node + break + + if not parent_node: + logger.warning( + f"Cannot find parent for new feature: {feature_path}. " + f"Creating as root-level node." 
+ ) + + # Create new feature node + type_name = NodeType.CLASS if unit.unit_type == "class" else NodeType.FUNCTION + + if parent_node: + new_node = svc.add_feature_node( + name=feature_name, + parent=parent_node, + impl_path=impl_path, + type_name=type_name, + generator="design_interfaces", + description=f"Auto-created for glue code: {unit.name}", + ) + else: + # No parent found โ€” create as root-level orphan via rpg.add_node + # to ensure _graph wiring and ID-collision safeguards apply. + from rpg.models import Node, NodeMetaData, uuid8 as _uuid8 + new_node = Node( + id=f"{feature_name}_{_uuid8()}", + name=feature_name, + node_type="feature", + meta=NodeMetaData( + path=impl_path, + description=f"Auto-created for glue code: {unit.name}", + generator="design_interfaces", + type_name=type_name, + ) + ) + new_node.level = rpg.MAX_FEATURE_LEVEL + rpg.add_node(new_node) + + # Update feature_nodes mapping + feature_nodes[feature_path] = new_node + feature_nodes[feature_name] = new_node + + # Update unit_to_features for SAME_UNIT edges + unit_to_features[impl_path].append(new_node) + + created += 1 + logger.info(f"Created new feature node: {feature_path} -> {new_node.id}") + + return created + + def _add_rpg_dependency_edges(self, svc) -> int: + """Add INHERITS, INVOKES, REFERENCES edges to RPG.""" + from rpg.models import EdgeType + + added = 0 + + # Inheritance edges + for edge in self._inheritance_edges: + child_node = svc.find_node_by_unit_name(edge.child) + parent_node = svc.find_node_by_unit_name(edge.parent) + if child_node and parent_node: + if svc.add_dependency_edge( + child_node, parent_node, EdgeType.INHERITS, + "design_interfaces", + description=f"{edge.child} inherits from {edge.parent}", + ): + added += 1 + + # Invocation edges + for edge in self._invocation_edges: + caller_node = svc.find_node_by_unit_name(edge.caller) + callee_node = svc.find_node_by_unit_name(edge.callee) + if caller_node and callee_node: + if svc.add_dependency_edge( + caller_node, 
callee_node, EdgeType.INVOKES, + "design_interfaces", + description=f"{edge.caller} invokes {edge.callee}", + ): + added += 1 + + # Reference edges + for edge in self._reference_edges: + unit_node = svc.find_node_by_unit_name(edge.unit) + type_node = svc.find_node_by_unit_name(edge.referenced_type) + if unit_node and type_node: + if svc.add_dependency_edge( + unit_node, type_node, EdgeType.REFERENCES, + "design_interfaces", + description=f"{edge.unit} references type {edge.referenced_type}", + ): + added += 1 + + return added + + def _mark_rpg_entry_points(self, svc, rpg) -> int: + """Mark entry points on RPG nodes.""" + marked = 0 + global_review = self._global_review + entry_points = global_review.get("entry_points", []) + + for ep in entry_points: + ep_unit = ep.get("unit_name", "") + ep_file = ep.get("file_path", "") + ep_rationale = ep.get("rationale", "") + + if not ep_unit: + continue + + ep_node = svc.find_node_by_unit_name(ep_unit) + + if not ep_node: + expected_path = f"{ep_file}::{ep_unit}" if ep_file else "" + if expected_path: + for node in rpg.nodes.values(): + if node.meta and node.meta.path == expected_path: + ep_node = node + break + + if ep_node: + svc.mark_entry_point(ep_node, ep_rationale) + marked += 1 + + return marked + + @property + def surviving_feature_paths(self) -> Set[str]: + """Get all feature paths that have at least one unit.""" + return set(self._feature_to_units.keys()) + + # ======================================================================== + # Symbol Resolution (for GlobalInterfaceRegistry compatibility) + # ======================================================================== + + def resolve_callee(self, callee_name: str) -> Optional[str]: + """Resolve a callee name to its file_path. + + Compatible with GlobalInterfaceRegistry.resolve_callee(). 
+ """ + # Exact match in class/function indexes + if callee_name in self._class_to_file: + return self._class_to_file[callee_name] + if callee_name in self._function_to_file: + return self._function_to_file[callee_name] + + # Try with prefix stripped + stripped = callee_name + if callee_name.startswith("class "): + stripped = callee_name[len("class "):] + elif callee_name.startswith("function "): + stripped = callee_name[len("function "):] + + if stripped != callee_name: + if stripped in self._class_to_file: + return self._class_to_file[stripped] + if stripped in self._function_to_file: + return self._function_to_file[stripped] + + # Case-insensitive fallback + callee_lower = callee_name.lower() + for name, path in self._class_to_file.items(): + if name.lower() == callee_lower: + return path + for name, path in self._function_to_file.items(): + if name.lower() == callee_lower: + return path + + return None + + def get_all_public_symbols(self) -> Dict[str, str]: + """Return {symbol_name: file_path} for all registered symbols.""" + symbols = {} + symbols.update(self._class_to_file) + symbols.update(self._function_to_file) + return symbols + + # ======================================================================== + # Serialization + # ======================================================================== + + def to_interfaces_json(self) -> Dict[str, Any]: + """Export to interfaces.json format. 
+ + Returns: + Dict compatible with current interfaces.json structure + """ + subtrees: Dict[str, Any] = {} + + for subtree_name in self.subtree_order: + files = sorted(self._subtree_to_files.get(subtree_name, set())) + subtree_interfaces: Dict[str, Any] = {} + + for file_path in files: + unit_keys = self._file_to_units.get(file_path, []) + units = [self._units[k] for k in unit_keys if k in self._units] + + if not units: + continue + + subtree_interfaces[file_path] = { + "units": [u.name for u in units], + "units_to_features": {u.name: u.features for u in units}, + "units_to_code": {u.name: u.code for u in units}, + "file_code": "\n\n".join(u.code for u in units), + } + + subtrees[subtree_name] = { + "files_order": files, + "interfaces": subtree_interfaces, + } + + result = { + "subtrees": subtrees, + "subtree_order": self.subtree_order, + "implemented_subtrees": { + st: sorted(self._subtree_to_files.get(st, set())) + for st in self.subtree_order + }, + "enhanced_data_flow": { + "original_edges": self._original_data_flow_edges, + "inheritance_edges": [e.to_dict() for e in self._inheritance_edges], + "invocation_edges": [e.to_dict() for e in self._invocation_edges], + "reference_edges": [e.to_dict() for e in self._reference_edges], + }, + "success": True, + } + + # Include new features summary + if self._new_features: + result["new_features"] = self.get_new_features_summary() + + # Include global review if set + if self._global_review: + result["global_review"] = self._global_review + + return result + + @classmethod + def from_legacy_format( + cls, + interfaces_data: Dict[str, Any], + enhanced_data_flow: Optional[Dict[str, Any]] = None, + global_review: Optional[Dict[str, Any]] = None, + ) -> "InterfacesStore": + """Construct store from current interfaces_data dict format. 
+ + Args: + interfaces_data: The interfaces.json dict structure + enhanced_data_flow: The enhanced_data_flow dict (or from interfaces_data) + global_review: Global review results (or from interfaces_data) + + Returns: + InterfacesStore populated with units and edges + """ + store = cls() + store.subtree_order = interfaces_data.get("subtree_order", []) + + # Use enhanced_data_flow from parameter or from interfaces_data + if enhanced_data_flow is None: + enhanced_data_flow = interfaces_data.get("enhanced_data_flow", {}) + + store._original_data_flow_edges = enhanced_data_flow.get("original_edges", []) + + # Load units from subtrees + subtrees = interfaces_data.get("subtrees", {}) + for subtree_name, subtree_data in subtrees.items(): + file_interfaces = subtree_data.get("interfaces", subtree_data.get("files", {})) + + for file_path, file_data in file_interfaces.items(): + units_to_features = file_data.get("units_to_features", {}) + units_to_code = file_data.get("units_to_code", {}) + + for unit_name in file_data.get("units", []): + unit = InterfaceUnit( + name=unit_name, + file_path=file_path, + subtree_name=subtree_name, + features=units_to_features.get(unit_name, []), + code=units_to_code.get(unit_name, ""), + ) + store.add_unit(unit) + + # Load new features from top-level list + for nf in interfaces_data.get("new_features", []): + feature_path = nf.get("feature_path", "") + file_path = nf.get("file_path", "") + unit_name = nf.get("unit_name", "") + if feature_path and file_path and unit_name: + unit_key = f"{file_path}::{unit_name}" + store.register_new_feature(feature_path, unit_key) + + # Load edges + for e in enhanced_data_flow.get("inheritance_edges", []): + store._inheritance_edges.append(InheritanceEdge( + child=e.get("child", ""), + parent=e.get("parent", ""), + child_file=e.get("source_file", ""), + parent_file=e.get("parent_file"), + generator=e.get("generator", "design_interfaces"), + )) + + for e in enhanced_data_flow.get("invocation_edges", []): + 
store._invocation_edges.append(InvocationEdge( + caller=e.get("caller", ""), + callee=e.get("callee", ""), + caller_file=e.get("caller_file", ""), + callee_file=e.get("callee_file"), + generator=e.get("generator", "design_interfaces"), + )) + + for e in enhanced_data_flow.get("reference_edges", []): + store._reference_edges.append(ReferenceEdge( + unit=e.get("unit", ""), + referenced_type=e.get("referenced_type", ""), + source_file=e.get("source_file", ""), + type_file=e.get("type_file"), + generator=e.get("generator", "design_interfaces"), + )) + + # Load global review + if global_review is None: + global_review = interfaces_data.get("global_review", {}) + store._global_review = global_review + + # Set entry points from global review + entry_points = global_review.get("entry_points", []) + store.set_entry_points(entry_points) + + return store + + # ======================================================================== + # Debug / Info + # ======================================================================== + + def get_stats(self) -> Dict[str, int]: + """Get summary statistics.""" + return { + "units": len(self._units), + "files": len(self._file_to_units), + "subtrees": len(self._subtree_to_files), + "features": len(self._feature_to_units), + "inheritance_edges": len(self._inheritance_edges), + "invocation_edges": len(self._invocation_edges), + "reference_edges": len(self._reference_edges), + "entry_points": len(self._entry_point_keys), + } diff --git a/RPG-Kit/scripts/generate_viz.py b/RPG-Kit/scripts/generate_viz.py new file mode 100644 index 0000000..07dd75d --- /dev/null +++ b/RPG-Kit/scripts/generate_viz.py @@ -0,0 +1,352 @@ +import json +import argparse +from pathlib import Path + +from common.paths import DATA_FLOW_FILE, DATA_FLOW_VIZ_FILE + +# HTML Template +HTML_TEMPLATE = """ + + + + + RPG Kit Data Flow Architecture + + + + + + + + + + +
+

Data Flow: Architecture View

+
+ + + +
+
+
+ +
+
+ +
+
+ + +
+
+
+ + +
+
+ +
+

Details

+
+
+ + + + + +""" + +def generate_visualization(json_path, output_path): + print(f"Reading data from: {json_path}") + try: + with open(json_path, 'r', encoding='utf-8') as f: + data = json.load(f) + except FileNotFoundError: + print(f"Error: File not found {json_path}") + return + except json.JSONDecodeError: + print(f"Error: Invalid JSON in {json_path}") + return + + # Process data to JSON string + json_str = json.dumps(data, indent=2) + + # Inject into HTML + html_content = HTML_TEMPLATE.replace('__JSON_DATA_PLACEHOLDER__', json_str) + + print(f"Writing visualization to: {output_path}") + with open(output_path, 'w', encoding='utf-8') as f: + f.write(html_content) + + print("Done!") + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Generate Data Flow Visualization") + parser.add_argument("--input", "-i", type=Path, default=DATA_FLOW_FILE, help="Input data flow JSON file") + parser.add_argument("--output", "-o", type=Path, default=DATA_FLOW_VIZ_FILE, help="Output HTML file") + + args = parser.parse_args() + + generate_visualization(args.input, args.output) diff --git a/RPG-Kit/scripts/init_codebase.py b/RPG-Kit/scripts/init_codebase.py new file mode 100644 index 0000000..9f1f843 --- /dev/null +++ b/RPG-Kit/scripts/init_codebase.py @@ -0,0 +1,726 @@ +#!/usr/bin/env python3 +"""Initialize Codebase Script - Setup Initial Repository. + +Sets up the initial repository state before TDD implementation: +1. Ensures we're on the main branch +2. Creates README.md with repository info +3. Creates .gitignore with Python cache rules +4. Writes base classes from base_classes.json +5. Creates an initial commit + +This matches ZeroRepo's _setup_initial_repository() logic. +Interfaces and __init__.py are created during the TDD loop. 
+ +Output: JSON with initialization status + +Usage: + python init_codebase.py # Initialize codebase + python init_codebase.py --dry-run # Preview without writing files + python init_codebase.py --no-commit # Write files but don't commit +""" + +import json +import argparse +import sys +from pathlib import Path +from datetime import datetime +from typing import Dict, Any, Optional, List + +# Add scripts directory to path for imports +sys.path.insert(0, str(Path(__file__).parent)) + +from common.git_utils import GitRunner +from common.paths import ( + BASE_CLASSES_FILE, + INTERFACES_FILE, + REPO_RPG_FILE, + FEATURE_BUILD_FILE, + CODE_GEN_STATE_FILE as STATE_FILE, + get_scripts_dir, + REPO_DIR, +) +from common.execution_state import load_code_gen_state, save_code_gen_state +from code_gen.context_collector import write_interface_skeletons + + +# Default .gitignore content for Python projects. +# +# Split into two logical blocks so ``create_gitignore`` can be smart: +# * ``_GITIGNORE_PYTHON_BLOCK`` โ€” generic Python / OS / IDE ignores. +# Modeled on the canonical ``github/gitignore/Python.gitignore`` template +# (trimmed of niche framework sections: Django/Flask/Scrapy/SageMath/ +# Celery/Translations) plus the modern tool-cache entries (ruff, mypy, +# pyright) and the common OS-junk lines (.DS_Store, Thumbs.db). Written +# only when the user's existing ``.gitignore`` lacks ``__pycache__/``. +# * ``_GITIGNORE_RPGKIT_BLOCK`` โ€” RPG-Kit-specific ignores (the entire +# ``.rpgkit/`` runtime tree, the ``.claude`` workspace symlink, and the +# ``.venv_dev/`` / ``.rpgkit_dev_env/`` venvs created by the codegen +# pipeline). Appended whenever the existing ``.gitignore`` lacks +# ``.rpgkit/``, regardless of whether Python ignores are already present. +# This guarantees that an existing Python project getting bootstrapped +# by ``init_codebase`` still gets the RPG-Kit runtime files ignored. 
+_GITIGNORE_PYTHON_BLOCK = """# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Sphinx / mkdocs documentation +docs/_build/ +/site + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# PEP 582 +__pypackages__/ + +# Type checkers +.mypy_cache/ +.dmypy.json +dmypy.json +.pyre/ +.pytype/ +pyrightconfig.json + +# Linters / formatters +.ruff_cache/ + +# Cython debug symbols +cython_debug/ + +# Environments +.env +.env.local +.env.*.local +env/ +venv/ +ENV/ +.venv/ +env.bak/ +venv.bak/ + +# Logs +*.log + +# IDE / editors +.idea/ +.vscode/ +*.swp +*.swo +*~ + +# OS files +.DS_Store +Thumbs.db +ehthumbs.db +desktop.ini +""" + +_GITIGNORE_RPGKIT_BLOCK = """# RPG-Kit runtime workspace +# The entire .rpgkit/ tree is internal tooling state: logs, scripts copy, +# state snapshots, trajectory traces, encoder/codegen JSON artifacts. +# Treat it as ephemeral โ€” none of it should be tracked in the project repo. +.rpgkit/ + +# RPG-Kit dev environments (created by codegen pipeline) +.venv_dev/ +.rpgkit_dev_env/ + +# RPG-Kit workspace symlink +.claude +""" + +# Kept for backward compatibility with any external import โ€” equivalent to +# the full ``.gitignore`` written for a brand-new project. 
+GITIGNORE_CONTENT = _GITIGNORE_PYTHON_BLOCK + "\n" + _GITIGNORE_RPGKIT_BLOCK + + +def _gitignore_has_python_block(existing: str) -> bool: + """Heuristic: does an existing .gitignore already cover Python cache?""" + return "__pycache__/" in existing + + +def _gitignore_has_rpgkit_block(existing: str) -> bool: + """Heuristic: does an existing .gitignore already ignore .rpgkit/? + + Accepts the line-anchored form ``.rpgkit/`` or ``.rpgkit`` (without a + leading ``#``) so that earlier handwritten variants still count as + "already configured" and don't get a duplicate block appended. + """ + for raw in existing.splitlines(): + line = raw.strip() + if not line or line.startswith("#"): + continue + if line in (".rpgkit", ".rpgkit/", "/.rpgkit", "/.rpgkit/"): + return True + return False + + +# ============================================================================ +# Agent Detection & Persistent Instructions +# ============================================================================ +# +# Removed in commit C4 (see plans/20260508-1-rpgkit-optimization*.md): the +# previously-generated `repo/.claude/rules/rpgkit-codegen.md` and +# `repo/.github/instructions/rpgkit-codegen.instructions.md` files were +# auto-loaded by Claude Code / Copilot for **every** session, contaminating +# unrelated commands (rpg_edit, encode, plain Q&A) with codegen-only +# instructions. The recovery-after-/compact concern is already handled by +# `templates/commands/code_gen.md` itself, which the user re-invokes via +# `/rpgkit.code_gen`. +# +# `rpgkit update` cleans up any stale `rpgkit-codegen.*` files left in older +# user workspaces (see src/rpgkit_cli/__init__.py). 
+
+
+def load_json_file(path: Path) -> Dict[str, Any]:
+    """Load a JSON file; return {} when the file is missing, unreadable, or invalid JSON."""
+    if not path.exists():
+        return {}
+
+    try:
+        with open(path, 'r', encoding='utf-8') as f:
+            return json.load(f)
+    except Exception:
+        return {}
+
+
+def get_repo_info() -> tuple:
+    """Get repository name and purpose from RPG files.
+
+    Returns (repo_name, repo_purpose); falls back to (directory name, "").
+    """
+    # Try build_feature.json first
+    build_feature = load_json_file(FEATURE_BUILD_FILE)
+    if build_feature:
+        name = build_feature.get("repository_name", "")
+        purpose = build_feature.get("repository_purpose", "")
+        if name:
+            return name, purpose
+
+    # Try repo_rpg.json
+    repo_rpg = load_json_file(REPO_RPG_FILE)
+    if repo_rpg:
+        name = repo_rpg.get("repo_name", "")
+        info = repo_rpg.get("repo_info", "")
+        if name:
+            return name, info
+
+    # Fallback to directory name
+    return REPO_DIR.name, ""
+
+
+def write_file(file_path: Path, content: str, dry_run: bool = False) -> bool:
+    """Write content to a file, creating directories as needed.
+
+    Returns True if successful or would succeed (dry_run).
+    """
+    if dry_run:
+        return True
+
+    try:
+        file_path.parent.mkdir(parents=True, exist_ok=True)
+        with open(file_path, 'w', encoding='utf-8') as f:
+            f.write(content)
+        return True
+    except Exception:  # best-effort contract: failure is signalled via the return value
+        return False
+
+
+def create_readme(repo_path: Path, dry_run: bool = False) -> bool:
+    """Create README.md if it doesn't exist."""
+    readme_path = repo_path / "README.md"
+
+    if readme_path.exists():
+        return False  # Already exists, no change
+
+    repo_name, repo_purpose = get_repo_info()
+
+    content = f"# {repo_name}\n\n"
+    if repo_purpose:
+        content += f"{repo_purpose}\n\n"
+
+    return write_file(readme_path, content, dry_run)
+
+
+def create_gitignore(repo_path: Path, dry_run: bool = False) -> bool:
+    """Create or update ``.gitignore`` to cover Python cache and RPG-Kit runtime.
+
+    Behavior matrix:
+
+    * ``.gitignore`` does not exist → write the full template (Python +
+      RPG-Kit blocks).
+ * Exists, lacks Python block โ†’ append Python + RPG-Kit blocks. + * Exists, has Python block, no RPG-Kit โ†’ append only the RPG-Kit block. + * Exists, has both blocks โ†’ no-op. + + Returns True when the file was created/modified, False when nothing changed + or an error prevented writing. + """ + gitignore_path = repo_path / ".gitignore" + + if not gitignore_path.exists(): + return write_file(gitignore_path, GITIGNORE_CONTENT, dry_run) + + try: + existing = gitignore_path.read_text(encoding='utf-8') + except Exception: + return False + + has_python = _gitignore_has_python_block(existing) + has_rpgkit = _gitignore_has_rpgkit_block(existing) + + if has_python and has_rpgkit: + return False # Already fully configured + + additions = "" + if not has_python: + additions += _GITIGNORE_PYTHON_BLOCK + if not has_rpgkit: + # Separate the two blocks with a blank line for readability. + if additions: + additions += "\n" + additions += _GITIGNORE_RPGKIT_BLOCK + + if not additions: + return False + + if not dry_run: + try: + new_content = existing.rstrip() + "\n\n" + additions + gitignore_path.write_text(new_content, encoding='utf-8') + except Exception: + return False + return True + + +def write_base_classes( + repo_path: Path, + base_classes_path: Path, + dry_run: bool = False +) -> List[str]: + """Write base classes from base_classes.json. + + Returns list of files written. 
+ """ + base_classes_data = load_json_file(base_classes_path) + if not base_classes_data: + return [] + + files_written = [] + + # Check for "files" field (pre-aggregated file contents) + if "files" in base_classes_data: + for file_path, content in base_classes_data["files"].items(): + full_path = repo_path / file_path + if write_file(full_path, content, dry_run): + files_written.append(file_path) + + # Check for "base_classes" array + base_class_list = base_classes_data.get("base_classes", []) + + # Group by file_path to avoid overwriting + file_contents: Dict[str, List[str]] = {} + + for bc in base_class_list: + file_path = bc.get("file_path", "") + code = bc.get("code", "") + + if file_path and code: + if file_path not in file_contents: + file_contents[file_path] = [] + file_contents[file_path].append(code) + + # Write aggregated content + for file_path, code_blocks in file_contents.items(): + if file_path in files_written: + continue # Already written from "files" field + + content = "\n\n".join(code_blocks) + full_path = repo_path / file_path + if write_file(full_path, content, dry_run): + files_written.append(file_path) + + # Check for "data_structures" array (data flow type stubs) + # Note: file_path may be empty if not yet assigned by interface designer + data_structures_list = base_classes_data.get("data_structures", []) + + ds_file_contents: Dict[str, List[str]] = {} + + for ds in data_structures_list: + file_path = ds.get("file_path", "") + code = ds.get("code", "") + + if file_path and code: # Skip entries without file_path + if file_path not in ds_file_contents: + ds_file_contents[file_path] = [] + ds_file_contents[file_path].append(code) + + # Write data structure stubs - append to existing files or create new + for file_path, code_blocks in ds_file_contents.items(): + content = "\n\n".join(code_blocks) + full_path = repo_path / file_path + + if file_path in files_written: + # File was already written by base_classes - append data structures + if not 
dry_run:
+            existing = full_path.read_text(encoding='utf-8')  # NOTE(review): may raise if the earlier write silently failed — consider guarding like write_file
+            combined = existing.rstrip() + "\n\n\n" + content
+            full_path.write_text(combined, encoding='utf-8')
+        else:
+            if write_file(full_path, content, dry_run):
+                files_written.append(file_path)
+
+    return files_written
+
+
+def create_initial_commit(
+    repo_path: Path,
+    files_written: List[str],
+    readme_created: bool,
+    gitignore_created: bool
+) -> Optional[str]:
+    """Stage and commit all written files.
+
+    Returns the commit hash on success, "no-changes" when there is nothing
+    to commit, or None on error.
+    """
+    try:
+        git = GitRunner(str(repo_path))
+
+        parts = []
+        if readme_created:
+            parts.append("README")
+        if gitignore_created:
+            parts.append(".gitignore")
+        if files_written:
+            parts.append(f"{len(files_written)} base class files")
+
+        if not parts:
+            return "no-changes"
+
+        if not git.has_uncommitted_changes():  # nothing staged or dirty → treat as a no-op, not an error
+            return "no-changes"
+
+        message = "chore: initial repository setup\n\n"
+        message += "Add " + ", ".join(parts)
+
+        success, commit_hash = git.stage_and_commit(message)
+
+        if success and commit_hash:
+            return commit_hash
+
+        return None
+
+    except Exception:  # git failures degrade to None so the caller can report without crashing
+        return None
+
+
+def update_code_gen_state(state_path: Path, initial_commit: str) -> None:
+    """Record the initial commit in code_gen_state.jsonl (appends a state line)."""
+    state = load_code_gen_state(state_path)
+    state.initialized = True
+    state.initialized_at = datetime.now().isoformat()
+    state.initial_commit = initial_commit
+    save_code_gen_state(state, state_path)
+
+
+def init_codebase(
+    repo_path: Path = None,
+    base_classes_path: Path = BASE_CLASSES_FILE,
+    state_path: Path = STATE_FILE,
+    dry_run: bool = False,
+    no_commit: bool = False
+) -> Dict[str, Any]:
+    """Initialize the codebase with README, .gitignore, and base classes.
+
+    Matches ZeroRepo's _setup_initial_repository() logic.
+ + Args: + repo_path: Repository path (defaults to cwd) + base_classes_path: Path to base_classes.json + state_path: Path to code_gen_state.jsonl + dry_run: Preview without writing files + no_commit: Write files but don't commit + + Returns: + Dict with initialization results + """ + repo_path = repo_path or REPO_DIR + + # Ensure repo directory exists + repo_path.mkdir(parents=True, exist_ok=True) + + # Ensure .rpgkit/ runtime directories exist. This is normally already + # done by ``rpgkit init`` / ``rpgkit update`` (see + # ``rpgkit_cli.ensure_rpgkit_runtime_dirs``), but we mkdir here too as + # a safety net: a workspace created by an older rpgkit may lack + # ``.rpgkit/logs/``, in which case stage prompts that redirect with + # shell ``>`` fail before the Python process can recover. Creating + # them here at code_gen bootstrap is harmless and idempotent. + from common.paths import LOGS_DIR, DATA_DIR, TRAJECTORY_DIR + for d in (LOGS_DIR, DATA_DIR, TRAJECTORY_DIR): + d.mkdir(parents=True, exist_ok=True) + + # NOTE: A previous version of this function created a ``.claude`` + # symlink inside ``repo/`` because the project repo lived in a + # ``/repo/`` subdirectory and sub-agents ran with cwd = + # repo/. After the workspace==repo unification the workspace root + # IS the project repo root, so ``.claude`` is already at the right + # location and the symlink is unnecessary (and would point at + # ``/.claude``, i.e. outside the workspace). + # Block removed deliberately; do NOT reintroduce. + + # Check if already initialized + if state_path.exists(): + try: + state = load_code_gen_state(state_path) + if state.initialized: + scripts = get_scripts_dir() + return { + "success": False, + "error": "Codebase already initialized", + "initial_commit": state.initial_commit, + "initialized_at": state.initialized_at, + "suggestion": "Run run_batch.py to start codegen", + "next_action": ( + f"Already initialized. 
Run: python3 {scripts}/run_batch.py --next --json " + f"to start the next batch." + ) + } + except Exception: + pass + + # Ensure on main branch and clean workspace + if not dry_run: + git = GitRunner(str(repo_path)) + git.ensure_clean_workspace() + success, msg = git.ensure_main_branch() + if not success: + return { + "success": False, + "error": msg, + "suggestion": "Manually switch to main branch and retry", + "next_action": "Git branch error. Switch to the main branch manually, then re-run init_codebase.py --json." + } + + # Track changes + readme_created = False + gitignore_created = False + base_files = [] + + # 1. Create README.md + readme_created = create_readme(repo_path, dry_run) + + # 2. Create/update .gitignore + gitignore_created = create_gitignore(repo_path, dry_run) + + # 3. Write base classes + if base_classes_path.exists(): + base_files = write_base_classes(repo_path, base_classes_path, dry_run) + + # 4. Write interface skeletons (one-time, from interfaces.json) + skeletons_written: List[str] = [] + if not dry_run and INTERFACES_FILE.exists(): + try: + skel_result = write_interface_skeletons(INTERFACES_FILE, repo_path) + skeletons_written = skel_result.get("written", []) + except Exception as e: + print(f"Warning: failed to write interface skeletons: {e}", file=sys.stderr) + + # Check if any changes were made + has_changes = readme_created or gitignore_created or base_files or skeletons_written + + if not has_changes: + # Mark initialized even if no file changes were needed + if not dry_run: + state = load_code_gen_state(state_path) + if not state.initialized: + state.initialized = True + state.initialized_at = datetime.now().isoformat() + save_code_gen_state(state, state_path) + scripts = get_scripts_dir() + return { + "success": True, + "message": "Repository already set up, no changes needed", + "readme_created": False, + "gitignore_created": False, + "base_class_files": 0, + "next_action": ( + f"Codebase already set up. 
Run: python3 {scripts}/run_batch.py --next --json " + f"to start the first batch." + ) + } + + # 5. Create commit + commit_hash = None + if not dry_run and not no_commit: + commit_hash = create_initial_commit( + repo_path, + base_files + skeletons_written, + readme_created, + gitignore_created + ) + + if commit_hash and commit_hash not in ["no-changes", None]: + state = load_code_gen_state(state_path) + state.interfaces_written = bool(skeletons_written) + save_code_gen_state(state, state_path) + update_code_gen_state(state_path, commit_hash) + + return { + "success": True, + "dry_run": dry_run, + "readme_created": readme_created, + "gitignore_created": gitignore_created, + "base_class_files": len(base_files), + "base_class_file_list": base_files, + "skeleton_files": len(skeletons_written), + "skeleton_file_list": skeletons_written, + "commit_hash": commit_hash, + "message": "Repository initialized successfully" if not dry_run else "Dry run complete", + "next_action": ( + f"Codebase initialized. Run: python3 {get_scripts_dir()}/run_batch.py --next --json " + f"to start the first batch." + ) if not dry_run else "Dry run complete. Re-run without --dry-run to apply changes." 
def _print_banner(title: str, width: int = 62) -> None:
    """Print a fixed-width box banner with matching top/bottom borders.

    The original hand-drawn banners had mismatched border widths between
    the DRY RUN and INITIALIZED variants; generating them keeps the box
    symmetric for any title.
    """
    print("\n╔" + "═" * width + "╗")
    print("║" + title.center(width) + "║")
    print("╚" + "═" * width + "╝")


def print_result(result: Dict[str, Any], json_output: bool = False):
    """Print the result in a user-friendly format.

    Args:
        result: Result dict produced by ``init_codebase()``.
        json_output: When True, dump the raw result dict as JSON and return.
    """
    if json_output:
        print(json.dumps(result, indent=2))
        return

    if not result.get("success"):
        print(f"\nError: {result.get('error', 'Unknown error')}")
        if result.get("suggestion"):
            print(f"  Suggestion: {result['suggestion']}")
        return

    # Banner reflects whether anything was actually written.
    _print_banner("DRY RUN PREVIEW" if result.get("dry_run") else "REPOSITORY INITIALIZED")

    print("\n  Files created/updated:")
    print(f"  - README.md: {'[OK] created' if result.get('readme_created') else '[-] already exists'}")
    print(f"  - .gitignore: {'[OK] created/updated' if result.get('gitignore_created') else '[-] already exists'}")

    base_files = result.get("base_class_files", 0)
    if base_files > 0:
        print(f"  - Base classes: {base_files} files")
        # Show at most five paths to keep the summary readable.
        for path in result.get("base_class_file_list", [])[:5]:
            print(f"      - {path}")
        if base_files > 5:
            print(f"      ... and {base_files - 5} more")
    else:
        print("  - Base classes: (none found in base_classes.json)")

    commit_hash = result.get("commit_hash")
    if commit_hash:
        if commit_hash == "no-changes":
            print("\n  No changes to commit")
        else:
            # Abbreviated hash, like `git log --oneline`.
            print(f"\n  Initial commit: {commit_hash[:8]}")

    print("\n  " + "─" * 60)
    print("  Next step: Run /rpgkit.code_gen to start TDD")


def main():
    """CLI entry point: parse arguments, run init_codebase(), report.

    Returns:
        Process exit code: 0 on success, 1 on failure.
    """
    parser = argparse.ArgumentParser(
        description="Initialize codebase with README, .gitignore, and base classes"
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Preview without writing files or creating commits"
    )
    parser.add_argument(
        "--no-commit",
        action="store_true",
        help="Write files but don't create a commit"
    )
    parser.add_argument(
        "--json",
        action="store_true",
        help="Output as JSON"
    )
    parser.add_argument(
        "--base-classes", "-b",
        type=Path,
        default=BASE_CLASSES_FILE,
        help=f"Input base classes file (default: {BASE_CLASSES_FILE})"
    )

    args = parser.parse_args()

    result = init_codebase(
        base_classes_path=args.base_classes,
        dry_run=args.dry_run,
        no_commit=args.no_commit,
    )

    print_result(result, json_output=args.json)

    return 0 if result.get("success") else 1


if __name__ == "__main__":
    # SystemExit instead of the site-module exit(): works under `python -S`.
    raise SystemExit(main())
+ +Tools provided: +- ``search_rpg`` -- search nodes by keyword (substring + fuzzy) +- ``explore_rpg`` -- traverse dependency graph from a starting node +- ``get_node_detail`` -- get full attributes and optional source code +- ``list_rpg_tree`` -- browse RPG feature tree structure + +The server communicates over stdio (the standard MCP transport for +CLI-based servers). It is designed to be deployed under +``/.rpgkit/scripts/`` by ``rpgkit init`` / ``rpgkit update``, +and registered automatically in ``.mcp.json`` (Claude) or +``.vscode/mcp.json`` (VS Code Copilot). + +Run directly:: + + python /.rpgkit/scripts/mcp_server.py [--rpg-file PATH] +""" + +import json +import logging +import os +import sys +import time +from datetime import datetime, timezone +from typing import List, Optional + +# Ensure sibling packages (common/, rpg/) are importable when this script is +# invoked by an absolute path (which is how Claude / VS Code launch it). +_SCRIPTS_DIR = os.path.dirname(os.path.abspath(__file__)) +if _SCRIPTS_DIR not in sys.path: + sys.path.insert(0, _SCRIPTS_DIR) + +from common.paths import RPG_FILE, MCP_CALLS_LOG # noqa: E402 +from rpg.graph_query import GraphQueryEngine # noqa: E402 + +logger = logging.getLogger(__name__) + +# All logging to stderr (stdout is reserved for MCP JSON-RPC) +logging.basicConfig(stream=sys.stderr, level=logging.WARNING) + + +# --------------------------------------------------------------------------- +# Telemetry: append-only JSONL log of every tool call +# --------------------------------------------------------------------------- + +def _log_tool_call(tool_name: str, params: dict, result_summary: dict, duration_ms: int) -> None: + """Append a single-line JSON record to the MCP calls log. + + Best-effort: never raises; failures are silently ignored so + telemetry never breaks a tool invocation. 
+ """ + try: + MCP_CALLS_LOG.parent.mkdir(parents=True, exist_ok=True) + record = { + "ts": datetime.now(timezone.utc).isoformat(timespec="seconds"), + "tool": tool_name, + "params": params, + **result_summary, + "duration_ms": duration_ms, + } + with open(MCP_CALLS_LOG, "a", encoding="utf-8") as f: + f.write(json.dumps(record, ensure_ascii=False) + "\n") + except Exception: + pass + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _resolve_rpg_path() -> str: + """Resolve RPG file path from CLI args or default (.rpgkit/data/rpg.json).""" + rpg_path = str(RPG_FILE) + args = sys.argv[1:] + for i, arg in enumerate(args): + if arg == "--rpg-file" and i + 1 < len(args): + rpg_path = args[i + 1] + return rpg_path + + +# Standard message returned to the AI agent when the RPG graph isn't ready +# (e.g. ``rpgkit init`` ran, but the encoder hasn't been run yet so +# ``.rpgkit/data/rpg.json`` doesn't exist). Kept short + actionable so +# the agent will relay it verbatim to the user. +_ENCODE_HINT = ( + "RPG graph not generated yet. Ask the user to run **`/rpgkit.encode`** " + "in this AI agent to build `.rpgkit/data/rpg.json`. Once it finishes, " + "RPG tools will start working automatically on the next call โ€” no need " + "to restart the MCP server." +) + + +def _unavailable_payload(rpg_path: str, reason: str) -> str: + """Render a uniform 'graph not available' JSON response for every tool. + + The shape is deliberately identical across all 4 tools so the AI agent + can reliably detect the condition (``error == "rpg_unavailable"``) + and surface the ``next_step`` field to the user. 
+ """ + return json.dumps( + { + "error": "rpg_unavailable", + "rpg_file": rpg_path, + "reason": reason, + "next_step": _ENCODE_HINT, + }, + indent=2, + ensure_ascii=False, + ) + + +# --------------------------------------------------------------------------- +# MCP Server builder +# --------------------------------------------------------------------------- + +def create_mcp_server(rpg_file: str): + """Create and return a configured MCP server instance. + + Uses ``rpg.graph_query.GraphQueryEngine`` as the query backend. + Registers 4 MCP tools: search, explore, detail, and tree. + + The engine is loaded **lazily**: if ``rpg_file`` doesn't yet exist + (typical first-run flow โ€” ``rpgkit init`` finished but the user + hasn't run the encoder yet), the server still starts cleanly and + every tool returns an actionable ``rpg_unavailable`` payload pointing + the user at ``/rpgkit.encode``. Once the encoder writes + ``rpg.json`` the next tool call picks it up automatically โ€” no + restart needed. This avoids the ``MCP error -32000: Connection + closed`` failure mode that used to happen when the server exited + during startup. + + Args: + rpg_file: Path to the RPG JSON file. + + Returns: + A ``FastMCP`` server instance ready to be run. + """ + from mcp.server.fastmcp import FastMCP + + # Single-element list used as a mutable box so the per-tool closures + # below can update the cached engine without needing ``nonlocal`` in + # each function. + engine_box: List[Optional[GraphQueryEngine]] = [None] + + def _get_engine() -> Optional[GraphQueryEngine]: + """Return the cached engine, lazily loading rpg.json on first use. + + Returns ``None`` if the file doesn't exist or fails to load. + Errors are logged to stderr โ€” never raised โ€” because raising + from a tool handler closes the MCP transport. 
+ """ + if engine_box[0] is not None: + return engine_box[0] + if not os.path.isfile(rpg_file): + return None + try: + engine_box[0] = GraphQueryEngine.from_rpg_file(rpg_file) + logger.info("Loaded RPG from %s", rpg_file) + return engine_box[0] + except Exception as exc: # noqa: BLE001 + logger.error("Failed to load RPG from %s: %s", rpg_file, exc) + return None + + def _unavailable_reason() -> str: + return ( + "file_not_found" + if not os.path.isfile(rpg_file) + else "load_failed" + ) + + mcp = FastMCP( + "rpg-tools", + instructions=( + "This server provides structured access to the Repository " + "Program Graph (RPG) for the current workspace \u2014 a " + "pre-computed, queryable index of the codebase built by " + "`/rpgkit.encode` and kept in sync with HEAD by a " + "pre-commit hook.\n\n" + "What the RPG knows about this repository:\n" + " \u2022 The feature hierarchy: functional areas \u2192 " + "feature groups \u2192 individual features, each linked to " + "the source files that implement it.\n" + " \u2022 Every code entity: files, classes, and functions " + "with their signatures, docstrings, and exact line ranges.\n" + " \u2022 Resolved dependency edges between entities: " + "invokes (call graph), inherits, imports, contains.\n\n" + "What you can ask it for (and which tool answers it):\n" + " \u2022 The project's architecture \u2014 what each " + "functional area does, without reading any file. " + "\u2192 `list_rpg_tree`\n" + " \u2022 The definition site of any symbol (function, " + "class, file) by name or keyword. \u2192 `search_rpg`\n" + " \u2022 The callers and callees of a function, or its " + "full reachable subgraph up to N hops. \u2192 `explore_rpg`\n" + " \u2022 The full signature, docstring, and optional " + "source of a specific entity. \u2192 `get_node_detail`\n" + " \u2022 The mapping between abstract concerns (e.g. " + "\"authentication\", \"caching\") and the concrete code " + "that implements them. 
\u2192 `search_rpg` with " + "`scope=\"feature\"`, then `get_node_detail` on the " + "feature node.\n\n" + "Tools provided:\n" + " \u2022 `list_rpg_tree(root_id, max_depth)` \u2014 " + "browse the feature tree (functional areas \u2192 groups " + "\u2192 features). Best entry point for unfamiliar " + "codebases.\n" + " \u2022 `search_rpg(query, scope, top_k)` \u2014 " + "keyword search over code entities, features, or both; " + "returns ranked node IDs.\n" + " \u2022 `explore_rpg(node_id, direction, depth, " + "edge_types)` \u2014 traverse the dependency graph " + "upstream / downstream / both from a node, with edge-type " + "filtering.\n" + " \u2022 `get_node_detail(node_id, include_code)` \u2014 " + "full attributes of one node: signature, callers, callees, " + "line ranges, optional source code.\n\n" + "These tools resolve references semantically and aggregate " + "them by feature, so they answer structural and " + "dependency questions far more directly than a text scan. " + "See each tool's description for parameters and output " + "shape.\n\n" + "If a tool returns `error: \"rpg_unavailable\"`, the graph " + "has not been built yet \u2014 relay the `next_step` field " + "to the user." + ), + ) + + # ------------------------------------------------------------------ + # Tool 1: search_rpg + # ------------------------------------------------------------------ + @mcp.tool() + def search_rpg( + query: str, + scope: str = "all", + top_k: int = 10, + ) -> str: + """Search for code entities or features in this project by keyword. + + Use this when the user asks 'where is X?', 'find the login function', + 'which module handles authentication?', or any question that requires + locating code or features by name. + + Scope guide: + - 'code': find functions, classes, files by name or path + - 'feature': find functional features (e.g. 
'authentication', 'data persistence') + - 'all': search both (recommended when unsure) + + Args: + query: Search keyword โ€” function name, class name, file path, or feature name. + scope: 'code' (code entities), 'feature' (functional features), or 'all' (both). + top_k: Maximum number of results (default 10). + + Returns: + JSON list of matching nodes with id, name, type, score. + """ + engine = _get_engine() + if engine is None: + return _unavailable_payload(rpg_file, _unavailable_reason()) + t0 = time.monotonic() + results = engine.search(query, scope=scope, top_k=top_k) + has_error = bool(results and isinstance(results[0], dict) and "error" in results[0]) + _log_tool_call("search_rpg", + {"query": query, "scope": scope, "top_k": top_k}, + {"results": 0 if has_error else len(results), "error": has_error}, + int((time.monotonic() - t0) * 1000)) + return json.dumps(results, indent=2, ensure_ascii=False) + + # ------------------------------------------------------------------ + # Tool 2: explore_rpg + # ------------------------------------------------------------------ + @mcp.tool() + def explore_rpg( + node_id: str, + direction: str = "both", + depth: int = 2, + edge_types: Optional[List[str]] = None, + ) -> str: + """Explore dependencies and call chains from a code entity. + + Use this when the user asks 'what does X call?', 'who calls X?', + 'what are the dependencies of X?', or 'show me the call chain'. + Returns the subgraph of connected nodes and edges. + + Args: + node_id: Starting node ID (from search_rpg results, e.g. 'routes/auth.py:login'). + direction: 'downstream' (what I call), 'upstream' (who calls me), or 'both'. + depth: Maximum traversal depth in hops (default 2). + edge_types: Filter by edge types like 'invokes', 'inherits', 'imports'. Default: all. + + Returns: + JSON with connected nodes and edges. 
+ """ + engine = _get_engine() + if engine is None: + return _unavailable_payload(rpg_file, _unavailable_reason()) + t0 = time.monotonic() + result = engine.explore( + node_id, direction=direction, depth=depth, edge_types=edge_types + ) + _log_tool_call("explore_rpg", + {"node_id": node_id, "direction": direction, "depth": depth}, + {"nodes": result.get("total_nodes", 0), "edges": result.get("total_edges", 0)}, + int((time.monotonic() - t0) * 1000)) + return json.dumps(result, indent=2, ensure_ascii=False) + + # ------------------------------------------------------------------ + # Tool 3: get_node_detail + # ------------------------------------------------------------------ + @mcp.tool() + def get_node_detail( + node_id: str, + include_code: bool = False, + ) -> str: + """Get full details about a specific function, class, or feature. + + Use this when the user asks 'show me the signature of X', 'what does X do?', + 'what are the parameters of X?', or needs the source code of a specific entity. + Also works for RPG feature nodes (functional areas, feature groups). + + Args: + node_id: Node ID (from search_rpg or explore_rpg results). + include_code: If true, include the full source code of the function/class. + + Returns: + JSON with all node attributes: signature, calls, called_by, line numbers, etc. 
+ """ + engine = _get_engine() + if engine is None: + return _unavailable_payload(rpg_file, _unavailable_reason()) + t0 = time.monotonic() + result = engine.get_node_detail(node_id, include_code=include_code) + _log_tool_call("get_node_detail", + {"node_id": node_id, "include_code": include_code}, + {"source": result.get("source", "error"), "found": "error" not in result}, + int((time.monotonic() - t0) * 1000)) + return json.dumps(result, indent=2, ensure_ascii=False) + + # ------------------------------------------------------------------ + # Tool 4: list_rpg_tree + # ------------------------------------------------------------------ + @mcp.tool() + def list_rpg_tree( + root_id: str = "", + max_depth: int = 2, + ) -> str: + """List the project's functional architecture as a tree. + + Shows how the codebase is organized: functional areas (top-level domains), + feature groups, and individual features โ€” each linked to source files. + + Use this FIRST when the user asks about project structure, module organization, + or wants an overview of what the codebase does. + + Args: + root_id: Start from this node ID (empty = full project). Use a functional_area ID to zoom into one domain. + max_depth: How many levels deep to show (1=areas only, 2=+groups, 3=+features with file paths). + + Returns: + JSON tree with node names, types, and paths. 
+ """ + engine = _get_engine() + if engine is None: + return _unavailable_payload(rpg_file, _unavailable_reason()) + t0 = time.monotonic() + result = engine.list_tree(root_id=root_id or None, max_depth=max_depth) + _log_tool_call("list_rpg_tree", + {"root_id": root_id, "max_depth": max_depth}, + {"total_nodes": result.get("total_nodes", 0)}, + int((time.monotonic() - t0) * 1000)) + return json.dumps(result, indent=2, ensure_ascii=False) + + return mcp + + +# --------------------------------------------------------------------------- +# Entry point: python .rpgkit/scripts/mcp_server.py [--rpg-file PATH] +# --------------------------------------------------------------------------- + +if __name__ == "__main__": + rpg_path = _resolve_rpg_path() + # NOTE: do NOT sys.exit when the file is missing. The MCP transport + # must stay up so the client can actually receive the + # ``rpg_unavailable`` hint that tells the user to run + # ``/rpgkit.encode``. Exiting here used to surface as the opaque + # ``MCP error -32000: Connection closed`` on the client side. + if not os.path.isfile(rpg_path): + logger.warning( + "RPG file not found: %s โ€” server will start in degraded mode " + "and instruct the user to run /rpgkit.encode on the first tool call.", + rpg_path, + ) + + server = create_mcp_server(rpg_file=rpg_path) + server.run(transport="stdio") diff --git a/RPG-Kit/scripts/plan_tasks.py b/RPG-Kit/scripts/plan_tasks.py new file mode 100644 index 0000000..4861149 --- /dev/null +++ b/RPG-Kit/scripts/plan_tasks.py @@ -0,0 +1,1633 @@ +#!/usr/bin/env python3 +"""Plan Tasks Script - Implementation Level Step 5. 
+ +Function: Create implementation tasks from interfaces using LLM +- Reads interfaces.json, data_flow.json, repo_rpg.json +- Uses LLM to plan implementation tasks for each file +- Validates that all units are covered without duplicates +- Generates tasks.json with ordered implementation tasks + +Input: .rpgkit/interfaces.json, .rpgkit/data_flow.json, .rpgkit/repo_rpg.json +Output: .rpgkit/tasks.json (ordered implementation tasks) +""" + +import json +import logging +import argparse +import ast +from pathlib import Path +from datetime import datetime +from typing import Dict, Any, List, Optional, Set +from dataclasses import dataclass, field +from collections import Counter, defaultdict, deque + +from common.trajectory import Trajectory, load_or_create_trajectory +from common import LLMClient +from rpg import uuid8 + +# Import centralized paths +from common.paths import ( + DATA_FLOW_FILE, + INTERFACES_FILE, + REPO_RPG_FILE as RPG_FILE, + REPO_INFO_FILE, + TASKS_FILE as OUTPUT_FILE, + REPO_DIR, +) +import re as _re +from os.path import commonpath, dirname + + +# ============================================================================ +# Prompts (ZeroRepo compatible) +# ============================================================================ + +TASK_PLANNER_PROMPT = """ +## Instruction +You are an Implementation Task Planner. Your job is to decide the implementation order for all files and units within a subtree (module) by grouping them into dependency-aware tasks that feel natural for real development and code review. + +Think like someone organizing GitHub PRs: +each task should represent a meaningful, reviewable feature step โ€” not just a list of functions. + +## Planning Principles +- Implement prerequisites before dependents, both within and across files. +- Prefer tasks that deliver a complete functional milestone. +- Favor fewer, clearer tasks over many tiny ones. 
+- Each task targets units within a single file; task ordering reflects cross-file dependencies. +- A task should be implementable without needing code from future tasks. + +## Task Scope +Combine units when they naturally belong together: +- data model + validation/normalization +- helper functions + main logic that uses them +- core class + tightly coupled behavior methods + +Separate units when they: +- are foundational utilities reused across many places +- represent orchestration or entry-point logic +- clearly belong to a higher-level layer + +Split only when it improves clarity or dependency flow, not for symmetry or size. + +## Task Description Style +Each task description should convey: +1) What capability this task delivers +2) Why it matters right now +3) What scope is included (and what is not) + +Focus on functional milestone + intent. Avoid vague summaries or simply restating function names. + +## Output Format +Your response must include exactly one `` block and exactly one `` block: + +Analyze dependencies across all files, identify functional steps, decide task grouping and order. + + +{{ + "tasks": [ + {{ + "file_path": "", + "units": ["", "", ...], + "task": "" + }} + ] +}} + + +Constraints: +- Every unit in every file must appear exactly once across all tasks. +- Each task's units must belong to the specified file. 
+""" + + +# ============================================================================ +# Data Classes +# ============================================================================ + +@dataclass +class PlannedTask: + """Represents a planned implementation task.""" + task_id: str = field(init=False) + task: str + file_path: str + units_key: List[str] + unit_to_code: Dict[str, str] + unit_to_features: Dict[str, List] + priority: int = 0 + subtree: str = "" + task_type: str = "implementation" + + def __post_init__(self): + unique_suffix = uuid8() + self.task_id = f"{self.file_path.replace('/', '_')}_{datetime.now().strftime('%Y%m%d_%H%M%S')}_{unique_suffix}" + + if not isinstance(self.units_key, list) or not self.units_key: + raise ValueError("PlannedTask validation error: 'units_key' must be a non-empty list.") + + missing_in_code = [k for k in self.units_key if k not in self.unit_to_code] + if missing_in_code: + raise ValueError( + f"PlannedTask validation error: units_key contains keys not present " + f"in unit_to_code: {missing_in_code}" + ) + + # Auto-fill missing unit_to_features keys (informational only) + for k in self.units_key: + if k not in self.unit_to_features: + self.unit_to_features[k] = [] + + def to_dict(self) -> Dict: + return { + "task_id": self.task_id, + "task": self.task, + "file_path": self.file_path, + "units_key": self.units_key, + "unit_to_code": self.unit_to_code, + "unit_to_features": self.unit_to_features, + "priority": self.priority, + "subtree": self.subtree, + "task_type": self.task_type, + } + + @classmethod + def from_dict(cls, data: Dict) -> "PlannedTask": + obj = cls( + task=data["task"], + file_path=data["file_path"], + units_key=data["units_key"], + unit_to_code=data["unit_to_code"], + unit_to_features=data["unit_to_features"], + priority=data.get("priority", 0), + subtree=data.get("subtree", ""), + task_type=data.get("task_type", "implementation"), + ) + if "task_id" in data: + obj.task_id = data["task_id"] + return obj + + 
def _file_path_to_module_name(file_path: str) -> str:
    """Turn a source path like ``pkg/mod.py`` into a dotted module name."""
    path = file_path.replace("\\", "/")
    if path.endswith(".py"):
        path = path[: -len(".py")]
    return _normalize_module_name(path.replace("/", "."))


def _normalize_module_name(module_name: Optional[str]) -> str:
    """Map equivalent import spellings onto one canonical module name.

    Strips surrounding whitespace, leading dots, and a leading ``src.``
    prefix so different import styles resolve to the same file.
    """
    if not module_name:
        return ""

    name = module_name.strip().lstrip(".")
    if name.startswith("src."):
        name = name[len("src."):]
    return name


def _resolve_relative_import(module_name: str, level: int, current_file: str) -> Optional[str]:
    """Resolve a relative import target to an absolute module name.

    Returns None when ``level`` climbs above the package root of
    ``current_file``.
    """
    current_module = _file_path_to_module_name(current_file)
    package_parts = current_module.split(".")[:-1]

    if level <= 0:
        # Absolute import: just normalize.
        return _normalize_module_name(module_name)

    if level > len(package_parts):
        return None

    anchor = package_parts[: len(package_parts) - level + 1]
    if module_name:
        anchor = anchor + module_name.split(".")
    return _normalize_module_name(".".join(anchor))


def _is_type_checking_test(test_node: ast.AST) -> bool:
    """True when an ``if`` test is ``TYPE_CHECKING`` or ``typing.TYPE_CHECKING``."""
    if isinstance(test_node, ast.Name):
        return test_node.id == "TYPE_CHECKING"
    return isinstance(test_node, ast.Attribute) and test_node.attr == "TYPE_CHECKING"


def _iter_import_nodes(tree: ast.AST, inside_type_checking: bool = False):
    """Yield ``(import_node, is_type_checking_only)`` pairs for a whole tree.

    Recurses through every child node so imports nested in functions,
    classes, and conditionals are found; the flag is sticky once a
    TYPE_CHECKING guard is entered.
    """
    for child in ast.iter_child_nodes(tree):
        if isinstance(child, (ast.Import, ast.ImportFrom)):
            yield child, inside_type_checking

        guarded = inside_type_checking or (
            isinstance(child, ast.If) and _is_type_checking_test(child.test)
        )
        yield from _iter_import_nodes(child, guarded)


def _extract_imported_modules(file_code: str, current_file: str) -> Set[str]:
    """Extract runtime imported module names, excluding TYPE_CHECKING-only imports."""
    if not file_code.strip():
        return set()

    try:
        tree = ast.parse(file_code)
    except SyntaxError:
        # Unparseable skeletons contribute no dependency edges.
        return set()

    modules: Set[str] = set()

    for node, guarded in _iter_import_nodes(tree):
        if guarded:
            continue
        if isinstance(node, ast.Import):
            modules.update(
                _normalize_module_name(alias.name)
                for alias in node.names
                if alias.name
            )
            continue

        # ast.ImportFrom: record the base module and each imported submodule.
        # (A bare `from . import x` has module=None; "" resolves identically.)
        base = _resolve_relative_import(node.module or "", node.level, current_file)
        if base:
            modules.add(base)
            for alias in node.names:
                if alias.name != "*":
                    modules.add(_normalize_module_name(f"{base}.{alias.name}"))

    return modules


def _load_dependency_source_code(file_path: str, interface_file_code: str) -> str:
    """Load source for dependency analysis, combining repo and interface inputs."""
    parts: List[str] = []

    candidate = REPO_DIR / file_path
    if candidate.exists():
        try:
            on_disk = candidate.read_text(encoding="utf-8")
        except OSError:
            # Unreadable repo file: fall back to the interface skeleton only.
            on_disk = ""
        if on_disk.strip():
            parts.append(on_disk)

    if interface_file_code.strip():
        parts.append(interface_file_code)

    return "\n\n".join(parts)
def _topologically_sort_files(
    files_order: List[str],
    dependency_edges: Dict[str, Set[str]],
) -> Optional[List[str]]:
    """Stable topological sort preserving original order for unrelated files.

    ``dependency_edges`` maps a dependency file to the set of files that
    depend on it. Returns None when the edges contain a cycle (the sort
    cannot consume every file).
    """
    # Original position of each file; used to break ties deterministically.
    order_index = {file_path: index for index, file_path in enumerate(files_order)}
    adjacency: Dict[str, Set[str]] = {file_path: set() for file_path in files_order}
    indegree: Dict[str, int] = {file_path: 0 for file_path in files_order}

    # Build the graph, silently dropping edges that reference unknown files
    # and counting each (dependency, dependent) pair at most once.
    for dependency_file, dependent_files in dependency_edges.items():
        if dependency_file not in adjacency:
            continue
        for dependent_file in dependent_files:
            if dependent_file not in adjacency:
                continue
            if dependent_file not in adjacency[dependency_file]:
                adjacency[dependency_file].add(dependent_file)
                indegree[dependent_file] += 1

    # Kahn's algorithm; the ready queue is kept sorted by original position
    # so files with no mutual dependencies keep their incoming order.
    queue = deque(sorted(
        [file_path for file_path, degree in indegree.items() if degree == 0],
        key=lambda file_path: order_index[file_path],
    ))
    resolved_order: List[str] = []

    while queue:
        file_path = queue.popleft()
        resolved_order.append(file_path)

        for neighbor in sorted(adjacency[file_path], key=lambda item: order_index[item]):
            indegree[neighbor] -= 1
            if indegree[neighbor] == 0:
                # Insert by original position to keep the queue stably ordered.
                insert_pos = 0
                while insert_pos < len(queue) and order_index[queue[insert_pos]] <= order_index[neighbor]:
                    insert_pos += 1
                queue.insert(insert_pos, neighbor)

    # Fewer resolved files than input means a cycle was left unconsumed.
    if len(resolved_order) != len(files_order):
        return None

    return resolved_order


def correct_intra_subtree_file_order(
    subtree_name: str,
    files_order: List[str],
    subtree_interfaces: Dict[str, Dict[str, Any]],
    logger: Optional[logging.Logger] = None,
) -> tuple[List[str], Dict[str, Any]]:
    """Correct file order using imports declared in interface skeleton code.

    Returns ``(order, report)`` where ``report`` records the original and
    corrected orders, whether anything changed, the dependency edges that
    were found, and the reason code for the outcome.
    """
    logger = logger or logging.getLogger(__name__)
    # Only consider files the subtree actually has interfaces for.
    available_files = [file_path for file_path in files_order if file_path in subtree_interfaces]
    if len(available_files) <= 1:
        # Nothing to reorder; report the trivial outcome.
        return available_files, {
            "original_files_order": list(available_files),
            "corrected_files_order": list(available_files),
            "changed": False,
            "dependency_edges": [],
            "reason": "single_file_or_empty_subtree",
        }

    # Map each file's importable module name back to its path.
    module_to_file = {
        _file_path_to_module_name(file_path): file_path
        for file_path in available_files
    }
    dependency_edges: Dict[str, Set[str]] = defaultdict(set)
    dependency_pairs: List[Dict[str, str]] = []
    seen_dependency_pairs: Set[tuple[str, str, str]] = set()

    for file_path in available_files:
        # Combine any committed source with the interface skeleton so both
        # contribute import edges.
        file_code = _load_dependency_source_code(
            file_path=file_path,
            interface_file_code=subtree_interfaces[file_path].get("file_code", ""),
        )
        imported_modules = _extract_imported_modules(file_code, file_path)

        for module_name in sorted(imported_modules):
            dependency_file = module_to_file.get(module_name)
            # Ignore imports of files outside this subtree and self-imports.
            if not dependency_file or dependency_file == file_path:
                continue
            dependency_edges[dependency_file].add(file_path)
            dependency_key = (dependency_file, file_path, module_name)
            if dependency_key not in seen_dependency_pairs:
                seen_dependency_pairs.add(dependency_key)
                dependency_pairs.append({
                    "dependency": dependency_file,
                    "dependent": file_path,
                    "module": module_name,
                })

    corrected_order = _topologically_sort_files(available_files, dependency_edges)
    if corrected_order is None:
        # Cyclic imports: keep the planner-provided order rather than guess.
        logger.warning(
            "[TaskPlanner] Detected cyclic or invalid intra-subtree imports in '%s'; keeping original file order.",
            subtree_name,
        )
        return available_files, {
            "original_files_order": list(available_files),
            "corrected_files_order": list(available_files),
            "changed": False,
            "dependency_edges": dependency_pairs,
            "reason": "cycle_detected_fallback_to_original_order",
        }

    changed = corrected_order != available_files
    if changed:
        logger.info(
            "[TaskPlanner] Corrected files_order for subtree '%s': %s -> %s",
            subtree_name,
            available_files,
            corrected_order,
        )

    return corrected_order, {
        "original_files_order": list(available_files),
        "corrected_files_order": list(corrected_order),
        "changed": changed,
        "dependency_edges": dependency_pairs,
        "reason": "ast_import_toposort",
    }
def validate_tasks(
    tasks: List[Dict],
    file_unit_keys: Dict[str, List[str]]
) -> tuple[bool, str, Optional[List[Dict]]]:
    """Validate planned tasks for a subtree (ZeroRepo compatible validation).

    Checks that every task has the required shape, references a known file,
    and that across all tasks every unit of every file is covered exactly
    once — no gaps, no extras, no duplicates.

    Args:
        tasks: List of task dicts with file_path, units, task.
        file_unit_keys: Mapping of file_path -> list of required unit keys.

    Returns:
        Tuple ``(success, message, validated_tasks)``; ``validated_tasks``
        is the input list on success, else None.
    """
    if not tasks:
        return False, "Invalid: 'tasks' is empty. You must provide a complete list of ALL tasks.", None

    # Unit keys each file actually received, aggregated across tasks.
    claimed_units: Dict[str, List[str]] = {}

    for index, entry in enumerate(tasks):
        absent = [key for key in ("file_path", "units", "task") if key not in entry]
        if absent:
            return False, f"Invalid task at index {index}: missing required keys {absent}.", None

        file_path = entry["file_path"]
        units = entry["units"]

        if file_path not in file_unit_keys:
            return False, (
                f"Invalid task at index {index}: unknown file_path '{file_path}'. "
                f"Valid files: {list(file_unit_keys.keys())}"
            ), None

        if not isinstance(units, list) or not units:
            return False, f"Invalid task at index {index}: 'units' must be a non-empty list.", None

        claimed_units.setdefault(file_path, []).extend(units)

    # Per-file coverage check: exact set match plus duplicate detection.
    problems: List[str] = []
    for file_path, expected_units in file_unit_keys.items():
        expected = set(expected_units)
        claimed = claimed_units.get(file_path, [])
        got = set(claimed)
        duplicates = sorted(unit for unit, count in Counter(claimed).items() if count > 1)

        if got != expected:
            missing_u = sorted(expected - got)
            extra_u = sorted(got - expected)
            problems.append(
                f"File '{file_path}': expected {len(expected)} units, got {len(got)}. "
                f"Missing: {missing_u}, Extra: {extra_u}, Duplicates: {duplicates}"
            )
        elif duplicates:
            problems.append(f"File '{file_path}': duplicate unit keys: {duplicates}")

    if problems:
        return False, (
            "Unit key mismatch:\n" + "\n".join(problems) + "\n\n"
            f"IMPORTANT: Re-plan ALL tasks. Required units per file: {json.dumps(dict(file_unit_keys))}"
        ), None

    total_units = sum(len(units) for units in file_unit_keys.values())
    return True, f"Planned {len(tasks)} tasks covering all {total_units} units across {len(file_unit_keys)} files.", tasks
system prompt + system_prompt = TASK_PLANNER_PROMPT + + # Build per-file context (in files_order) + files_context_parts = [] + for i, fp in enumerate(self.files_order): + if fp not in self.files_data: + continue + fdata = self.files_data[fp] + unit_keys = list(fdata["units_to_code"].keys()) + files_context_parts.append( + f"### File {i + 1}: {fp}\n" + f"Units ({len(unit_keys)}): {json.dumps(unit_keys)}\n\n" + f"Source code (interfaces only):\n{fdata.get('file_code', '')}\n" + ) + files_context = "\n---\n".join(files_context_parts) + + # Build all units summary (in files_order) + ordered_file_unit_keys = {fp: file_unit_keys[fp] for fp in self.files_order if fp in file_unit_keys} + all_units_summary = json.dumps(ordered_file_unit_keys, indent=2) + + # Build files_order hint + files_order_hint = json.dumps(self.files_order, indent=2) + + # Build task prompt + task_prompt = f"""Plan the implementation tasks for the repository: {self.repo_name} +Repository description: {self.repo_info} + +Context: +- You are planning the implementation order for the subtree / module: {self.subtree} +- Total files: {len(self.files_data)} +- Total units: {total_units} + +**CRITICAL โ€” Mandatory file implementation order (files_order):** +{files_order_hint} +You MUST output tasks following this file order strictly. +All tasks for file N must appear BEFORE any task for file N+1. +Within each file, order tasks by internal dependency. 
+ +Files and their source code (listed in files_order): +{files_context} + +All units per file (must ALL be covered exactly once): +{all_units_summary} +""" + + combined_prompt = f"{system_prompt}\n\n{task_prompt}" + last_error = "" + planned_tasks = [] + + for step in range(max_steps): + self.logger.info(f"[TaskPlannerAgent] Step {step + 1}/{max_steps} for subtree '{self.subtree}'") + + current_prompt = combined_prompt + if last_error: + current_prompt += f"\n\n[Tool Execution Feedback - Please fix and retry]:\n{last_error}" + + try: + response = self.llm.generate(current_prompt, purpose=f"plan_{self.subtree}_{step + 1}") + parsed = self.llm.parse_result_json(response) + + if not parsed: + # Fallback: try to find {"tasks": [...]} directly in the response + tasks_match = _re.search(r'\{\s*"tasks"\s*:\s*\[', response) + if tasks_match: + # Found "tasks" key โ€” try brace-counting extraction + start = tasks_match.start() + brace_count = 0 + for i, ch in enumerate(response[start:], start): + if ch == '{': + brace_count += 1 + elif ch == '}': + brace_count -= 1 + if brace_count == 0: + try: + parsed = json.loads(response[start:i+1]) + self.logger.info( + "[TaskPlannerAgent] Recovered tasks JSON via fallback extraction" + ) + except json.JSONDecodeError: + pass + break + + if not parsed: + # Show the LLM what it actually returned so it can fix the format + response_tail = response[-500:] if len(response) > 500 else response + last_error = ( + "Failed to parse result_json from your response.\n" + "You MUST wrap your JSON output in tags.\n" + "Your response ended with:\n" + f"```\n{response_tail}\n```\n\n" + "Expected format:\n" + "\n" + '{"tasks": [{"file_path": "...", "units": [...], "task": "..."}]}\n' + "" + ) + continue + + tasks = parsed.get("tasks", []) + + # Validate + success, message, validated_tasks = validate_tasks(tasks, file_unit_keys) + + if success: + self.logger.info(f"[TaskPlannerAgent] [OK] {message}") + planned_tasks = validated_tasks + break + else: + 
self.logger.warning(f"[TaskPlannerAgent] Validation failed: {message}") + last_error = message + + except Exception as e: + self.logger.error(f"[TaskPlannerAgent] Error: {e}") + last_error = str(e) + + # Convert to PlannedTask objects, organized by file_path + # Enforce files_order: re-sort tasks so that file order is respected, + # while preserving LLM's within-file task ordering. + file_order_index = {fp: i for i, fp in enumerate(self.files_order)} + sorted_tasks = sorted( + enumerate(planned_tasks), + key=lambda pair: ( + file_order_index.get(pair[1]["file_path"], 999), + pair[0], # preserve original LLM order within same file + ), + ) + + tasks_by_file: Dict[str, List[PlannedTask]] = {} + for priority_idx, (_orig_idx, task_data) in enumerate(sorted_tasks): + fp = task_data["file_path"] + t_unit_keys = task_data["units"] + fdata = self.files_data[fp] + t_unit_to_code = {u: fdata["units_to_code"][u] for u in t_unit_keys} + t_unit_to_features = {u: fdata["units_to_features"][u] for u in t_unit_keys} + + planned_task = PlannedTask( + task=task_data["task"], + file_path=fp, + units_key=t_unit_keys, + unit_to_code=t_unit_to_code, + unit_to_features=t_unit_to_features, + priority=priority_idx, + subtree=self.subtree + ) + tasks_by_file.setdefault(fp, []).append(planned_task) + + total_tasks = sum(len(t) for t in tasks_by_file.values()) + return { + "planned_tasks": tasks_by_file, + "success": total_tasks > 0, + "subtree": self.subtree + } + + +# ============================================================================ +# Helpers (B4 โ€” entry-file hint based on project_types) +# ============================================================================ + +def _format_entry_file_hint(project_types: List[str]) -> str: + """Render a per-project-type hint about the entry filename (plan B4). + + Returns a single-line string appended to the "main.py" bullet of the + main-entry task description. 
The wording stays advisory โ€” the agent + keeps freedom to choose ``main.py`` for any project โ€” but flags more + idiomatic alternatives for SERVICE / API / PIPELINE / GAME / GUI. + Empty ``project_types`` falls back to a neutral hint. + """ + if not project_types: + return ( + "If a different filename better expresses the project's purpose " + "(e.g. `app.py`, `server.py`, `pipeline.py`), you may use it " + "instead โ€” name the file by intent." + ) + + types = set(project_types) + if "SERVICE" in types or "API" in types: + return ( + "For service/API projects, `app.py` or `server.py` is often a " + "more conventional name โ€” pick whichever expresses intent best." + ) + if "PIPELINE" in types: + return ( + "For data pipelines / batch jobs, name the file after the job " + "(e.g. `pipeline.py`, `dag.py`, `train.py`) instead of `main.py`." + ) + if "GAME" in types: + return ( + "For games, `main.py` is fine, but `game.py` or `play.py` are " + "also acceptable. Pick what reads naturally to a new contributor." + ) + if "LIBRARY" in types and not (types & {"CLI", "WEB", "GUI", "GAME"}): + return ( + "This project is primarily a library; an entry point is " + "optional. If you create one, prefer a thin CLI demonstrator " + "(e.g. `examples/run.py`)." + ) + # WEB / CLI / GUI all use main.py idiomatically. + return "`main.py` is the right choice for this project." 
+ + +# ============================================================================ +# Task Planner (orchestrator) +# ============================================================================ + +class TaskPlanner: + """Plans implementation tasks from interfaces using LLM.""" + + def __init__( + self, + interfaces: Dict[str, Any], + data_flow: Dict[str, Any], + repo_name: str = "", + repo_info: str = "", + debug: bool = False, + trajectory: Optional[Trajectory] = None + ): + self.interfaces = interfaces + self.data_flow = data_flow + self.repo_name = repo_name + self.repo_info = repo_info + self.debug = debug + self.trajectory = trajectory + self.llm: Optional[LLMClient] = None + self.logger = logging.getLogger(__name__) + + # Results storage + self.planned_tasks_dict: Dict[str, Dict[str, List[PlannedTask]]] = {} + self.agent_results_dict: Dict[str, Dict[str, Any]] = {} + self.file_order_diagnostics: Dict[str, Dict[str, Any]] = {} + + def plan(self) -> Dict[str, Any]: + """Create implementation task plan using LLM.""" + # Ensure repo directory exists (LLMClient needs it for session management, + # but plan_tasks may run before init_codebase creates it) + REPO_DIR.mkdir(parents=True, exist_ok=True) + + # Add step to trajectory + step_id = None + if self.trajectory: + step_id = self.trajectory.add_step( + "plan_tasks", + description="Create implementation tasks from interfaces using LLM" + ) + self.trajectory.start_step(step_id) + + self.llm = LLMClient(trajectory=self.trajectory, step_id=step_id) + + print("\nโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”") + print(" PLANNING IMPLEMENTATION TASKS (LLM-based)") + print("โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”") + + # Get subtree order from 
data_flow + subtree_order = self.data_flow.get("subtree_order", []) + print(f"\n Subtree order: {subtree_order}") + + # Get subtrees data from interfaces + subtrees_data = self.interfaces.get("subtrees", {}) + + total_files = 0 + total_units = 0 + + for subtree in subtree_order: + if subtree not in subtrees_data: + self.logger.warning(f"Subtree {subtree} not found in interfaces") + continue + + subtree_dict = subtrees_data[subtree] + self.planned_tasks_dict[subtree] = {} + self.agent_results_dict[subtree] = {} + + # Get files order and interfaces + subtree_interfaces = subtree_dict.get("interfaces", {}) + files_order = subtree_dict.get("files_order", list(subtree_interfaces.keys())) + corrected_files_order, order_diagnostics = correct_intra_subtree_file_order( + subtree_name=subtree, + files_order=files_order, + subtree_interfaces=subtree_interfaces, + logger=self.logger, + ) + self.file_order_diagnostics[subtree] = order_diagnostics + + print(f"\n Subtree: {subtree} ({len(corrected_files_order)} files)") + if order_diagnostics["changed"]: + print(" โ†บ Corrected file order from interface imports") + + # Collect all files data for this subtree + files_data: Dict[str, Dict[str, Any]] = {} + for file_path in corrected_files_order: + if file_path not in subtree_interfaces: + continue + + file_dict = subtree_interfaces[file_path] + file_code = file_dict.get("file_code", "") + units_to_code = file_dict.get("units_to_code", {}) + units_to_features = file_dict.get("units_to_features", {}) + + if not units_to_code: + self.logger.warning(f"No units found for {file_path}") + continue + + total_files += 1 + total_units += len(units_to_code) + + print(f" - {file_path}: {len(units_to_code)} units") + + files_data[file_path] = { + "file_code": file_code, + "units_to_code": units_to_code, + "units_to_features": units_to_features, + } + + if not files_data: + self.logger.warning(f"No files with units found for subtree {subtree}") + continue + + # Create agent and plan entire 
subtree at once + agent = TaskPlannerAgent( + llm_client=self.llm, + subtree=subtree, + files_data=files_data, + files_order=[fp for fp in corrected_files_order if fp in files_data], + repo_name=self.repo_name, + repo_info=self.repo_info, + logger=self.logger + ) + + result = agent.plan_subtree_tasks( + max_retry=5, + max_steps=5 + ) + + if result["success"]: + # Reassign priorities based on topological file order so that + # files depended-on by others are implemented first. + file_order_map = { + fp: idx + for idx, fp in enumerate(corrected_files_order) + if fp in files_data + } + for fp, tasks in result["planned_tasks"].items(): + base_priority = file_order_map.get(fp, 999) + for i, task in enumerate(tasks): + task.priority = base_priority * 100 + i + self.planned_tasks_dict[subtree][fp] = tasks + total_planned = sum(len(t) for t in result["planned_tasks"].values()) + print(f" [OK] {total_planned} tasks planned for subtree") + else: + self.logger.error(f"Failed to plan tasks for subtree {subtree}") + print(" [FAIL] Planning failed for subtree") + + # Serialize results (ZeroRepo compatible format) + # Filter out tasks with empty units_key (e.g., __init__.py files + # that the LLM planned but have no units to implement) + planned_tasks_serializable = {} + for subtree, files_dict in self.planned_tasks_dict.items(): + planned_tasks_serializable[subtree] = {} + for file_path, tasks_list in files_dict.items(): + valid_tasks = [ + task.to_dict() for task in tasks_list + if task.units_key # skip empty + ] + if valid_tasks: + planned_tasks_serializable[subtree][file_path] = valid_tasks + + result = { + "planned_tasks_dict": planned_tasks_serializable, + "agent_results_dict": self.agent_results_dict, + "file_order_diagnostics": self.file_order_diagnostics, + "subtree_order": subtree_order, + "success": True + } + + # Count total tasks + total_tasks = sum( + len(tasks) + for files_dict in self.planned_tasks_dict.values() + for tasks in files_dict.values() + ) + + # 
Complete step + if self.trajectory and step_id: + self.trajectory.complete_step(step_id, metadata={ + "total_tasks": total_tasks, + "total_units": total_units, + "total_files": total_files + }) + + print(f"\n Planned {total_tasks} tasks for {total_units} units across {total_files} files") + + # Add integration tests and documentation tasks + self._add_special_tasks(planned_tasks_serializable, self.agent_results_dict, subtree_order) + + # Add project file tasks (after all core implementation) + self._add_project_file_tasks(planned_tasks_serializable, self.agent_results_dict) + + # Update subtree order to include special and project files + updated_subtree_order = subtree_order + ["FINAL_TASKS", "PROJECT_FILES"] + + result = { + "planned_tasks_dict": planned_tasks_serializable, + "agent_results_dict": self.agent_results_dict, + "file_order_diagnostics": self.file_order_diagnostics, + "subtree_order": updated_subtree_order, + "success": True + } + + # Recount total tasks including project files + total_tasks = sum( + len(tasks) + for files_dict in planned_tasks_serializable.values() + for tasks in files_dict.values() + ) + + print(f" [OK] Added project file tasks (total tasks: {total_tasks})") + + return result + + def _add_special_tasks( + self, + planned_tasks: Dict, + agent_results: Dict, + subtree_order: List[str] + ): + """Add integration test and documentation tasks (ZeroRepo compatible).""" + print("\n Adding integration test and documentation tasks...") + + # Add integration test task for each subtree + for subtree in subtree_order: + if subtree in planned_tasks: + # Get subtree path information + subtree_path = self._get_subtree_path(subtree) + + integration_test_task = PlannedTask( + task=( + f"Write comprehensive integration tests for the {subtree} module " + f"{f'located in {subtree_path}' if subtree_path else ''}. 
" + f"Test the interactions between all components in this module, " + f"verify data flow, error handling, and edge cases, and ensure all public APIs " + f"work correctly together. " + f"Focus on testing the integration points between different files in this module. " + f"In addition to the integration tests, create small, focused usage examples for this module " + f"(e.g., example scripts or functions) that demonstrate typical end-to-end usage of its main APIs. " + f"Create appropriate test files and example files in the module directory or the test/example " + f"directory as needed, following the existing project conventions." + ), + file_path="", # Special marker - let agent decide placement + units_key=[f"{subtree}_integration_tests"], + unit_to_code={f"{subtree}_integration_tests": f"# Integration tests for {subtree} module"}, + unit_to_features={f"{subtree}_integration_tests": [f"{subtree} integration testing"]}, + priority=1000, # Lower priority (higher number) - run after regular implementation + subtree=subtree, + task_type="integration_test", + ) + + # Add integration test to the subtree + integration_file_path = f"_{subtree}" + planned_tasks[subtree][integration_file_path] = [integration_test_task.to_dict()] + agent_results[subtree][integration_file_path] = {"success": True, "type": "integration_test"} + + self.logger.info(f"Added integration test task for subtree: {subtree} (path: {subtree_path})") + print(f" - Added integration test task for subtree: {subtree}") + + # Create a special subtree for final tasks + final_subtree = "FINAL_TASKS" + planned_tasks[final_subtree] = {} + agent_results[final_subtree] = {} + + # === Cross-module wiring verification task === + edges = self.data_flow.get("data_flow", []) + if edges: + edges_desc = "\n".join( + f" - {e.get('source', '?')} โ†’ {e.get('target', '?')}: " + f"{e.get('data_type', 'N/A')}" + for e in edges + ) + wiring_task = PlannedTask( + task=( + "Verify and fix cross-module wiring for all data flow 
edges.\n\n" + "The following data flow edges are defined in the system design. " + "For EACH edge, you must:\n" + "1. Read the source module's actual code (not just the skeleton)\n" + "2. Read the target module's actual code\n" + "3. Check if code exists that calls the source and passes results " + "to the target\n" + "4. If NOT connected or connected incorrectly, fix the production code\n" + "5. Write a test that verifies the connection works\n\n" + "Data Flow Edges (CHECKLIST):\n" + f"{edges_desc}\n\n" + "Common wiring bugs to fix:\n" + "- Route handler returning placeholder string instead of calling " + "the real handler\n" + "- Module A defines function but Module B never imports/calls it\n" + "- Data format mismatch at module boundary\n" + "- CSS class names in templates not matching stylesheet definitions\n" + "\nDo NOT create main.py โ€” it will be created in a later task." + ), + file_path="", + units_key=["cross_module_wiring"], + unit_to_code={"cross_module_wiring": "# Cross-module wiring verification"}, + unit_to_features={"cross_module_wiring": [ + "cross-module data flow wiring" + ]}, + priority=1500, + subtree=final_subtree, + task_type="wiring", + ) + planned_tasks[final_subtree][""] = [wiring_task.to_dict()] + agent_results[final_subtree][""] = { + "success": True, "type": "wiring" + } + self.logger.info( + "Added cross-module wiring task with %d data flow edges", len(edges) + ) + print(f" - Added cross-module wiring verification task " + f"({len(edges)} edges)") + + # === UI Polish task === + ui_polish_task = PlannedTask( + task=( + "Review and improve the user-facing interface of this application.\n\n" + "FIRST: Determine what type of user interface this project has:\n" + "- Web application (HTML pages, templates, CSS)\n" + "- GUI application (tkinter, PyQt, pygame, etc.)\n" + "- CLI tool (terminal output, argument parsing)\n" + "- Library with no direct UI\n\n" + "If it is a pure library with no user-facing output, skip this " + "task โ€” commit 
an empty change and report PASS.\n\n" + "For ALL other project types, follow these steps:\n\n" + "## Step 1: Inventory existing assets\n" + "List all files related to user-facing output:\n" + "- Style modules (styles.py, *.css, theme files)\n" + "- Template/page/view files\n" + "- Layout/component files\n" + "- Static assets directory\n" + "If any necessary files are MISSING (e.g., no CSS file exists " + "but HTML references styles), CREATE them.\n\n" + "## Step 2: Audit every user-facing output\n" + "**For web apps:**\n" + "- Does every page use the shared layout (head+CSS, nav, footer)?\n" + "- Do HTML class names match the CSS definitions exactly?\n" + "- Is content in proper containers? Are forms styled?\n" + "- Are all navigation links correct and complete?\n\n" + "**For GUI apps:**\n" + "- Is there consistent widget styling and layout?\n" + "- Are windows properly sized with sensible defaults?\n" + "- Is there a menu bar or toolbar for navigation?\n\n" + "**For CLI tools:**\n" + "- Is output well-formatted with aligned columns?\n" + "- Are error messages clear and helpful?\n" + "- Does --help show all commands with descriptions?\n" + "- Are long operations showing progress?\n\n" + "## Step 3: Fix all issues\n" + "- Create missing style/template/static files if needed\n" + "- Fix class name mismatches between HTML and CSS\n" + "- Add missing layout wrapping to bare pages\n" + "- Replace placeholder/stub responses with real renderers\n" + "- Ensure consistent look across all pages/screens/commands\n\n" + "## Step 4: Verify\n" + "- Web: test client requests โ†’ check for + + + + +
+
Legend
+
imports
+
inherits
+
invokes
+
references
+
+
Controls
+ Click node/group โ€” select & filter edges
+ Double-click node โ€” expand / collapse
+ Double-click group border โ€” collapse
+ Click background โ€” deselect
+ Drag node โ€” move
+ Scroll โ€” zoom +
+
+ + + + +""" + + +def main(): + from common.paths import RPG_FILE + + parser = argparse.ArgumentParser(description="Visualize RPG as interactive graph") + parser.add_argument("rpg_file", nargs="?", default=str(RPG_FILE), + help="Path to rpg.json (default: .rpgkit/data/rpg.json)") + parser.add_argument("--dep-graph", default=None, + help="Path to dep_graph.json (default: dep_graph_file field or sibling dep_graph.json)") + parser.add_argument("-o", "--output", default=None, + help="Output HTML file (default: .html)") + args = parser.parse_args() + + rpg_path = Path(args.rpg_file).expanduser() + if not rpg_path.exists(): + print(f"Error: {rpg_path} not found", file=sys.stderr) + sys.exit(1) + + try: + data = load_rpg(rpg_path, args.dep_graph) + except FileNotFoundError as exc: + print(f"Error: {exc}", file=sys.stderr) + sys.exit(1) + html_content = generate_html(data) + + output = args.output or str(rpg_path.with_suffix(".html")) + Path(output).write_text(html_content, encoding="utf-8") + + tree = normalize_to_tree(data) + dep = extract_dep_graph(data) + dep_map = data.get("_dep_to_rpg_map", {}) + n = count_nodes(tree) + e = len(get_semantic_edges(data)) + print(f"Generated: {output}") + print(f" Feat Graph โ€” Nodes: {n}, Semantic Edges: {e}") + print(f" Dep Graph โ€” Nodes: {len(dep['nodes'])}, Edges: {len(dep['edges'])}") + print(f" Mapping โ€” {len(dep_map)} dep nodes -> {sum(len(v) for v in dep_map.values())} RPG features") + print(f" Open in browser: file://{Path(output).resolve()}") + + +if __name__ == "__main__": + main() diff --git a/RPG-Kit/scripts/run_batch.py b/RPG-Kit/scripts/run_batch.py new file mode 100644 index 0000000..0a26304 --- /dev/null +++ b/RPG-Kit/scripts/run_batch.py @@ -0,0 +1,1165 @@ +#!/usr/bin/env python3 +"""Run Batch โ€” Unified TDD batch executor. + +Prepares a batch, dispatches a single sub-agent session that autonomously +completes the full write โ†’ test โ†’ fix cycle, then verifies and merges. 
+ +Architecture: + run_batch.py (this file) + โ”œโ”€โ”€ batch_prepare โ€” reuse code_gen.task_loader pickers + โ”œโ”€โ”€ batch_prompt โ€” build TDD prompt for the sub-agent + โ”œโ”€โ”€ batch_dispatch โ€” call LLMClient to dispatch sub-agent + โ”œโ”€โ”€ batch_verify โ€” post-verification pytest run + โ””โ”€โ”€ batch_complete โ€” merge branch, update state + +Each batch gets at most 2 attempts (initial + one auto-retry). +Each attempt gives the sub-agent up to 5 internal TDD iterations. + +Usage: + python3 run_batch.py --next --json # Next pending batch + python3 run_batch.py --next --merge-file --json # File-merge mode + python3 run_batch.py --resume --json # Resume interrupted batch + python3 run_batch.py --retry --json # Retry a failed batch + python3 run_batch.py --final-test --json # Full repo validation (pytest + smoke) + python3 run_batch.py --global-review --json # Full feature review + visual QA (run after --final-test) +""" + +import json +import argparse +import logging +import signal +import sys +import time +from pathlib import Path +from typing import Dict, Any, Optional, List, Tuple + +# --------------------------------------------------------------------------- +# Path setup โ€” ensure scripts/ is importable +# --------------------------------------------------------------------------- +sys.path.insert(0, str(Path(__file__).parent)) + +from common.execution_state import ( + BatchExecutionState, + CodeGenState, + load_code_gen_state, + save_code_gen_state, + get_or_create_code_gen_trajectory, + complete_batch as state_complete_batch, + skip_current_batch as state_skip_batch, +) +from common.git_utils import GitRunner +from common.task_batch import PlannedTask, get_task_by_id +from common.paths import ( + TASKS_FILE, + INTERFACES_FILE, + CODE_GEN_STATE_FILE as STATE_FILE, + BASE_CLASSES_FILE, + DATA_FLOW_FILE, + DEP_GRAPH_FILE, + REPO_RPG_FILE, + FEATURE_SPEC_FILE, + LOGS_DIR as _LOGS_DIR, + WORKSPACE_ROOT, + get_scripts_dir, + REPO_DIR, +) +from 
code_gen.context_collector import build_dependency_context +from code_gen.prompts import ( + build_test_prompt_from_batch, + build_code_prompt_from_batch, + build_project_file_prompt_from_batch, + build_merged_test_prompt, + build_merged_code_prompt, + is_project_file_batch, + is_project_docs_batch, + _format_dependency_context, +) +from code_gen.test_runner import ( + ensure_dev_venv, + ensure_deps_installed, +) +from code_gen.rpg_updater import run_rpg_update + +# Git branch helpers extracted to code_gen.git_ops. These are +# internal helpers used only by Module 5 ("Batch Orchestrator") +# below; no external surface contract. +from code_gen.git_ops import ( + ensure_on_main, + setup_batch_branch, + merge_batch_branch, + abandon_batch_branch, +) + +# Post-verification helper extracted to code_gen.post_verify. +from code_gen.post_verify import post_verify + +# Result-dict builders extracted to code_gen.result_builders. Internal +# helpers used only by Module 5's orchestrator. +from code_gen.result_builders import ( + _error, + _all_done, + _success_result, + _failure_result, +) + +# Final-test stage extracted to code_gen.final_validation. +from code_gen.final_validation import final_test + +# Global-review stage extracted to code_gen.global_review. +from code_gen.global_review import global_review + +# Per-batch TDD prompt builders extracted to code_gen.batch_prompts. +from code_gen.batch_prompts import ( + build_tdd_prompt, + build_resume_prompt, +) + +# Sub-agent dispatch (re-exported from code_gen.sub_agent). External +# callers โ€” ``code_gen.subtree_review``, ``rpg_edit.review``, +# ``rpg_edit.code`` โ€” still do ``from run_batch import dispatch_sub_agent``; +# keep these names live at the module level for backwards compatibility. +# ``test_run_batch_preserves_external_surface`` guards this contract. 
+from code_gen.sub_agent import ( # noqa: F401 + dispatch_sub_agent, + parse_batch_result, + parse_pytest_summary, + truncate_test_output, +) + +# Task-picker helpers extracted to code_gen.task_loader. +from code_gen.task_loader import ( + get_next_pending_task_id, + get_next_merged_tasks, +) +from smoke_test import run_smoke_test + +logger = logging.getLogger(__name__) + +# ============================================================================ +# Constants +# ============================================================================ + +from code_gen._constants import DEFAULT_AGENT_TIMEOUT # noqa: E402 + +MAX_BATCH_ATTEMPTS = 2 # initial + 1 auto-retry + +# Timeout values used inside the extracted modules +# (``code_gen.batch_prompts`` / ``.post_verify`` / ``.final_validation`` / +# ``.global_review``) live in ``code_gen._constants``; the orchestrator +# only needs the sub-agent timeout directly for its argparse default. + + + +# ============================================================================ +# Module 1: Prompt Builder +# ---------------------------------------------------------------------------- +# Extracted to ``code_gen.batch_prompts``. Imported above. +# ============================================================================ + + +# ============================================================================ +# Module 2: Git Operations +# ---------------------------------------------------------------------------- +# Extracted to ``code_gen.git_ops``. Imported above; nothing to define here. +# ============================================================================ + + +# ============================================================================ +# Module 3: Sub-agent Dispatch +# ---------------------------------------------------------------------------- +# Extracted to ``code_gen.sub_agent``. 
The names are re-exported below +# (see "Sub-agent dispatch (re-exported from code_gen.sub_agent)") so +# legacy callers ``from run_batch import dispatch_sub_agent`` still work. +# ============================================================================ + + +# ============================================================================ +# Module 4: Post-Verification +# ---------------------------------------------------------------------------- +# Extracted to ``code_gen.post_verify``. Imported above. +# ============================================================================ + + +# ============================================================================ +# Module 5: Batch Orchestrator +# ============================================================================ + +def _prepare_batch_context( + global_state: CodeGenState, + task: PlannedTask, + merged_tasks: Optional[List[PlannedTask]], +) -> Tuple[BatchExecutionState, Optional[Dict[str, Any]]]: + """Build BatchExecutionState and dependency context for a task. + + This mirrors the historical ``prepare_batch`` logic but returns data structures + instead of printing JSON. 
    Returns:
        (batch_state, dependency_context)
    """
    batch_state = BatchExecutionState(
        batch_id=task.task_id,
        file_path=task.file_path,
        subtree=task.subtree,
    )

    # Build dependency context (best-effort: prompt builders accept None)
    dep_context = None
    try:
        dep_context = build_dependency_context(
            batch=task,
            completed_task_ids=global_state.completed_task_ids,
            tasks_path=TASKS_FILE,
            interfaces_path=INTERFACES_FILE,
            base_classes_path=BASE_CLASSES_FILE,
            data_flow_path=DATA_FLOW_FILE,
            feature_spec_path=FEATURE_SPEC_FILE,
        )
    except Exception as exc:
        logger.warning("Failed to build dependency context: %s", exc)

    # NOTE: when merged_tasks is None/empty this is falsy (None or []),
    # not strictly False; it is only ever used in boolean context below.
    is_merged = merged_tasks and len(merged_tasks) > 1

    # Generate prompts based on task type
    if is_project_file_batch(task):
        batch_state.test_prompt = ""
        batch_state.code_prompt = build_project_file_prompt_from_batch(task, dependency_context=dep_context)
        batch_state.skip_tests = is_project_docs_batch(task)
    elif is_merged:
        batch_state.merged_task_ids = [t.task_id for t in merged_tasks]
        batch_state.test_prompt = build_merged_test_prompt(merged_tasks, dependency_context=dep_context)
        batch_state.code_prompt = build_merged_code_prompt(merged_tasks, dependency_context=dep_context)
    elif task.task_type in ("integration_test", "final_test_docs"):
        batch_state.test_prompt = build_test_prompt_from_batch(task, dependency_context=dep_context)
        batch_state.code_prompt = build_code_prompt_from_batch(task, dependency_context=dep_context)
        # skip_code_gen stays False โ€” agent can fix genuine integration bugs
    else:
        batch_state.test_prompt = build_test_prompt_from_batch(task, dependency_context=dep_context)
        batch_state.code_prompt = build_code_prompt_from_batch(task, dependency_context=dep_context)

    return batch_state, dep_context


def run_single_attempt(
    prompt: str,
    repo_path: Path,
    task: PlannedTask,
    attempt: int,
    agent_timeout: int = DEFAULT_AGENT_TIMEOUT,
    trajectory=None,
) -> Dict[str, Any]:
    """Execute a single sub-agent attempt and post-verify.

    The sub-agent's self-reported PASS/FAIL is advisory only; the
    authoritative verdict is the local ``post_verify`` pytest re-run.

    Args:
        prompt: Full prompt for the sub-agent.
        repo_path: Project repo path.
        task: PlannedTask object.
        attempt: Attempt number (1 or 2).
        agent_timeout: Timeout for sub-agent.
        trajectory: Trajectory for recording.

    Returns:
        Dict with keys: passed, agent_passed, verify_passed,
        agent_error, failure_reason, test_output, duration.
    """
    step_id = None
    if trajectory:
        try:
            # Derive stage prefix from task_type for clear trajectory naming
            _stage_map = {
                "integration_test": "gen_test",
                "final_test_docs": "gen_test",
                "main_entry": "gen_code",
                "project_requirements": "gen_code",
                "project_docs": "gen_code",
                "implementation": "gen_code",
            }
            stage = _stage_map.get(task.task_type, "gen_code")
            step = trajectory.add_step(
                f"{stage}_{task.task_id}_attempt{attempt}",
                f"Sub-agent attempt {attempt}",
            )
            trajectory.start_step(step.step_id)
            step_id = step.step_id
        except Exception:
            # Trajectory recording is best-effort; never block the attempt.
            pass

    start = time.time()
    result = {
        "attempt": attempt,
        "agent_passed": False,
        "verify_passed": False,
        "passed": False,
        "agent_error": None,
        "failure_reason": "",
        "test_output": "",
        "agent_pytest_summary": None,
        "duration": 0.0,
    }

    # --- Dispatch sub-agent ---
    response, error = dispatch_sub_agent(
        prompt, repo_path,
        timeout=agent_timeout,
        trajectory=trajectory,
        step_id=step_id,
        purpose="run_batch",
        max_retries=3,
    )

    if error:
        # Transport-level failure: no response to parse, nothing to verify.
        result["agent_error"] = error
        result["failure_reason"] = f"Sub-agent error: {error}"
        result["duration"] = time.time() - start
        return result

    # --- Parse sub-agent's self-report ---
    agent_passed, agent_reason = parse_batch_result(response)
    agent_summary = parse_pytest_summary(response)
    result["agent_passed"] = agent_passed
    result["agent_pytest_summary"] = agent_summary
    if not agent_passed:
        result["failure_reason"] = agent_reason
        logger.info("Sub-agent self-reported FAIL: %s", agent_reason)
    elif agent_summary is None:
        # PASS without the required PYTEST_SUMMARY line is suspicious;
        # log it so post_verify_failure analysis is easier.
        logger.warning(
            "Sub-agent reported PASS but did not provide PYTEST_SUMMARY line"
        )

    # --- Post-verification (authoritative) ---
    verify_passed, test_output = post_verify(repo_path, task)
    result["verify_passed"] = verify_passed
    result["test_output"] = test_output
    result["passed"] = verify_passed  # Post-verify is the authority

    if verify_passed and not agent_passed:
        logger.info("Sub-agent reported FAIL but post-verification PASSED โ€” treating as success")
        result["failure_reason"] = ""
    elif not verify_passed and agent_passed:
        logger.warning(
            "Sub-agent reported PASS (PYTEST_SUMMARY=%r) but post-verification FAILED",
            agent_summary,
        )
        first_line = test_output.splitlines()[0] if test_output.strip() else "no output"
        result["failure_reason"] = (
            f"Post-verification rejected sub-agent's PASS claim "
            f"(its PYTEST_SUMMARY={agent_summary!r}); pytest re-run says: {first_line}"
        )
    elif not verify_passed:
        result["failure_reason"] = agent_reason

    result["duration"] = time.time() - start

    # Complete trajectory step (best-effort, same as start)
    if trajectory and step_id:
        try:
            trajectory.complete_step(step_id, {
                "attempt": attempt,
                "passed": result["passed"],
                "duration": result["duration"],
            })
        except Exception:
            pass

    return result


def run_rpg_update_safe(
    task: PlannedTask,
    repo_path: Path,
    global_state: CodeGenState,
) -> Optional[str]:
    """Run RPG update, logging but not raising on failure.

    Returns:
        rpg_backup_path if a new backup was created, else None.
    """
    # These task types (and "<...>"-style marker paths) have no single
    # owning source file, so there is nothing for the RPG updater to attach.
    if task.task_type in (
        "integration_test", "final_test_docs", "main_entry",
        "project_requirements", "project_docs",
    ) or (task.file_path.startswith("<") and task.file_path.endswith(">")):
        logger.info("Skipping RPG update for %s task", task.task_type)
        return None
    try:
        # Only take a backup the first time (before any batch mutated the RPG).
        should_backup = global_state.rpg_backup_path is None
        rpg_result = run_rpg_update(
            batch=task,
            repo_path=repo_path,
            rpg_path=REPO_RPG_FILE,
            backup=should_backup,
        )
        logger.info("RPG update: edges_added=%s", rpg_result.get("edges_added", 0))
        if should_backup and rpg_result.get("backup_path"):
            return rpg_result["backup_path"]
    except Exception as exc:
        logger.warning("RPG update failed (non-fatal): %s", exc)
    return None


def _refresh_dep_graph_safe(
    repo_path: Path,
    changed_files: Optional[List[str]] = None,
) -> None:
    """Refresh dep_graph after code changes (non-fatal on error).

    Strategy:
      * If ``changed_files`` is provided (typical codegen path: a single
        file just got generated/edited), use the incremental
        ``RPGService.sync_from_file_list`` path so we only re-AST the
        touched file. This is the ~10x speed-up codegen benefits from.
      * If ``changed_files`` is empty / ``None`` (e.g. integration-test
        batches that don't have a single owning file), fall back to a
        full ``refresh_dep_graph`` so the graph still stays correct.

    The codegen pipeline does its own commit hygiene (each batch lands
    on its own git branch then merges), so this entry point intentionally
    does NOT advance ``meta.git`` โ€” that's owned by the pre-commit /
    post-merge hooks and ``/rpgkit.update_rpg``.
    """
    try:
        import sys
        # Make the scripts dir importable so ``rpg.service`` resolves.
        scripts_dir = Path(get_scripts_dir())
        if str(scripts_dir) not in sys.path:
            sys.path.insert(0, str(scripts_dir))
        from rpg.service import RPGService

        rpg_path = REPO_RPG_FILE
        dep_graph_path = DEP_GRAPH_FILE
        if not rpg_path.exists():
            # No RPG yet โ€” nothing to refresh.
            return

        svc = RPGService.load(str(rpg_path))

        # โ”€โ”€ Incremental path: codegen knows exactly which file changed โ”€โ”€
        if changed_files:
            # Filter to .py only โ€” sync_from_file_list assumes Python.
            py_files = [f for f in changed_files if f.endswith(".py")]
            if not py_files:
                # No .py touched (e.g. only docs/config edits) โ€” skip.
                logger.info("dep_graph: no .py files in batch, skipping refresh")
                svc.save(str(rpg_path))
                return

            result = svc.sync_from_file_list(
                file_paths=py_files,
                code_dir=str(repo_path),
                workspace_root=str(WORKSPACE_ROOT),
                save_path=str(dep_graph_path),
            )
            svc.save(str(rpg_path))
            logger.info(
                "dep_graph refreshed (mode=%s reason=%s): %d nodes, %d depโ†’rpg mappings",
                result.get("mode"), result.get("reason"),
                len(svc.rpg.dep_graph.G.nodes()),
                len(svc.rpg._dep_to_rpg_map),
            )
            return

        # โ”€โ”€ Fallback: full rebuild โ”€โ”€
        svc.refresh_dep_graph(
            str(repo_path),
            workspace_root=str(WORKSPACE_ROOT),
            save_path=str(dep_graph_path),
        )
        svc.save(str(rpg_path))
        logger.info("dep_graph refreshed (full): %d nodes, %d depโ†’rpg mappings",
                    len(svc.rpg.dep_graph.G.nodes()),
                    len(svc.rpg._dep_to_rpg_map))
    except Exception as exc:
        logger.warning("dep_graph refresh failed (non-fatal): %s", exc)


def _task_files_for_dep_graph(task: PlannedTask) -> Optional[List[str]]:
    """Return the list of files to pass to ``_refresh_dep_graph_safe``.

    Returns ``None`` for batches where the file set is ambiguous or
    irrelevant (integration tests, docs, project files), so the caller
    falls back to a full refresh. This mirrors the same skip criteria
    used by ``run_rpg_update_safe``.
    """
    if task.task_type in (
        "integration_test", "final_test_docs", "main_entry",
        "project_requirements", "project_docs",
    ):
        return None
    # Marker paths (e.g. "<...>") aren't real files.
    if task.file_path.startswith("<") and task.file_path.endswith(">"):
        return None
    return [task.file_path]


def run_batch(
    batch_id: Optional[str] = None,
    next_batch: bool = False,
    resume: bool = False,
    retry: Optional[str] = None,
    merge_file: bool = False,
    max_units: int = 0,
    agent_timeout: int = DEFAULT_AGENT_TIMEOUT,
    tasks_path: Path = TASKS_FILE,
    state_path: Path = STATE_FILE,
    repo_path: Optional[Path] = None,
) -> Dict[str, Any]:
    """Main batch orchestrator.

    Coordinates the full lifecycle of a single batch:
      1. Determine which batch to run
      2. Create git branch from main
      3. Setup venv and install deps
      4. Build prompt and dispatch sub-agent
      5. Post-verify with pytest
      6. On failure: auto-retry once with resume context
      7. Merge branch on success / abandon on failure
      8. Update state and report

    Args:
        batch_id: Specific batch ID to run.
        next_batch: Run the next pending batch.
        resume: Resume an interrupted batch.
        retry: Retry a specific failed batch ID.
        merge_file: Enable file-merge mode.
        max_units: Max units per merged batch (0 = no limit).
        agent_timeout: Sub-agent timeout in seconds.
        tasks_path: Path to tasks.json.
        state_path: Path to code_gen_state.jsonl.
        repo_path: Repo root (default: from paths.py).

    Returns:
        JSON-serializable result dict.
    """
    repo_path = repo_path or REPO_DIR
    scripts = get_scripts_dir()
    global_state = load_code_gen_state(state_path)
    git = GitRunner(str(repo_path))

    # โ”€โ”€ Step 1: Determine batch โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€

    merged_tasks = None

    if retry:
        batch_id = retry
        # Remove from failed list so it can be retried
        if batch_id in global_state.failed_task_ids:
            global_state.failed_task_ids.remove(batch_id)
            global_state.failed_tasks = len(global_state.failed_task_ids)
            save_code_gen_state(global_state, state_path)
        logger.info("Retrying batch: %s", batch_id)

    elif resume:
        if not global_state.current_batch_id:
            return _error("No batch in progress to resume", scripts)
        batch_id = global_state.current_batch_id
        logger.info("Resuming batch: %s", batch_id)

    elif next_batch:
        if merge_file:
            merged_tasks = get_next_merged_tasks(
                tasks_path, global_state, max_units,
                repo_path=repo_path, state_path=state_path,
            )
            if not merged_tasks:
                return _all_done(global_state, tasks_path, scripts)
            batch_id = merged_tasks[0].task_id
        else:
            batch_id = get_next_pending_task_id(
                tasks_path, global_state,
                repo_path=repo_path, state_path=state_path,
            )
            if not batch_id:
                return _all_done(global_state, tasks_path, scripts)
        logger.info("Next batch: %s (merge_file=%s)", batch_id, merge_file)

    elif batch_id:
        logger.info("Running specific batch: %s", batch_id)

    else:
        return _error("No batch specified. Use --next, --resume, --retry, or --batch-id.", scripts)

    # โ”€โ”€ Step 2: Load task โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€

    task = get_task_by_id(tasks_path, batch_id)
    if not task:
        return _error(f"Task '{batch_id}' not found in tasks.json", scripts)

    if batch_id in global_state.completed_task_ids:
        return _error(f"Task '{batch_id}' is already completed", scripts)

    logger.info(
        "Batch: id=%s file=%s type=%s units=%s",
        batch_id, task.file_path, task.task_type, task.units_key,
    )

    # โ”€โ”€ Step 3: Setup git branch โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€

    # retry/resume reuse the preserved branch instead of recreating it.
    reuse_branch = bool(retry) or resume
    try:
        branch_ok, branch_name, initial_commit = setup_batch_branch(
            git, batch_id, repo_path, reuse_existing=reuse_branch,
        )
    except RuntimeError as exc:
        return _error(f"Git setup failed: {exc}", scripts)

    if not branch_ok:
        return _error(f"Failed to create branch for batch '{batch_id}'", scripts)

    logger.info("Branch: %s (initial_commit=%s)", branch_name, initial_commit[:8] if initial_commit else "none")

    # โ”€โ”€ Step 4: Setup venv โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€

    try:
        created_new, venv_path = ensure_dev_venv(repo_path)
        if created_new:
            logger.info("Created dev venv at %s", venv_path)
        ensure_deps_installed(repo_path)
    except Exception as exc:
        logger.warning("Venv setup issue (non-fatal): %s", exc)

    # โ”€โ”€ Step 5: Build prompts โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€

    # Track whether we entered resume-but-not-yet-passing mode
    _resume_check_output = ""

    # For resume mode, try to recover existing state first
    if resume and global_state.current_batch_state:
        batch_state = BatchExecutionState.from_dict(global_state.current_batch_state)
        dep_context = None
        try:
            dep_context = build_dependency_context(
                batch=task,
                completed_task_ids=global_state.completed_task_ids,
                tasks_path=TASKS_FILE,
                interfaces_path=INTERFACES_FILE,
                base_classes_path=BASE_CLASSES_FILE,
                data_flow_path=DATA_FLOW_FILE,
                feature_spec_path=FEATURE_SPEC_FILE,
            )
        except Exception as exc:
            logger.warning("Failed to build dependency context: %s", exc)

        # Check if batch already passes (sub-agent may have finished before interrupt)
        logger.info("Resume: checking if batch already passes...")
        already_passed, check_output = post_verify(repo_path, task)
        if already_passed:
            logger.info("Resume: batch already passes! Completing directly.")
            rpg_backup = run_rpg_update_safe(task, repo_path, global_state)
            _refresh_dep_graph_safe(
                repo_path,
                changed_files=_task_files_for_dep_graph(task),
            )
            merge_ok, merge_error = merge_batch_branch(
                git, branch_name, batch_id,
                file_path=task.file_path, units=task.units_key,
            )
            if not merge_ok:
                # Ensure we're on main; clear batch state so --retry works
                try:
                    ensure_on_main(git)
                except RuntimeError:
                    pass
                if merge_error == "branch_missing":
                    # Sub-agent didn't use the batch branch โ€” skip without
                    # consuming a retry slot (see plan A3). The helper
                    # promotes to failed after _MAX_BATCH_PREPARES skips.
                    skipped = state_skip_batch(batch_id, state_path)
                    if skipped:
                        return _error(
                            f"Batch '{batch_id}' skipped: branch '{branch_name}' "
                            f"was not created. Re-run --next to retry.",
                            scripts,
                        )
                    return _error(
                        f"Batch '{batch_id}' kept skipping (sub-agent never "
                        f"used the batch branch); promoted to failed. "
                        f"Investigate why, then `--retry {batch_id}` to try again.",
                        scripts,
                    )
                state_complete_batch(batch_id, False, state_path)
                return _error(
                    f"Tests pass but branch merge failed: {merge_error}. "
                    f"Branch '{branch_name}' preserved. "
                    f"Retry: python3 {scripts}/run_batch.py --retry {batch_id} --json",
                    scripts,
                )
            state_complete_batch(batch_id, True, state_path, rpg_backup_path=rpg_backup)
            return _success_result(
                batch_id, task, batch_state, [{"attempt": 0, "passed": True, "duration": 0}],
                0.0, branch_merged=True, scripts=scripts,
                tasks_path=tasks_path, state_path=state_path,
            )
        # Tests didn't pass โ€” will proceed to attempt loop with resume prompt
        _resume_check_output = check_output
    else:
        batch_state, dep_context = _prepare_batch_context(
            global_state, task, merged_tasks,
        )

    # Stamp branch/iteration bookkeeping for this run.
    # NOTE(review): reconstructed as applying to both the resume and fresh
    # paths โ€” confirm against the original indentation if available.
    batch_state.branch_name = branch_name
    batch_state.initial_commit = initial_commit
    batch_state.started_at = time.strftime("%Y-%m-%dT%H:%M:%S")
    batch_state.start_iteration()

    # Save state (for resume capability)
    global_state.set_current_batch(batch_state)
    save_code_gen_state(global_state, state_path)

    tdd_prompt = build_tdd_prompt(
        batch_state, task, repo_path,
        merged_tasks=merged_tasks,
        dependency_context=dep_context,
    )

    # Trajectory (best-effort; failures never block the batch)
    trajectory = None
    try:
        trajectory = get_or_create_code_gen_trajectory(
            global_state, base_dir=repo_path, state_path=state_path,
        )
    except Exception:
        pass

    # โ”€โ”€ Step 6: Attempt loop โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€

    attempts = []
    final_passed = False

    # For resume mode where tests didn't pass in the early check,
    # start with a resume prompt instead of a fresh one
    if _resume_check_output:
        current_prompt = build_resume_prompt(
            original_prompt=tdd_prompt,
            attempt_number=1,
            failure_reason="Resumed after interruption",
            last_test_output=_resume_check_output,
        )
    else:
        current_prompt = tdd_prompt

    for attempt_num in range(1, MAX_BATCH_ATTEMPTS + 1):
        logger.info("โ”โ”โ” Attempt %d/%d for batch %s โ”โ”โ”", attempt_num, MAX_BATCH_ATTEMPTS, batch_id)

        attempt_result = run_single_attempt(
            current_prompt, repo_path, task, attempt_num,
            agent_timeout=agent_timeout,
            trajectory=trajectory,
        )
        attempts.append(attempt_result)

        if attempt_result["passed"]:
            final_passed = True
            logger.info("[OK] Batch PASSED on attempt %d", attempt_num)
            break

        # Prepare resume prompt for next attempt
        if attempt_num < MAX_BATCH_ATTEMPTS:
            logger.info("[FAIL] Attempt %d failed, preparing retry...", attempt_num)
            # If the agent self-reported PASS but post-verify caught the
            # failure, surface that explicitly in the next prompt so the
            # agent doesn't repeat the false-positive pattern (A1 + F2).
            sub_agent_claimed_pass = (
                attempt_result["agent_passed"]
                and not attempt_result["verify_passed"]
            )
            current_prompt = build_resume_prompt(
                original_prompt=tdd_prompt,
                attempt_number=attempt_num + 1,
                failure_reason=attempt_result["failure_reason"],
                last_test_output=attempt_result["test_output"],
                sub_agent_claimed_pass=sub_agent_claimed_pass,
                agent_pytest_summary=attempt_result.get("agent_pytest_summary"),
            )
        else:
            logger.info("[FAIL] All %d attempts exhausted for batch %s", MAX_BATCH_ATTEMPTS, batch_id)

    # โ”€โ”€ Step 7: Finalize โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€

    total_duration = sum(a["duration"] for a in attempts)

    if final_passed:
        # RPG update
        rpg_backup = run_rpg_update_safe(task, repo_path, global_state)
        _refresh_dep_graph_safe(
            repo_path,
            changed_files=_task_files_for_dep_graph(task),
        )

        # Merge branch
        merge_ok, merge_error = merge_batch_branch(
            git, branch_name, batch_id,
            file_path=task.file_path, units=task.units_key,
        )
        if not merge_ok:
            logger.error("Branch merge failed: %s", merge_error)
            # Ensure we're on main; clear batch state so --retry works
            try:
                ensure_on_main(git)
            except RuntimeError:
                pass
            if merge_error == "branch_missing":
                # Sub-agent didn't use the batch branch โ€” skip without
                # consuming a retry slot (see plan A3). The helper
                # promotes to failed after _MAX_BATCH_PREPARES skips.
                skipped = state_skip_batch(batch_id, state_path)
                if skipped:
                    return _error(
                        f"Batch '{batch_id}' skipped: branch '{branch_name}' "
                        f"was not created. Re-run --next to retry.",
                        scripts,
                    )
                return _error(
                    f"Batch '{batch_id}' kept skipping (sub-agent never "
                    f"used the batch branch); promoted to failed. "
                    f"Investigate why, then `--retry {batch_id}` to try again.",
                    scripts,
                )
            state_complete_batch(batch_id, False, state_path)
            return _error(
                f"Tests passed but branch merge failed: {merge_error}. "
                f"Branch '{branch_name}' preserved. "
                f"Retry: python3 {scripts}/run_batch.py --retry {batch_id} --json",
                scripts,
            )

        # Update state
        state_complete_batch(batch_id, True, state_path, rpg_backup_path=rpg_backup)

        # โ”€โ”€ Subtree review: check if the subtree just completed โ”€โ”€
        try:
            from code_gen.subtree_review import is_subtree_just_completed, run_subtree_review

            # Reload state to get the freshly-updated completed_task_ids
            fresh_state = load_code_gen_state(state_path)
            completed_subtree = is_subtree_just_completed(
                batch_id, fresh_state.completed_task_ids, tasks_path,
            )
            if completed_subtree:
                logger.info(
                    "โ”โ”โ” Subtree '%s' complete โ€” running review โ”โ”โ”",
                    completed_subtree,
                )
                review_result = run_subtree_review(
                    subtree_name=completed_subtree,
                    completed_task_ids=fresh_state.completed_task_ids,
                    repo_path=repo_path,
                    tasks_path=tasks_path,
                    agent_timeout=agent_timeout,
                )
                logger.info(
                    "Review result for '%s': %s (%.1fs)",
                    completed_subtree,
                    review_result.status,
                    review_result.duration,
                )
                # Persist review result
                fresh_state.subtree_reviews[completed_subtree] = review_result.to_dict()
                save_code_gen_state(fresh_state, state_path)
        except Exception as exc:
            logger.warning("Subtree review failed (non-blocking): %s", exc)
            # Ensure we're back on main after any review failure
            try:
                ensure_on_main(git)
            except RuntimeError:
                pass

        return _success_result(
            batch_id, task, batch_state, attempts, total_duration,
            branch_merged=True, scripts=scripts, tasks_path=tasks_path,
            state_path=state_path,
        )
    else:
        # Mark failed, preserve branch (for post-mortem / --retry)
        abandon_batch_branch(git, branch_name)
        state_complete_batch(batch_id, False, state_path)

        return _failure_result(
            batch_id, task, batch_state, attempts, total_duration,
            scripts=scripts, tasks_path=tasks_path, state_path=state_path,
        )


# ============================================================================
# Module 6: Final Test
# ----------------------------------------------------------------------------
# Extracted to ``code_gen.final_validation``. Imported above.
# ============================================================================


# ============================================================================
# Module 6b: Global Review
# ----------------------------------------------------------------------------
# Extracted to ``code_gen.global_review``. Imported below; nothing here.
# ============================================================================


# ============================================================================
# Module 7: Result Builders
# ----------------------------------------------------------------------------
# Extracted to ``code_gen.result_builders``. Imported above.
# ============================================================================


# ============================================================================
# CLI
# ============================================================================

def print_result(result: Dict[str, Any], json_output: bool = False) -> None:
    """Print result to stdout and log it."""
    # Always log the result as JSON for the file log
    logger.info("Batch result: %s", json.dumps(result, indent=2))

    if json_output:
        print(json.dumps(result, indent=2))
        return

    success = result.get("success", False)
    rtype = result.get("type", "")

    if rtype == "final_test":
        icon = "[OK]" if success else "[FAIL]"
        print(f"\n {icon} Final Test: passed={result.get('passed',0)} "
              f"failed={result.get('failed',0)} errors={result.get('errors',0)}")
    elif rtype == "complete":
        print(f"\n [END] {result.get('message', '')}")
    elif rtype == "batch_complete":
        print(f"\n [OK] Batch {result.get('batch_id','')} completed "
              f"({result.get('attempts_used',0)} attempt(s), "
              f"{result.get('total_duration',0):.1f}s)")
    elif rtype == "batch_failed":
        print(f"\n [FAIL] Batch {result.get('batch_id','')} failed "
              f"({result.get('attempts_used',0)} attempt(s))")
        print(f" Reason: {result.get('failure_reason','')}")
    else:
        icon = "[OK]" if success else "[FAIL]"
        msg = result.get("message", result.get("error", ""))
        print(f"\n {icon} {msg}")

    if "stats" in result:
        s = result["stats"]
        print(f" Progress: {s.get('completed',0)}/{s.get('total',0)} completed, "
              f"{s.get('failed',0)} failed")

    if "next_action" in result:
        print(f"\n -> {result['next_action']}")


def main() -> int:
    """CLI entry point; returns a process exit code."""
    # Convert SIGTERM โ†’ SystemExit so "except BaseException" in Popen calls
    # triggers killpg cleanup instead of the process being silently killed.
    # Install before argparse so the handler is active as early as possible.
    signal.signal(signal.SIGTERM, lambda sig, frame: sys.exit(1))

    parser = argparse.ArgumentParser(
        description="Run Batch โ€” unified TDD batch executor",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    mode = parser.add_mutually_exclusive_group()
    mode.add_argument("--next", action="store_true", help="Run the next pending batch")
    mode.add_argument("--loop", action="store_true",
                      help="Run ALL pending batches sequentially (recommended)")
    mode.add_argument("--resume", action="store_true", help="Resume an interrupted batch")
    mode.add_argument("--retry", metavar="BATCH_ID", help="Retry a specific failed batch")
    mode.add_argument("--batch-id", metavar="ID", help="Run a specific batch by ID")
    mode.add_argument("--final-test", action="store_true",
                      help="Run full repo test suite (pytest + smoke, no global review)")
    mode.add_argument("--smoke-test", action="store_true", help="Run post-codegen smoke tests")
    mode.add_argument("--global-review", action="store_true",
                      help="Run global feature review + repair (standalone)")
    mode.add_argument("--prune-failed", action="store_true",
                      help="Delete all preserved failed batch/* branches (cleanup)")

    parser.add_argument("--merge-file", action="store_true",
                        help="File-merge mode: group same-file tasks into one batch")
    parser.add_argument("--max-units", type=int, default=0,
                        help="Max units per merged batch (0 = no limit)")
    parser.add_argument("--agent-timeout", type=int, default=DEFAULT_AGENT_TIMEOUT,
                        help=f"Sub-agent timeout in seconds (default: {DEFAULT_AGENT_TIMEOUT})")
    parser.add_argument("--review-iterations", type=int, default=10,
                        help="Max iterations for global review (default: 10)")
    parser.add_argument("--json", action="store_true", help="Output as JSON")

    args = parser.parse_args()

    # Setup logging
    log_level = logging.DEBUG if not args.json else logging.WARNING
    logging.basicConfig(
        level=logging.DEBUG,  # root logger accepts all; handlers filter
        format="%(asctime)s [%(name)s] %(levelname)s: %(message)s",
        datefmt="%H:%M:%S",
    )

    # Console handler: respect --json (WARNING) vs interactive (DEBUG)
    root_logger = logging.getLogger()
    # basicConfig already added a StreamHandler; adjust its level
    for handler in root_logger.handlers:
        if isinstance(handler, logging.StreamHandler) and not isinstance(handler, logging.FileHandler):
            handler.setLevel(log_level)

    # File handler: capture DEBUG records to .rpgkit/logs/code_gen.log via
    # the shared helper (idempotent; degrades gracefully on read-only FS).
    from common.logging_setup import setup_file_logging
    setup_file_logging("code_gen")

    if args.final_test:
        result = final_test()
        print_result(result, json_output=args.json)
        return 0 if result.get("success") else 1

    if args.smoke_test:
        smoke_result = run_smoke_test()
        result = smoke_result.to_dict()
        print_result(result, json_output=args.json)
        return 0 if result.get("success") else 1

    if args.global_review:
        result = global_review(
            max_iterations=args.review_iterations,
            timeout_per_iteration=args.agent_timeout,
        )
        if args.json:
            print(json.dumps(result, indent=2))
        else:
            icon = "[OK]" if result.get("success") else "[FAIL]"
            iters = len(result.get("iterations", []))
            print(f"\n {icon} Global Review: {iters} iteration(s), "
                  f"{result.get('total_duration', 0):.1f}s")
            for it in result.get("iterations", []):
                detail = it.get("detail", it.get("error", it.get("reason", "")))
                it_icon = "[OK]" if it.get("review_passed") else "[FAIL]"
                print(f" {it_icon} Iteration {it.get('iteration', '?')}: {detail}")
        return 0 if result.get("success") else 1

    if args.prune_failed:
        repo_path = REPO_DIR
        git = GitRunner(str(repo_path))
        import subprocess as _sp
        raw = _sp.run(
            ["git", "branch", "--format=%(refname:short)"],
            cwd=repo_path, capture_output=True, text=True,
        )
        all_branches = [b.strip() for b in raw.stdout.splitlines() if b.strip()]
        current = git.get_current_branch()
        deleted = []
        errors = []
        for branch in all_branches:
            # Only batch/* branches are fair game; never delete the branch
            # we are currently standing on.
            if not branch.startswith("batch/"):
                continue
            if branch == current:
                continue
            try:
                git.delete_branch(branch, force=True)
                deleted.append(branch)
                logger.info("Pruned failed branch: %s", branch)
            except Exception as exc:
                errors.append(f"{branch}: {exc}")
        result = {
            "success": not errors,
            "deleted": deleted,
            "errors": errors,
            "type": "prune_failed",
        }
        print_result(result, json_output=args.json)
        return 0 if not errors else 1

    if args.loop:
        return _run_loop(args)

    result = run_batch(
        batch_id=args.batch_id,
        next_batch=args.next,
        resume=args.resume,
        retry=args.retry,
        merge_file=args.merge_file,
        max_units=args.max_units,
        agent_timeout=args.agent_timeout,
    )

    print_result(result, json_output=args.json)
    return 0 if result.get("success") else 1


def _run_loop(args) -> int:
    """Run all pending batches sequentially until done or interrupted.

    Prints a summary line after each batch. Ctrl+C stops gracefully
    between batches (current batch completes first).
    """
    batch_num = 0
    total_passed = 0
    total_failed = 0
    start_time = time.time()

    print("\n [START] Starting batch loop (Ctrl+C to stop after current batch)\n")

    try:
        while True:
            batch_num += 1

            result = run_batch(
                next_batch=True,
                merge_file=args.merge_file,
                max_units=args.max_units,
                agent_timeout=args.agent_timeout,
            )

            rtype = result.get("type", "")

            # All done
            if rtype == "complete":
                print_result(result, json_output=args.json)
                elapsed = time.time() - start_time
                print(f"\n [TIME] Total time: {elapsed/60:.1f} min "
                      f"({total_passed} passed, {total_failed} failed)")
                return 0

            # Batch completed or failed โ€” log and continue
            # Always log full result to file
            logger.info("Batch result: %s", json.dumps(result, indent=2))

            if rtype == "batch_complete":
                total_passed += 1
                stats = result.get("stats", {})
                print(f" [OK] [{batch_num}] {result.get('batch_id','')} โ€” "
                      f"PASS ({result.get('attempts_used',0)} attempt(s), "
                      f"{result.get('total_duration',0):.0f}s) โ€” "
                      f"{stats.get('completed',0)}/{stats.get('total',0)} done")
            elif rtype == "batch_failed":
                total_failed += 1
                stats = result.get("stats", {})
                print(f" [FAIL] [{batch_num}] {result.get('batch_id','')} โ€” "
                      f"FAIL: {result.get('failure_reason','')[:80]} โ€” "
                      f"{stats.get('completed',0)}/{stats.get('total',0)} done")
            else:
                # Error or unexpected โ€” print and stop
                print_result(result, json_output=args.json)
                return 1 if not result.get("success") else 0
    except KeyboardInterrupt:
        elapsed = time.time() - start_time
        print(f"\n [WARNING] Interrupted after {batch_num} batches "
              f"({total_passed} passed, {total_failed} failed, "
              f"{elapsed/60:.1f} min)")
        logger.info("Loop interrupted by user after %d batches", batch_num)
        # 130 follows the shell convention of 128 + SIGINT(2).
        return 130


if __name__ == "__main__":
    try:
        sys.exit(main())
    except KeyboardInterrupt:
        print("\n [WARNING] Interrupted by user", file=sys.stderr)
        sys.exit(130)
    except Exception as exc:
        import traceback
        tb = traceback.format_exc()
        # Try to log to file even if main() setup failed
        try:
            _LOGS_DIR.mkdir(parents=True, exist_ok=True)
            with open(_LOGS_DIR / "code_gen.log", "a", encoding="utf-8") as _f:
                _f.write(f"\nUNHANDLED EXCEPTION:\n{tb}\n")
        except Exception:
            pass
        print(json.dumps({
            "success": False,
            "error": str(exc),
            "traceback": tb,
        }, indent=2))
        sys.exit(1)
diff --git a/RPG-Kit/scripts/skeleton/__init__.py b/RPG-Kit/scripts/skeleton/__init__.py
new file mode 100644
index 0000000..b225d1b
--- /dev/null
+++ b/RPG-Kit/scripts/skeleton/__init__.py
@@ -0,0 +1,89 @@
#!/usr/bin/env python3
"""Skeleton Module.

Skeleton-specific models, prompts and the file designer:
- skeleton_models: RepoSkeleton, DirectoryNode, FileNode
- skeleton_prompts: Prompts for skeleton generation
- file_designer: FileDesigner, LLMClient

For backward compatibility this package also re-exports the canonical RPG
data model and helpers from the ``rpg`` package:
- rpg.code_unit: CodeUnit, ParsedFile, ...
- rpg.models: RPG, Node, NodeMetaData, NodeType, Edge, EdgeType
- rpg.builder: create_initial_rpg, load_refactor_feature_data, get_rpg_statistics
- rpg.dep_graph: DependencyGraph

New code should import these directly from ``rpg.*`` instead of ``skeleton``.
+""" + +from .skeleton_models import RepoSkeleton, DirectoryNode, FileNode +from rpg.code_unit import ( + CodeUnit, ParsedFile, + ParsedWorkspace, ParsedModule, + CodeSnippetBuilder, merge_codeunits, class_ast_to_header_str, + compare_code_units, +) +from .skeleton_prompts import ( + RAW_SKELETON_PROMPT, + GROUP_SKELETON_PROMPT, + RAW_SKELETON_REVIEW_PROMPT, + GROUP_SKELETON_REVIEW_PROMPT, + build_component_summary, + extract_features_from_subtree, + extract_leaf_descriptions_from_subtree, + format_feature_list, +) +from rpg.models import RPG, Node, NodeMetaData, NodeType, Edge, EdgeType +from rpg.builder import create_initial_rpg, load_refactor_feature_data, get_rpg_statistics +from rpg.dep_graph import DependencyGraph + +# Lazy import: FileDesigner depends on pydantic which may not be available +# in all environments (e.g., project .venv_dev used by run_batch.py) +def __getattr__(name): + if name in ("FileDesigner", "LLMClient"): + from .file_designer import FileDesigner, LLMClient + globals()["FileDesigner"] = FileDesigner + globals()["LLMClient"] = LLMClient + return globals()[name] + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") + +__all__ = [ + # skeleton_models + "RepoSkeleton", + "DirectoryNode", + "FileNode", + # code_unit + "CodeUnit", + "ParsedFile", + "ParsedWorkspace", + "ParsedModule", + "CodeSnippetBuilder", + "merge_codeunits", + "class_ast_to_header_str", + "compare_code_units", + # skeleton_prompts + "RAW_SKELETON_PROMPT", + "GROUP_SKELETON_PROMPT", + "RAW_SKELETON_REVIEW_PROMPT", + "GROUP_SKELETON_REVIEW_PROMPT", + "build_component_summary", + "extract_features_from_subtree", + "extract_leaf_descriptions_from_subtree", + "format_feature_list", + # rpg_models + "RPG", + "Node", + "NodeMetaData", + "NodeType", + "EdgeType", + "Edge", + # rpg_builder + "create_initial_rpg", + "load_refactor_feature_data", + "get_rpg_statistics", + # file_designer + "FileDesigner", + "LLMClient", + # dep_graph + "DependencyGraph", +] diff 
--git a/RPG-Kit/scripts/skeleton/file_designer.py b/RPG-Kit/scripts/skeleton/file_designer.py new file mode 100644 index 0000000..353a6ed --- /dev/null +++ b/RPG-Kit/scripts/skeleton/file_designer.py @@ -0,0 +1,734 @@ +#!/usr/bin/env python3 +"""File Designer. + +This module provides the core FileDesigner functionality for building +repository skeletons from RPG structures. + +Key components: +- FileDesigner: Main orchestrator for skeleton building +- Two-stage process: RawSkeleton + GroupSkeleton +""" + +import logging +from typing import Dict, List, Optional, Tuple, Any +from pathlib import Path +from pydantic import BaseModel, Field + +from rpg.models import RPG, Node, NodeType, NodeMetaData +from .skeleton_models import RepoSkeleton +from .skeleton_prompts import ( + RAW_SKELETON_PROMPT, + GROUP_SKELETON_PROMPT, + build_component_summary, + extract_features_from_subtree, + extract_leaf_descriptions_from_subtree, + format_feature_list +) + +# Import common LLMClient with trajectory support +import sys +sys.path.insert(0, str(Path(__file__).parent.parent)) +from common import LLMClient +from common.utils import get_project_background_context + + +# ============================================================================ +# Validation Functions +# ============================================================================ + +def validate_directory_structure( + dir_assignments: Dict[str, str], + required_components: List[str] +) -> Tuple[bool, str]: + """Validate that all required components have directory assignments. 
+ + Args: + dir_assignments: Mapping of component_name -> directory_path + required_components: List of component names that must be covered + + Returns: + (is_valid, error_message) + """ + errors = [] + assigned_components = set(dir_assignments.keys()) + required_set = set(required_components) + + # Check for missing components + missing = required_set - assigned_components + if missing: + errors.append(f"Missing directory assignments for components: {sorted(missing)}") + + # Check for extra/unrecognized components + extra = assigned_components - required_set + if extra: + errors.append(f"Unrecognized components in assignments: {sorted(extra)}") + + # Check for empty directory paths and Python identifier validity + for comp, dir_path in dir_assignments.items(): + if not dir_path or not dir_path.strip(): + errors.append(f"Component '{comp}' has empty directory path") + continue + # Each path segment used as a Python package must be a valid identifier + for segment in dir_path.replace("\\", "/").strip("/").split("/"): + if segment and not segment.isidentifier(): + errors.append( + f"Component '{comp}': directory segment '{segment}' is not a valid " + f"Python identifier (avoid hyphens; use underscores instead)" + ) + + if errors: + return False, "\n".join(errors) + return True, "All components have valid directory assignments." + + +def validate_file_path_constraint( + file_path: str, + allowed_dirs: List[str] +) -> Tuple[bool, str]: + """Validate that a file path is under one of the allowed directories. 
+ + Args: + file_path: The file path to validate + allowed_dirs: List of allowed directory prefixes + + Returns: + (is_valid, error_message) + """ + if not file_path: + return False, "Empty file path" + + # Normalize paths + normalized_path = file_path.replace("\\", "/").strip("/") + + for allowed_dir in allowed_dirs: + normalized_dir = allowed_dir.replace("\\", "/").strip("/") + # Check if file_path starts with allowed_dir + if normalized_path.startswith(normalized_dir + "/") or normalized_path == normalized_dir: + return True, "" + + return False, f"File path '{file_path}' is not under any allowed directory: {allowed_dirs}" + + +# ============================================================================ +# Data Models for Structured Output +# ============================================================================ + +class DirectoryAssignment(BaseModel): + """Assignment of a component to a directory.""" + component_name: str = Field(description="Name of the component") + directory_path: str = Field(description="Directory path (e.g., 'src/parser')") + reasoning: str = Field(description="Brief explanation for this assignment") + + +class DirectoryStructureOutput(BaseModel): + """Output for directory structure generation.""" + assignments: List[DirectoryAssignment] = Field( + description="List of component-to-directory assignments" + ) + overall_reasoning: str = Field( + description="Overall rationale for the directory structure" + ) + + +class FileAssignment(BaseModel): + """Assignment of features to a file.""" + file_path: str = Field(description="Full file path (e.g., 'src/parser/tokenizer.py')") + features: List[str] = Field(description="List of feature paths assigned to this file") + purpose: str = Field(description="Brief description of the file's purpose") + + +class FileAssignmentOutput(BaseModel): + """Output for file assignment step.""" + assignments: List[FileAssignment] = Field( + ..., + description="List of file assignments" + ) + +# 
============================================================================ +# File Designer +# ============================================================================ + +class FileDesigner: + """Main orchestrator for skeleton building.""" + + def __init__( + self, + rpg: RPG, + llm_client: Optional[LLMClient] = None, + max_iterations: int = 10, + config: Optional[Dict[str, Any]] = None, + trajectory: Optional[Any] = None, + step_id: Optional[str] = None + ): + """Initialize FileDesigner. + + Args: + rpg: The RPG structure to build skeleton from + llm_client: LLM client for API calls + max_iterations: Maximum iterations for iterative design + config: Optional configuration dictionary + trajectory: Optional trajectory tracker for logging steps + step_id: Optional step ID for trajectory tracking + """ + self.rpg = rpg + self.llm_client = llm_client or LLMClient(trajectory=trajectory, step_id=step_id) + self.max_iterations = max_iterations + self.config = config or {} + self.trajectory = trajectory + self.step_id = step_id + + self.logger = logging.getLogger(__name__) + + # Load project background / technology context (empty string if unavailable) + try: + self._project_background = get_project_background_context() + except Exception: + self._project_background = "" + + # Initialize empty skeleton + self.skeleton = RepoSkeleton({}) + + # Component to directory mapping (for RPG update) + self.component_to_dir: Dict[str, str] = {} + + # Statistics + self.stats = { + "components_processed": 0, + "features_assigned": 0, + "files_created": 0, + "init_files_created": 0, + "iterations_used": 0, + "llm_calls_made": 0, + "validation_retries": 0 + } + + def run(self, result_path: Optional[Path] = None) -> Tuple[RepoSkeleton, RPG, Dict[str, Any]]: + """Execute complete skeleton building workflow. 
+ + Returns: + Tuple of (skeleton, updated_rpg, results_dict) + """ + self.logger.info("=" * 70) + self.logger.info("FILE DESIGNER - SKELETON BUILDING") + self.logger.info("=" * 70) + + try: + # Step 1: Extract component data from RPG + components_data = self._extract_components_from_rpg() + self.logger.info(f"Extracted {len(components_data)} components from RPG") + + if not components_data: + return self.skeleton, self.rpg, {"success": False, "error": "No components found"} + + # Step 2: Generate directory structure (Raw Skeleton) + self.logger.info("\n[Step 1] Generating directory structure...") + dir_assignments = self._generate_directory_structure(components_data) + + if not dir_assignments: + return self.skeleton, self.rpg, {"success": False, "error": "Directory structure generation failed"} + + # Step 3: Assign features to files for each component (Group Skeleton) + self.logger.info("\n[Step 2] Assigning features to files...") + file_assignments = self._assign_features_to_files(components_data, dir_assignments) + + if not file_assignments: + return self.skeleton, self.rpg, {"success": False, "error": "Feature assignment failed"} + + # Step 4: Build final skeleton + self.logger.info("\n[Step 3] Building final skeleton structure...") + self._build_final_skeleton(file_assignments) + + # Step 5: Update RPG with directory assignments + self.logger.info("\n[Step 4] Updating RPG with directory assignments...") + self._update_rpg_with_directories() + + # Step 6: Save results + if result_path: + self.skeleton.save_json(str(result_path)) + self.logger.info(f"Skeleton saved to: {result_path}") + + # Build success response + results = { + "success": True, + "statistics": self.stats, + "components_processed": self.stats["components_processed"], + "features_assigned": self.stats["features_assigned"], + "files_created": self.stats["files_created"], + "skeleton_nodes": len(self.skeleton.path_to_node), + } + + self.logger.info("\n" + "=" * 70) + self.logger.info("SKELETON 
BUILDING COMPLETED SUCCESSFULLY") + self.logger.info("=" * 70) + self._print_statistics() + + return self.skeleton, self.rpg, results + + except Exception as e: + self.logger.error(f"Skeleton building failed: {e}") + return self.skeleton, self.rpg, {"success": False, "error": str(e)} + + def _extract_components_from_rpg(self) -> List[Dict[str, Any]]: + """Extract component data from RPG for skeleton building.""" + components = [] + + # Get all level-1 nodes (functional areas) as components + for node in self.rpg.nodes.values(): + if node.level == 1 and node.name and node.id != self.rpg.repo_node.id: + # Extract subtree for this component + subtree = self._extract_subtree_from_node(node) + + component = { + "name": node.name, + "description": getattr(node.meta, 'description', '') if node.meta else '', + "refactored_subtree": subtree + } + components.append(component) + + return components + + def _extract_subtree_from_node(self, node: Node) -> Dict[str, Any]: + """Extract hierarchical subtree from RPG node.""" + children = node.children() + + if not children: + # Leaf node + return node.name + + subtree = {} + for child in children: + child_subtree = self._extract_subtree_from_node(child) + if isinstance(child_subtree, str): + # Child is a leaf + if child.name not in subtree: + subtree[child.name] = child_subtree + else: + # Child has subtree + subtree[child.name] = child_subtree + + return subtree + + def _generate_directory_structure( + self, + components_data: List[Dict[str, Any]], + max_retries: int = 3 + ) -> Optional[Dict[str, str]]: + """Generate directory structure mapping components to directories with validation.""" + # Extract required component names + required_components = [comp["name"] for comp in components_data] + + # Build base prompts + repo_info = f"Repository: {self.rpg.repo_name}\nPurpose: {self.rpg.repo_info}" + component_summary = build_component_summary(components_data) + + # Include technology context when available + tech_section = "" + if 
self._project_background and self._project_background.strip(): + tech_section = ( + f"\n{self._project_background}\n" + "When a specific technology stack is described above, design the directory\n" + "structure to accommodate framework-specific conventions (e.g., `templates/`\n" + "for Jinja2, `models.py` for ORM, `app.py` for Flask entry point).\n" + ) + + # Sanitize repo name for use as a Python package directory + # (e.g., "blog-system" -> "blog_system") + safe_repo_name = self.rpg.repo_name.replace("-", "_") + + base_user_prompt = f"""## Repository Information +{repo_info} +{tech_section} +## Components to Organize ({len(components_data)} total) +{component_summary} + +## Task +Assign each component to an appropriate directory path. +Use "{safe_repo_name}" as the project name in paths (e.g., src/{safe_repo_name}/...). +IMPORTANT: Directory names MUST be valid Python identifiers (use underscores, not hyphens). +IMPORTANT: You MUST assign ALL {len(required_components)} components: {', '.join(required_components)} +""" + + last_error = "" + + for attempt in range(max_retries): + self.logger.info(f" Directory structure generation attempt {attempt + 1}/{max_retries}") + + # Build prompt with error feedback if needed + user_prompt = base_user_prompt + if last_error: + user_prompt += f"\n\n## Previous Attempt Failed\nError: {last_error}\nPlease fix the issues and try again." 
+ + # Call LLM + _, result, _ = self.llm_client.call_structured( + system_prompt=RAW_SKELETON_PROMPT, + user_prompt=user_prompt, + response_model=DirectoryStructureOutput, + purpose=f"directory_structure_{attempt + 1}" + ) + + self.stats["llm_calls_made"] += 1 + + if not result: + last_error = "Failed to parse LLM response" + self.stats["validation_retries"] += 1 + continue + + # Process assignments into simple mapping + component_to_dir = {} + for assignment in result.assignments: + component_to_dir[assignment.component_name] = assignment.directory_path + + # Validate completeness + is_valid, error_msg = validate_directory_structure(component_to_dir, required_components) + + if is_valid: + self.logger.info("\n Directory Structure (validated):") + for comp, dir_path in component_to_dir.items(): + self.logger.info(f" - {comp} โ†’ {dir_path}/") + self.logger.info(f"\n Reasoning: {result.overall_reasoning}") + + # Store for later RPG update + self.component_to_dir = component_to_dir + return component_to_dir + else: + self.logger.warning(f" Validation failed: {error_msg}") + last_error = error_msg + self.stats["validation_retries"] += 1 + + self.logger.error(f"Directory structure generation failed after {max_retries} attempts") + return None + + def _assign_features_to_files( + self, + components_data: List[Dict[str, Any]], + dir_assignments: Dict[str, str] + ) -> Optional[List[Dict[str, Any]]]: + """Assign features to files for each component.""" + all_assignments = [] + + for comp_data in components_data: + comp_name = comp_data["name"] + comp_desc = comp_data.get("description", "") + refactored_subtree = comp_data.get("refactored_subtree", {}) + + if comp_name not in dir_assignments: + self.logger.warning(f"No directory assignment for component: {comp_name}") + continue + + comp_dir = dir_assignments[comp_name] + + # Extract all features for this component + features = extract_features_from_subtree(refactored_subtree, comp_name) + feat_descs = 
extract_leaf_descriptions_from_subtree(refactored_subtree, comp_name) + if not features: + self.logger.warning(f"No features found for component: {comp_name}") + continue + + self.logger.info(f" Processing: {comp_name}") + self.logger.info(f" Directory: {comp_dir}/") + self.logger.info(f" Features: {len(features)}") + + # Build user prompt for feature assignment + feature_list = format_feature_list(features, feat_descs) + repo_info = f"Repository: {self.rpg.repo_name}\nPurpose: {self.rpg.repo_info}" + + # Include technology context when available + tech_section = "" + if self._project_background and self._project_background.strip(): + tech_section = f"\n{self._project_background}\n" + + user_prompt = f"""## Repository Information +{repo_info} +{tech_section} +## Component: {comp_name} +Description: {comp_desc} +Directory: {comp_dir} + +## Features to Assign ({len(features)} total) +{feature_list} + +## Task +Assign ALL the above features to Python files under {comp_dir}/. +Every feature MUST be assigned to exactly one file. 
+""" + + # Call LLM for feature assignment + _, result, _ = self.llm_client.call_structured( + system_prompt=GROUP_SKELETON_PROMPT, + user_prompt=user_prompt, + response_model=FileAssignmentOutput, + purpose=f"feature_assignment_{comp_name}" + ) + + self.stats["llm_calls_made"] += 1 + + if not result: + self.logger.error(f"Feature assignment failed for component: {comp_name}") + continue + + # Process and validate assignments + comp_assignments = [] + assigned_features = set() + path_errors = [] + + for assignment in result.assignments: + file_path = assignment.file_path + features_list = assignment.features + + # Validate file path is under the allowed directory + is_valid_path, path_error = validate_file_path_constraint( + file_path, [comp_dir] + ) + if not is_valid_path: + path_errors.append(path_error) + self.logger.warning(f" Path constraint violation: {path_error}") + # Try to fix by prepending the correct directory + if not file_path.startswith(comp_dir): + file_name = file_path.split("/")[-1] + file_path = f"{comp_dir}/{file_name}" + self.logger.info(f" Auto-corrected to: {file_path}") + + # Validate features exist + valid_features = [] + for feature in features_list: + if feature in features and feature not in assigned_features: + valid_features.append(feature) + assigned_features.add(feature) + + if valid_features: + comp_assignments.append({ + "file_path": file_path, + "features": valid_features, + "purpose": assignment.purpose, + "component": comp_name + }) + + if path_errors: + self.logger.warning(f" {len(path_errors)} path constraint violations were auto-corrected") + + # Check for unassigned features + unassigned = [f for f in features if f not in assigned_features] + if unassigned: + # Create fallback file for unassigned features + fallback_file = f"{comp_dir}/misc.py" + comp_assignments.append({ + "file_path": fallback_file, + "features": unassigned, + "purpose": "Miscellaneous features", + "component": comp_name + }) + + 
all_assignments.extend(comp_assignments) + self.stats["features_assigned"] += len(features) + self.stats["components_processed"] += 1 + + self.logger.info(f" Assigned {len(assigned_features)} features to {len(comp_assignments)} files") + + return all_assignments + + def _build_final_skeleton(self, file_assignments: List[Dict[str, Any]]): + """Build the final skeleton structure from file assignments.""" + # Pre-merge assignments with the same file_path so that features from + # multiple components going to the same file (e.g. shared misc.py) are + # all preserved instead of the last write silently overwriting earlier ones. + merged: Dict[str, List[str]] = {} + for assignment in file_assignments: + file_path = assignment["file_path"] + features = assignment["features"] + if file_path in merged: + merged[file_path].extend(features) + else: + merged[file_path] = list(features) + + for file_path, features in merged.items(): + self.skeleton.insert_file( + file_path=file_path, + code="", + feature_paths=features + ) + self.stats["files_created"] += 1 + + # Add __init__.py files to all directories + init_files_added = self.skeleton.add_init_files() + self.stats["init_files_created"] = init_files_added + self.logger.info(f"Added {init_files_added} __init__.py files") + + self.logger.info(f"Created skeleton with {len(self.skeleton.path_to_node)} total nodes") + + def _update_rpg_with_directories(self): + """Update RPG nodes with directory path assignments. + + This writes the assigned directory paths back into the RPG nodes' + metadata, similar to ZeroRepo's behavior. 
+ """ + updated_count = 0 + + for component_name, dir_path in self.component_to_dir.items(): + # Find the component node in RPG (level 1 node with matching name) + component_node = None + for node in self.rpg.nodes.values(): + if node.level == 1 and node.name == component_name: + component_node = node + break + + if not component_node: + self.logger.warning(f"Could not find RPG node for component: {component_name}") + continue + + # Update node metadata with directory path + if component_node.meta is None: + component_node.meta = NodeMetaData( + type_name=NodeType.DIRECTORY, + path=dir_path + ) + else: + component_node.meta.type_name = NodeType.DIRECTORY + component_node.meta.path = dir_path + + updated_count += 1 + self.logger.debug(f" Updated RPG node '{component_name}' with path: {dir_path}") + + self.logger.info(f" Updated {updated_count} RPG nodes with directory paths") + + def patch( + self, + missing_by_component: Dict[str, List[str]], + dir_assignments: Dict[str, str] + ) -> List[Dict[str, Any]]: + """Assign only missing features to files, reusing existing directory assignments. + + Skips directory structure generation entirely โ€” uses existing assignments + from the already-built skeleton. 
+ + Args: + missing_by_component: {component_name: [full_feature_path, ...]} + dir_assignments: {component_name: directory_path} from existing RPG/skeleton + + Returns: + List of file assignment dicts (same format as _assign_features_to_files) + """ + all_assignments = [] + + for comp_name, missing_features in missing_by_component.items(): + if not missing_features: + continue + if comp_name not in dir_assignments: + self.logger.warning(f"No directory assignment for component: {comp_name}") + continue + + comp_dir = dir_assignments[comp_name] + missing_features_set = set(missing_features) + self.logger.info(f" Patching: {comp_name} ({len(missing_features)} missing features)") + self.logger.info(f" Directory: {comp_dir}/") + + feature_list = format_feature_list(missing_features) + repo_info = f"Repository: {self.rpg.repo_name}\nPurpose: {self.rpg.repo_info}" + + tech_section = "" + if self._project_background and self._project_background.strip(): + tech_section = f"\n{self._project_background}\n" + + user_prompt = f"""## Repository Information +{repo_info} +{tech_section} +## Component: {comp_name} +Directory: {comp_dir} + +## Missing Features to Assign ({len(missing_features)} total) +{feature_list} + +## Task +Assign ALL the above features to Python files under {comp_dir}/. +Every feature MUST be assigned to exactly one file. +You may add features to existing files in this directory or create new files. 
+""" + + _, result, _ = self.llm_client.call_structured( + system_prompt=GROUP_SKELETON_PROMPT, + user_prompt=user_prompt, + response_model=FileAssignmentOutput, + purpose=f"patch_feature_assignment_{comp_name}" + ) + + self.stats["llm_calls_made"] += 1 + + if not result: + self.logger.error(f"Patch assignment failed for component: {comp_name}") + fallback_file = f"{comp_dir}/misc.py" + all_assignments.append({ + "file_path": fallback_file, + "features": missing_features, + "purpose": "Miscellaneous features (patch fallback)", + "component": comp_name + }) + continue + + comp_assignments = [] + assigned_features = set() + + for assignment in result.assignments: + file_path = assignment.file_path + features_list = assignment.features + + is_valid_path, path_error = validate_file_path_constraint(file_path, [comp_dir]) + if not is_valid_path: + self.logger.warning(f" Path constraint violation: {path_error}") + if not file_path.startswith(comp_dir): + file_name = file_path.split("/")[-1] + file_path = f"{comp_dir}/{file_name}" + self.logger.info(f" Auto-corrected to: {file_path}") + + valid_features = [] + for feature in features_list: + if feature in missing_features_set and feature not in assigned_features: + valid_features.append(feature) + assigned_features.add(feature) + + if valid_features: + comp_assignments.append({ + "file_path": file_path, + "features": valid_features, + "purpose": assignment.purpose, + "component": comp_name + }) + + unassigned = [f for f in missing_features if f not in assigned_features] + if unassigned: + fallback_file = f"{comp_dir}/misc.py" + comp_assignments.append({ + "file_path": fallback_file, + "features": unassigned, + "purpose": "Miscellaneous features", + "component": comp_name + }) + + all_assignments.extend(comp_assignments) + self.stats["features_assigned"] += len(missing_features) + self.stats["components_processed"] += 1 + self.logger.info( + f" Assigned {len(assigned_features)} features to {len(comp_assignments)} files" + ) 
+ + return all_assignments + + def _print_statistics(self): + """Print final statistics.""" + print("Statistics:") + print(f" Components processed: {self.stats['components_processed']}") + print(f" Features assigned: {self.stats['features_assigned']}") + print(f" Files created: {self.stats['files_created']}") + print(f" __init__.py files added: {self.stats['init_files_created']}") + print(f" LLM calls made: {self.stats['llm_calls_made']}") + + skeleton_stats = self.skeleton.get_statistics() + print(f" Total skeleton nodes: {skeleton_stats['total_nodes']}") + print(f" File nodes: {skeleton_stats['file_nodes']}") + print(f" Directory nodes: {skeleton_stats['directory_nodes']}") + print(f" __init__.py files: {skeleton_stats.get('init_files', 0)}") + + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + + print("FileDesigner module loaded successfully") + print("Use this module from build_skeleton.py for full functionality") \ No newline at end of file diff --git a/RPG-Kit/scripts/skeleton/skeleton_models.py b/RPG-Kit/scripts/skeleton/skeleton_models.py new file mode 100644 index 0000000..e09d07a --- /dev/null +++ b/RPG-Kit/scripts/skeleton/skeleton_models.py @@ -0,0 +1,508 @@ +#!/usr/bin/env python3 +"""Skeleton Models. + +This module provides skeleton data structures for representing +the repository file structure. + +Key classes: +- RepoNode: Base class for repository nodes +- DirectoryNode: Directory node +- FileNode: File node with feature assignments +- RepoSkeleton: Main skeleton structure +""" + +import os +import json +import logging +from typing import List, Optional, Dict, Any +from abc import abstractmethod +from pathlib import PurePosixPath + +from rpg.code_unit import CodeUnit, ParsedFile + + +def normalize_path(path: str) -> str: + """Normalize file path to unix style.""" + if not path: + return "." 
+ # Convert to posix path and normalize + posix_path = str(PurePosixPath(path)) + # Remove leading "./" if present + if posix_path.startswith("./"): + posix_path = posix_path[2:] + # Handle empty path + if not posix_path or posix_path == ".": + return "." + return posix_path + + +class RepoNode: + """Base class for repository nodes.""" + + def __init__(self, name: str, path: str): + self.name = name + self.path = normalize_path(path) + self.parent = None + + @property + @abstractmethod + def is_dir(self) -> bool: + pass + + @property + @abstractmethod + def is_file(self) -> bool: + pass + + def children(self) -> List["RepoNode"]: + return [] + + def __hash__(self): + return hash(self.path) + + def __eq__(self, other): + return isinstance(other, RepoNode) and self.path == other.path + + +class DirectoryNode(RepoNode): + """Directory node in repository skeleton.""" + + def __init__(self, name: str, path: str, tags: Optional[List[str]] = None): + super().__init__(name, path) + self.tags = tags or [] + self._children: List[RepoNode] = [] + + def add_child(self, node: RepoNode): + """Add child node, preventing duplicates.""" + if not any(child.path == node.path for child in self._children): + node.parent = self + self._children.append(node) + + @property + def is_dir(self) -> bool: + return True + + @property + def is_file(self) -> bool: + return False + + def children(self) -> List[RepoNode]: + return self._children + + def has_tag(self, tag_name: str) -> bool: + """Check if directory has a specific tag.""" + return any(tag == tag_name for tag in self.tags) + + def __repr__(self): + tag_str = f" [tags: {', '.join(self.tags)}]" if self.tags else "" + return f"DirectoryNode(name='{self.name}', path='{self.path}'){tag_str}" + + def to_dict(self) -> Dict[str, Any]: + """Convert to dictionary for JSON serialization.""" + # Remove duplicates while preserving order + seen = set() + unique_children = [] + for child in self._children: + if child.path not in seen: + 
unique_children.append(child)
+                seen.add(child.path)
+            else:
+                logging.warning(f"Duplicate child path: {child.path}")
+
+        return {
+            "type": "directory",
+            "name": self.name,
+            "path": self.path,
+            "tags": self.tags,
+            "children": [child.to_dict() for child in unique_children],
+        }
+
+    @staticmethod
+    def from_dict(data: Dict[str, Any]) -> "DirectoryNode":
+        """Create from dictionary."""
+        return DirectoryNode(
+            name=data["name"],
+            path=data["path"],
+            tags=data.get("tags", [])
+        )
+
+
+class FileNode(RepoNode):
+    """File node in repository skeleton."""
+
+    def __init__(self, name: str, path: str, code: str = "", feature_paths: Optional[List[str]] = None):
+        super().__init__(name, path)
+        self.code = code
+        self.feature_paths = feature_paths if feature_paths else []
+        # Parse code to extract units
+        self.parsed = ParsedFile(code, path) if code else None
+        self.units: List[CodeUnit] = self.parsed.units if self.parsed else []
+
+    @property
+    def is_file(self) -> bool:
+        return True
+
+    @property
+    def is_dir(self) -> bool:
+        return False
+
+    def __repr__(self):
+        return f"<FileNode name='{self.name}' path='{self.path}' features={len(self.feature_paths)}>"
+
+    def update_code(self, code: str):
+        """Update code and re-parse units."""
+        self.code = code
+        self.parsed = ParsedFile(code, self.path) if code else None
+        self.units = self.parsed.units if self.parsed else []
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Convert to dictionary for JSON serialization (ZeroRepo compatible)."""
+        return {
+            "type": "file",
+            "name": self.name,
+            "path": self.path,
+            "code": self.code,
+            "feature_paths": self.feature_paths,
+            "units": [unit.to_dict() for unit in self.units]
+        }
+
+    @staticmethod
+    def from_dict(data: Dict[str, Any]) -> "FileNode":
+        """Create from dictionary (ZeroRepo compatible)."""
+        file_node = FileNode(
+            name=data["name"],
+            path=data["path"],
+            code=data.get("code", "")
+        )
+        file_node.feature_paths = data.get("feature_paths", [])
+        # Restore units from saved data if available
+        if data.get("units"):
+            file_node.units = 
[CodeUnit.from_dict(u) for u in data["units"]] + return file_node + + +class RepoSkeleton: + """Repository skeleton structure.""" + + def __init__(self, file_map: Optional[Dict[str, str]] = None): + """Initialize repository skeleton. + + Args: + file_map: Optional mapping of file_path -> code + """ + self.root = DirectoryNode(name="project_root", path=".") + self.path_to_node: Dict[str, RepoNode] = {".": self.root} + + if file_map: + for file_path, code in sorted(file_map.items()): + self._insert_file(file_path, code) + + def _insert_file(self, file_path: str, code: str, feature_paths: Optional[List[str]] = None): + """Insert a file into the skeleton.""" + norm_path = normalize_path(file_path) + parts = norm_path.split("/") + current_node = self.root + current_path = "" + + for i, part in enumerate(parts): + if not part: + continue + + is_last = i == len(parts) - 1 + node_path = normalize_path(os.path.join(current_path, part)) + + if is_last: + # Create file node + if node_path in self.path_to_node: + existing = self.path_to_node[node_path] + if existing.is_dir: + logging.error(f"Path conflict: '{node_path}' exists as directory") + return + elif existing.is_file: + logging.info(f"Overwriting file at: {node_path}") + existing.code = code + if feature_paths: + existing.feature_paths = feature_paths + return + + file_node = FileNode( + name=part, + path=node_path, + code=code, + feature_paths=feature_paths or [] + ) + self.path_to_node[node_path] = file_node + current_node.add_child(file_node) + else: + # Create or find directory node + existing_node = self.path_to_node.get(node_path) + if existing_node is None: + dir_node = DirectoryNode(name=part, path=node_path) + self.path_to_node[node_path] = dir_node + current_node.add_child(dir_node) + current_node = dir_node + elif existing_node.is_dir: + current_node = existing_node + else: + logging.error(f"Path conflict: '{node_path}' exists as file, expected directory") + return + + current_path = node_path + + def 
insert_file(self, file_path: str, code: str, feature_paths: Optional[List[str]] = None): + """Public method to insert file.""" + self._insert_file(file_path, code, feature_paths) + + def find_file(self, path: str) -> Optional[FileNode]: + """Find file by path.""" + norm_input = normalize_path(path) + + # Exact match first + for key, node in self.path_to_node.items(): + if isinstance(node, FileNode) and normalize_path(key) == norm_input: + return node + + # Suffix match fallback + for key, node in self.path_to_node.items(): + if isinstance(node, FileNode) and normalize_path(key).endswith(norm_input): + return node + + return None + + def find_dir(self, path: str) -> Optional[DirectoryNode]: + """Find directory by path.""" + norm_input = normalize_path(path) + + # Exact match first + for key, node in self.path_to_node.items(): + if isinstance(node, DirectoryNode) and normalize_path(key) == norm_input: + return node + + # Suffix match fallback + for key, node in self.path_to_node.items(): + if isinstance(node, DirectoryNode) and normalize_path(key).endswith(norm_input): + return node + + return None + + def all_paths(self, include_dirs: bool = True, include_files: bool = True) -> List[str]: + """Get all paths in skeleton.""" + return sorted( + path for path, node in self.path_to_node.items() + if (include_dirs and node.is_dir) or (include_files and node.is_file) + ) + + def find_files_by_feature_path(self, feature_path: str) -> List[FileNode]: + """Find files containing a specific feature path.""" + return [ + node for node in self.path_to_node.values() + if isinstance(node, FileNode) and feature_path in node.feature_paths + ] + + def get_all_file_nodes(self) -> List[FileNode]: + """Get all file nodes.""" + return [ + node for node in self.path_to_node.values() + if isinstance(node, FileNode) + ] + + def get_file_code_map(self) -> Dict[str, str]: + """Get mapping of file paths to code.""" + return { + node.path: node.code or "" + for node in self.path_to_node.values() 
+ if isinstance(node, FileNode) + } + + def to_dict(self) -> Dict[str, Any]: + """Convert to dictionary for JSON serialization.""" + return { + "root": self.root.to_dict() + } + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "RepoSkeleton": + """Create from dictionary.""" + skeleton = object.__new__(cls) + skeleton.path_to_node = {} + + def walk_and_register(node_data: Dict[str, Any], parent: Optional[DirectoryNode] = None) -> RepoNode: + node_type = node_data["type"] + + if node_type == "directory": + node = DirectoryNode.from_dict(node_data) + elif node_type == "file": + node = FileNode.from_dict(node_data) + else: + raise ValueError(f"Unknown node type: {node_type}") + + skeleton.path_to_node[node.path] = node + + if parent: + # Prevent duplicate children + if not any(child.path == node.path for child in parent.children()): + parent.add_child(node) + + node.parent = parent + + # Process children for directories + if isinstance(node, DirectoryNode): + for child_data in node_data.get("children", []): + walk_and_register(child_data, node) + + return node + + skeleton.root = walk_and_register(data["root"]) + return skeleton + + def save_json(self, filepath: str): + """Save to JSON file.""" + with open(filepath, "w", encoding="utf-8") as f: + json.dump(self.to_dict(), f, ensure_ascii=False, indent=2) + + @classmethod + def load_json(cls, filepath: str) -> "RepoSkeleton": + """Load from JSON file.""" + with open(filepath, "r", encoding="utf-8") as f: + data = json.load(f) + return cls.from_dict(data) + + def add_init_files(self, skip_root: bool = True, docstring_template: Optional[str] = None) -> int: + """Add __init__.py files to all directories in the skeleton. + + This ensures that all directories are proper Python packages. + + Args: + skip_root: Whether to skip adding __init__.py to root directory. + docstring_template: Optional docstring template. + Use {name} for directory name, {path} for directory path. 
+ + Returns: + Number of __init__.py files added. + """ + init_files_added = 0 + + # Get all directory nodes + dir_nodes = [n for n in self.path_to_node.values() if isinstance(n, DirectoryNode)] + + for dir_node in dir_nodes: + # Skip root directory if requested + if skip_root and (dir_node.path == "." or dir_node == self.root): + continue + + # Skip non-Python directories (like docs, assets, etc.) + # Only add __init__.py to directories that contain Python files or subdirectories + has_python_content = False + for child in dir_node.children(): + if isinstance(child, FileNode) and child.name.endswith('.py'): + has_python_content = True + break + if isinstance(child, DirectoryNode): + has_python_content = True + break + + # Also add if the directory is under a common Python package pattern + is_python_pkg_path = any( + dir_node.path.startswith(prefix) + for prefix in ['src/', 'lib/', 'pkg/', 'packages/'] + ) or '/src/' in dir_node.path + + if not has_python_content and not is_python_pkg_path: + continue + + # Build __init__.py path + init_path = normalize_path(os.path.join(dir_node.path, "__init__.py")) + + # Skip if __init__.py already exists + if init_path in self.path_to_node: + continue + + # Generate content for __init__.py + if docstring_template: + code = docstring_template.format( + name=dir_node.name, + path=dir_node.path + ) + else: + # Default minimal docstring + code = f'"""Package: {dir_node.name}"""\n' + + # Create __init__.py file node + init_node = FileNode( + name="__init__.py", + path=init_path, + code=code, + feature_paths=[] + ) + + # Add to directory and path registry + dir_node.add_child(init_node) + self.path_to_node[init_path] = init_node + init_files_added += 1 + + logging.debug(f"Added __init__.py to: {dir_node.path}") + + logging.info(f"Added {init_files_added} __init__.py files to skeleton") + return init_files_added + + def get_statistics(self) -> Dict[str, Any]: + """Get skeleton statistics.""" + total_nodes = len(self.path_to_node) + 
file_nodes = [n for n in self.path_to_node.values() if isinstance(n, FileNode)] + dir_nodes = [n for n in self.path_to_node.values() if isinstance(n, DirectoryNode)] + + total_features = sum(len(f.feature_paths) for f in file_nodes) + init_files = len([f for f in file_nodes if f.name == "__init__.py"]) + + return { + "total_nodes": total_nodes, + "file_nodes": len(file_nodes), + "directory_nodes": len(dir_nodes), + "total_features": total_features, + "files_with_features": len([f for f in file_nodes if f.feature_paths]), + "init_files": init_files, + } + + def to_tree_string(self, skip_root: bool = True, show_features: bool = False) -> str: + """Generate tree string representation.""" + def _render_node(node: RepoNode, prefix: str = "", is_last: bool = True) -> str: + lines = [] + + if not (skip_root and node == self.root): + connector = "โ””โ”€โ”€ " if is_last else "โ”œโ”€โ”€ " + if node == self.root: + lines.append(node.name) + else: + node_str = node.name + if show_features and isinstance(node, FileNode) and node.feature_paths: + node_str += f" ({len(node.feature_paths)} features)" + lines.append(f"{prefix}{connector}{node_str}") + + if isinstance(node, DirectoryNode): + children = node.children() + for i, child in enumerate(children): + is_child_last = (i == len(children) - 1) + child_prefix = prefix + (" " if is_last else "โ”‚ ") if not (skip_root and node == self.root) else "" + lines.append(_render_node(child, child_prefix, is_child_last)) + + return "\n".join(lines) + + return _render_node(self.root) + + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + + file_map = { + "src/main.py": "# Main file", + "src/utils/helpers.py": "# Helper functions", + "tests/test_main.py": "# Tests" + } + + skeleton = RepoSkeleton(file_map) + print("Created skeleton with files:") + for path in skeleton.all_paths(include_dirs=False): + print(f" {path}") + + print(f"\nSkeleton statistics: {skeleton.get_statistics()}") + print(f"\nTree 
structure:\n{skeleton.to_tree_string()}") \ No newline at end of file diff --git a/RPG-Kit/scripts/skeleton/skeleton_prompts.py b/RPG-Kit/scripts/skeleton/skeleton_prompts.py new file mode 100644 index 0000000..565dd14 --- /dev/null +++ b/RPG-Kit/scripts/skeleton/skeleton_prompts.py @@ -0,0 +1,392 @@ +#!/usr/bin/env python3 +"""Skeleton Prompts. + +This module contains professional prompts for skeleton generation. + +Key prompts: +- RAW_SKELETON: Design initial directory structure +- GROUP_SKELETON: Assign features to files +""" + +# ============================================================================ +# Raw Skeleton Generation Prompt +# ============================================================================ + +RAW_SKELETON_PROMPT = """You are a repository architect responsible for designing the initial project structure of a software repository in its early development stage. + +You will be provided with: +- A summary describing the repository's purpose, domain, and scope. +- A list of functional components, each representing a major grouping within the repository. + +Your task is to propose a clean, modular file-system skeleton that organizes the repository into appropriate top-level folders. + +## Requirements +1. The structure must clearly separate each functional component and reflect logical domain boundaries. +2. Folder names must be concise, meaningful, and follow Python naming conventions (snake_case). +3. Component names serve as functional descriptions, not required folder names. + - Rename folders as needed for clarity and readability. + - Include a mapping from folder names to the original component names. +4. You may choose a flat layout (folders at root) or a nested layout (e.g., under `src`) depending on what best enhances clarity, maintainability, and scalability. +5. 
Include standard auxiliary folders when appropriate, such as: + - `utils` — shared utilities + - `tests` — test code + - `docs` — documentation + - `configs`, `scripts` — configuration and automation assets +6. Avoid unnecessary complexity or deep nesting. The structure should be intuitive and developer-friendly. + +## Naming Guidelines +- Use short, semantically precise names that clearly indicate a folder's purpose. +- Do not reuse component names verbatim; translate them into practical module or folder names. +- Avoid vague names such as `module`, `misc`, `feature1`, or `temp`. + +## Output Format +Your response must contain exactly one <think> block and exactly one <json> block: + +<think> +Your internal reasoning and drafts—treat this like architectural scratch work. +Feel free to explore options, debate trade-offs, sketch out intermediate designs, or work step-by-step until you're confident in your final direction. +</think> + +<json> +{ + "assignments": [ + { + "component_name": "component1", + "directory_path": "src/project/area", + "reasoning": "Brief explanation for this assignment" + } + ], + "overall_reasoning": "Overall design rationale" +} +</json> +""" + +# ============================================================================ +# Group Skeleton Generation Prompt +# ============================================================================ + +GROUP_SKELETON_PROMPT = """You are a repository architect responsible for incrementally assigning features from a functional component into a production-grade Python repository structure. + +Your primary goals are clarity, modularity, and long-term maintainability. The resulting layout should resemble a modern, well-structured Python library rather than a direct projection of the feature tree. + +You may: +- Group related features into shared modules, +- Introduce or adjust folders when semantically appropriate, +- Refine or reorganize previous design decisions as needed.
+ +Your task is to assign each feature to a `.py` file path that: +- Begins with the designated folder, +- Groups semantically related features together (even if they originate from different branches of the feature tree), +- Reflects realistic Python module organization, +- Uses folders where helpful to express higher-level structure. + +## Rules +- Assign each feature exactly once (no duplication or omission). +- Only assign leaf-level features. +- All assigned files must reside under the designated folder in a coherent, maintainable structure. + +### File and Folder Structure +- Group features into meaningful modules based on real-world development practices, not solely on the original tree layout. +- By default, prefer placing a reasonably large set of closely related features in the same file rather than splitting into many very small files. +- Aim for moderate-to-substantial module sizes when possible: for cohesive groups, assigning on the order of 10โ€“100 leaf features per file is acceptable if they share a clear theme. +- Use single-feature files only for particularly complex, central, or architecturally significant features. +- Keep files reasonably scoped; avoid mixing clearly unrelated features in the same module. +- When a folder becomes crowded, introduce semantically meaningful subfolders rather than scattering features into many tiny modules. + +### Naming and Organization Guidelines +1. Use clear, concise, semantically meaningful names in `snake_case`. Each file or folder should represent a well-scoped functional area. +2. Names should reflect functional purpose without redundancy. Avoid repeating folder context in filenames when it is obvious (for example, inside `auth/`, prefer `token.py` over `auth_token.py`). +3. Avoid vague or purely placeholder names such as `module_part1.py` or `other_module.py`. +4. Utility-style modules are allowed when they are clearly scoped. 
Examples: + - `vector_utils.py`, `io_utils.py`, or `text_utils.py` inside appropriately named folders, + - `util.py` or `utils.py` within a well-defined domain folder, where the utility code is narrowly focused on that domain. + These should not become unbounded catch-all modules. +5. It is acceptable to place features originating from multiple original subtrees into the same file if they form a coherent functional unit in the repository architecture. + +## Output Format +Your response must include exactly one `<think>` block and exactly one `<json>` block, and you **MUST** follow the structure below: + +<think> +Internal reasoning and drafts — use this area for exploration, planning, and structural considerations. +</think> + +<json> +{ + "assignments": [ + { + "file_path": "src/project/component/module.py", + "features": ["feature1", "feature2"], + "purpose": "Brief description of file purpose" + } + ] +} +</json> +""" + +# ============================================================================ +# Review Prompts +# ============================================================================ + +RAW_SKELETON_REVIEW_PROMPT = """You are a senior reviewer responsible for evaluating a proposed raw project skeleton for a software repository. Your goal is to verify that the directory layout forms a clean, scalable, and well-structured foundation aligned with the provided functional subtrees. + +## Review Objective +Assess the skeleton across four dimensions and provide detailed, actionable, category-specific feedback. + +## Evaluation Dimensions +1. The structure should demonstrate thoughtful functional grouping rather than a direct 1:1 mapping from each subtree, with clear opportunities for consolidation or abstraction. +2. Lightweight or utility-style bridging components should be placed appropriately without unnecessary nesting or over-isolation. +3. Each subtree should appear exactly once in the structure, without duplication, fragmentation, or ambiguous ownership. +4.
Folder names should be clear, specific, consistent, and aligned with common software naming conventions. + +## Output Format +Return **only** a valid JSON object in the following format: +{ + "review": { + "Functional Grouping": { + "feedback": "", + "pass": true/false + }, + "Simplified Bridging Components": { + "feedback": "", + "pass": true/false + }, + "Exclusive Assignment": { + "feedback": "", + "pass": true/false + }, + "Semantic Naming": { + "feedback": "", + "pass": true/false + } + }, + "final_pass": true/false +} + +Rules: +- `final_pass` should be `true` only if all four dimensions pass, or if remaining issues are minor and easily fixable. +- All `feedback` fields must provide concrete, actionable guidance. +- Do not add new fields or categories beyond the four listed. +""" + +GROUP_SKELETON_REVIEW_PROMPT = """You are a senior software architect reviewing the feature-to-file assignments proposed by an architecture assistant. Your role is to critically evaluate the structural quality of the resulting Python module layout across the five criteria below. + +## Review Criteria +### 1. File Scope Appropriateness +- Each file must have a clear, focused responsibility. +- Group features only when they share meaningful semantic or functional alignment. +- Split files when they accumulate unrelated logic or become overloaded. +- Complex features generally merit isolation; simple, tightly related ones may be grouped. + +### 2. File Structure Organization +- The folder hierarchy should reflect clean separations of concern and meaningful domain boundaries. +- Introduce subfolders when a directory becomes crowded or mixes distinct types of functionality. +- Avoid excessively flat or deeply nested layouts. +- Detect filename clusters with shared prefixes and organize them into subfolders; avoid redundant naming (e.g., `nlp/nlp_tokenizer.py`). + +### 3. Modularity & Cohesion +- Modules should exhibit strong internal cohesion and minimal coupling. 
+- Each module should map to a single clear abstraction. +- Flag mixed-purpose, catch-all, or poorly scoped modules for redesign. +### 4. Naming Quality +- Names must be clear, concise, meaningful, and consistently in `snake_case`. +- Avoid redundancy between folder and file names. +- Reject vague, generic, placeholder, or suffix-based names. +- Prefer succinct, expressive names that accurately reflect functionality. + +### 5. Structural Soundness & Scalability +- The architecture should support clean layering (data, logic, interface) and long-term scalability. +- Shared logic should be abstracted into appropriate modules. +- Avoid structural bottlenecks, overloaded directories, or ambiguous boundaries. + +### Special Emphasis +- Apply strict scrutiny to both naming and structural decisions. +- Placeholder or incremental naming patterns (`_a.py`, `_b.py`, `_c.py`) must be rejected. +- When flagging an issue, always recommend specific, meaningful alternatives. + +## Output Format +Return **only valid JSON**, with no extra comments or text: +{ + "review": { + "File Scope Appropriateness": { + "feedback": "", + "pass": true/false + }, + "File Structure Organization": { + "feedback": "", + "pass": true/false + }, + "Modularity & Cohesion": { + "feedback": "", + "pass": true/false + }, + "Naming Quality": { + "feedback": "", + "pass": true/false + }, + "Structural Soundness": { + "feedback": "", + "pass": true/false + } + }, + "final_pass": true/false +}""" + +# ============================================================================ +# Utility Functions +# ============================================================================ + +def build_component_summary(components: list) -> str: + """Build formatted component summary for prompts.""" + summary_lines = [] + for i, comp in enumerate(components, 1): + name = comp.get("name", f"Component {i}") + desc = comp.get("description", "No description") + + # Count features in refactored_subtree + feature_count = 
count_features_in_subtree(comp.get("refactored_subtree", {})) + + summary_lines.append(f"{i}. **{name}**") + summary_lines.append(f" Description: {desc}") + summary_lines.append(f" Features: {feature_count}") + summary_lines.append("") + + return "\n".join(summary_lines) + + +def count_features_in_subtree(subtree) -> int: + """Count total features in a component's subtree.""" + if isinstance(subtree, dict): + total = 0 + for key, value in subtree.items(): + if key == "description": + continue + total += count_features_in_subtree(value) + return total + elif isinstance(subtree, list): + return len([item for item in subtree if item]) + else: + return 1 if subtree else 0 + + +def extract_features_from_subtree(subtree, prefix=""): + """Extract all feature paths from a subtree structure.""" + features = [] + + if isinstance(subtree, dict): + for key, value in subtree.items(): + if key == "description": + continue + + current_path = f"{prefix}/{key}" if prefix else key + + if isinstance(value, dict): + # Check if this is just a description wrapper + if set(value.keys()) == {"description"}: + # This is a leaf feature with only description metadata + features.append(current_path) + else: + # Nested structure - extract sub-features with full path + features.extend(extract_features_from_subtree(value, current_path)) + elif isinstance(value, list): + # List of leaf features - each item gets full path + for item in value: + if isinstance(item, dict): + name = item.get("name", "") + if name: + features.append(f"{current_path}/{name}") + elif item: + features.append(f"{current_path}/{item}") + else: + # Single feature value - this is a leaf node + if value: + # If value is the same as key, it means this is a leaf feature + if isinstance(value, str) and value == key: + features.append(current_path) + else: + # Otherwise it's a nested feature + features.append(current_path) + + elif isinstance(subtree, list): + for item in subtree: + if isinstance(item, dict): + name = 
item.get("name", "") + if name: + feature_path = f"{prefix}/{name}" if prefix else name + features.append(feature_path) + elif item: + feature_path = f"{prefix}/{item}" if prefix else str(item) + features.append(feature_path) + else: + # This is a leaf feature - use the current prefix as the full path + if subtree: + features.append(prefix if prefix else str(subtree)) + + return features + + +def extract_leaf_descriptions_from_subtree(subtree, prefix=""): + """Extract descriptions from dict-format leaf nodes in a subtree. + + Returns: + Dict mapping full feature paths to their descriptions + """ + descriptions = {} + if isinstance(subtree, dict): + for key, value in subtree.items(): + if key == "description": + continue + current_path = f"{prefix}/{key}" if prefix else key + if isinstance(value, dict): + if set(value.keys()) != {"description"}: + descriptions.update(extract_leaf_descriptions_from_subtree(value, current_path)) + elif isinstance(value, list): + for item in value: + if isinstance(item, dict): + name = item.get("name", "") + desc = item.get("description", "") + if name and desc: + descriptions[f"{current_path}/{name}"] = desc + elif isinstance(subtree, list): + for item in subtree: + if isinstance(item, dict): + name = item.get("name", "") + desc = item.get("description", "") + if name and desc: + path = f"{prefix}/{name}" if prefix else name + descriptions[path] = desc + return descriptions + + +def format_feature_list(features: list, descriptions: dict = None) -> str: + """Format feature list for prompts, optionally with descriptions.""" + if not features: + return "No features found" + + formatted_lines = [] + for i, feature in enumerate(features, 1): + desc = descriptions.get(feature, "") if descriptions else "" + if desc: + formatted_lines.append(f"{i}. {feature}: {desc}") + else: + formatted_lines.append(f"{i}. 
{feature}") + + return "\n".join(formatted_lines) + + +if __name__ == "__main__": + # Test prompt utilities + test_component = { + "name": "parser", + "description": "Text parsing functionality", + "refactored_subtree": { + "tokenizer": ["tokenize_text", "handle_whitespace"], + "validator": { + "syntax": ["check_syntax", "report_errors"], + "semantic": ["validate_meaning"] + } + } + } + + features = extract_features_from_subtree(test_component["refactored_subtree"]) + print("Extracted features:") + print(format_feature_list(features)) + + print(f"\nFeature count: {count_features_in_subtree(test_component['refactored_subtree'])}") \ No newline at end of file diff --git a/RPG-Kit/scripts/smoke_test.py b/RPG-Kit/scripts/smoke_test.py new file mode 100644 index 0000000..cd0a80a --- /dev/null +++ b/RPG-Kit/scripts/smoke_test.py @@ -0,0 +1,414 @@ +#!/usr/bin/env python3 +"""Smoke Test โ€” Post-codegen integration sanity check. + +Runs after all batches complete to verify the assembled project works +as a whole. Unlike unit tests (per-batch TDD), smoke tests verify +cross-module integration: imports resolve, entry point runs, and +no unimplemented stubs remain. + +Three layers: + 1. Import completeness โ€” every .py can be imported without error + 2. Entry point โ€” main.py --help works (if main.py exists) + 3. 
Stub detection โ€” unimplemented functions (pass, ..., NotImplementedError) + +Usage: + python3 smoke_test.py --json # Run all layers + python3 smoke_test.py --layer imports # Import check only + python3 smoke_test.py --layer entry # Entry point only + python3 smoke_test.py --layer stubs # Stub detection only +""" + +import argparse +import json +import logging +import os +import subprocess +import sys +import time +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Dict, List, Optional + +# --------------------------------------------------------------------------- +# Path setup +# --------------------------------------------------------------------------- +sys.path.insert(0, str(Path(__file__).parent)) + +from common.paths import DEV_VENV_DIR, REPO_DIR, get_scripts_dir + +logger = logging.getLogger(__name__) + +# ============================================================================ +# Data Classes +# ============================================================================ + + +@dataclass +class SmokeFinding: + """A single finding from smoke testing.""" + layer: str # "imports", "entry_point", "stubs" + severity: str # "error", "warning", "info" + check: str # short check name + message: str # human-readable description + details: str = "" # optional details (traceback, output, etc.) 
+ + def to_dict(self) -> Dict[str, Any]: + d = {"layer": self.layer, "severity": self.severity, + "check": self.check, "message": self.message} + if self.details: + d["details"] = self.details[:2000] + return d + + +@dataclass +class SmokeResult: + """Complete smoke test result.""" + success: bool = True + project_type: str = "unknown" + duration: float = 0.0 + layers: Dict[str, Any] = field(default_factory=dict) + findings: List[SmokeFinding] = field(default_factory=list) + + def add_finding(self, finding: SmokeFinding) -> None: + self.findings.append(finding) + if finding.severity == "error": + self.success = False + + def to_dict(self) -> Dict[str, Any]: + return { + "success": self.success, + "type": "smoke_test", + "project_type": self.project_type, + "duration": round(self.duration, 1), + "layers": self.layers, + "findings": [f.to_dict() for f in self.findings], + "error_count": sum(1 for f in self.findings if f.severity == "error"), + "warning_count": sum(1 for f in self.findings if f.severity == "warning"), + } + + +# ============================================================================ +# Helpers +# ============================================================================ + +def _get_python_exe(repo_path: Path) -> str: + """Get the dev venv python path, falling back to sys.executable. + + ``repo_path`` is parameterised (not just ``DEV_VENV_DIR``) so tests + can target an alternative tree; the bare-name part of the venv path + is sourced from :data:`common.paths.DEV_VENV_NAME` via re-export. 
+ """ + venv_python = repo_path / DEV_VENV_DIR.name / "bin" / "python" + if venv_python.exists(): + return str(venv_python) + return sys.executable + + +def _find_source_files(repo_path: Path) -> List[Path]: + """Find all .py source files (excluding tests, venv, __pycache__).""" + skip_dirs = {".venv_dev", ".venv", "venv", "__pycache__", ".git", + ".rpgkit", ".pytest_cache", "node_modules"} + result = [] + for py_file in repo_path.rglob("*.py"): + parts = set(py_file.relative_to(repo_path).parts) + if parts & skip_dirs: + continue + # Skip test files + name = py_file.name + if name.startswith("test_") or name.endswith("_test.py"): + continue + if any(p in ("tests", "test", "testing") for p in py_file.relative_to(repo_path).parts): + continue + result.append(py_file) + return sorted(result) + + +def _run_in_repo(repo_path: Path, cmd: List[str], timeout: int = 30) -> subprocess.CompletedProcess: + """Run a command in the repo directory with the dev venv.""" + env = os.environ.copy() + env["PYTHONPATH"] = str(repo_path) + # Suppress interactive prompts + env["PYTHONDONTWRITEBYTECODE"] = "1" + return subprocess.run( + cmd, capture_output=True, text=True, timeout=timeout, + cwd=str(repo_path), env=env, + ) + + +# ============================================================================ +# Layer 1: Import Completeness +# ============================================================================ + +def check_imports(repo_path: Path, result: SmokeResult) -> Dict[str, Any]: + """Verify all source files can be imported without errors. + + Runs imports in batched subprocesses to isolate failures while + keeping the check fast. 
+ """ + logger.info("Layer 1: Import completeness check") + python_exe = _get_python_exe(repo_path) + source_files = _find_source_files(repo_path) + + layer = {"total_files": len(source_files), "importable": 0, "failed": 0, "failures": []} + + # Build module names + modules = [] + for py_file in source_files: + rel = py_file.relative_to(repo_path) + module_parts = list(rel.with_suffix("").parts) + if module_parts[-1] == "__init__": + module_parts = module_parts[:-1] + if not module_parts: + continue + modules.append(".".join(module_parts)) + + if not modules: + layer["passed"] = True + return layer + + # Batch check: try importing all at once first + batch_code = "import sys; sys.path.insert(0,'.'); " + "; ".join( + f"import {m}" for m in modules + ) + try: + proc = _run_in_repo(repo_path, [python_exe, "-c", batch_code], timeout=30) + if proc.returncode == 0: + # All imports passed + layer["importable"] = len(modules) + layer["passed"] = True + logger.info(" Imports: %d/%d passed (batch)", len(modules), len(modules)) + return layer + except subprocess.TimeoutExpired: + pass # Fall through to individual checks + + # Batch failed โ€” check individually to find which ones fail + for module_name in modules: + import_code = f"import sys; sys.path.insert(0,'.'); import {module_name}" + try: + proc = _run_in_repo(repo_path, [python_exe, "-c", import_code], timeout=15) + if proc.returncode == 0: + layer["importable"] += 1 + else: + layer["failed"] += 1 + error_line = proc.stderr.strip().splitlines()[-1] if proc.stderr.strip() else "Unknown error" + layer["failures"].append({"module": module_name, "error": error_line}) + result.add_finding(SmokeFinding( + layer="imports", severity="error", + check="import_fails", + message=f"Cannot import {module_name}: {error_line}", + details=proc.stderr[-500:] if proc.stderr else "", + )) + except subprocess.TimeoutExpired: + layer["failed"] += 1 + layer["failures"].append({"module": module_name, "error": "import timed out (15s)"}) + 
# ============================================================================
# Layer 2: Entry Point Validation
# ============================================================================

def check_entry_point(repo_path: Path, result: SmokeResult) -> Dict[str, Any]:
    """Verify main.py can start and --help works."""
    logger.info("Layer 2: Entry point check")
    entry = repo_path / "main.py"
    if not entry.exists():
        # Nothing to validate — not every project ships a main.py.
        logger.info(" No main.py found, skipping")
        return {"skipped": True, "reason": "no main.py"}

    interpreter = _get_python_exe(repo_path)
    layer = {"exists": True, "help_works": False, "help_length": 0, "startup_error": None}

    # --help is safe to invoke: argparse-style entry points exit immediately.
    try:
        proc = _run_in_repo(repo_path, [interpreter, "main.py", "--help"], timeout=15)
    except subprocess.TimeoutExpired:
        layer["startup_error"] = "timed out (15s)"
        result.add_finding(SmokeFinding(
            layer="entry_point", severity="error",
            check="help_timeout",
            message="main.py --help timed out (15s) — may hang on startup",
        ))
    else:
        if proc.returncode == 0:
            layer["help_works"] = True
            layer["help_length"] = len(proc.stdout)
            # Suspiciously short help output usually means a stubbed CLI.
            if len(proc.stdout) < 30:
                result.add_finding(SmokeFinding(
                    layer="entry_point", severity="warning",
                    check="help_too_short",
                    message=f"main.py --help output is only {len(proc.stdout)} chars (possible stub)",
                ))
        else:
            stderr_text = proc.stderr.strip() if proc.stderr else ""
            layer["startup_error"] = stderr_text.splitlines()[-1] if stderr_text else "nonzero exit"
            result.add_finding(SmokeFinding(
                layer="entry_point", severity="error",
                check="help_fails",
                message=f"main.py --help failed: {layer['startup_error']}",
                details=proc.stderr[-1000:] if proc.stderr else "",
            ))

    layer["passed"] = layer["help_works"]
    return layer
(15s) โ€” may hang on startup", + )) + + layer["passed"] = layer["help_works"] + return layer + + +# ============================================================================ +# Layer 3: Stub/Placeholder Detection +# ============================================================================ + +def check_stubs(repo_path: Path, result: SmokeResult) -> Dict[str, Any]: + """Detect unimplemented stubs and placeholders across all source files. + + Uses static_completeness_check from code_gen.static_checks, which + detects pass-only functions, placeholder returns, NotImplementedError, + and Ellipsis bodies. + """ + logger.info("Layer 3: Stub/placeholder detection") + from code_gen.static_checks import static_completeness_check + + source_files = _find_source_files(repo_path) + file_paths = [str(f.relative_to(repo_path)) for f in source_files] + issues = static_completeness_check(file_paths, repo_path) + + layer: Dict[str, Any] = { + "total_files": len(source_files), + "stub_count": 0, + "placeholder_count": 0, + "stubs": [], + } + + for issue in issues: + is_stub = issue.startswith("STUB:") + is_critical = is_stub or issue.startswith("MISSING:") or issue.startswith("PARSE_ERROR:") + if is_stub: + layer["stub_count"] += 1 + else: + layer["placeholder_count"] += 1 + + result.add_finding(SmokeFinding( + layer="stubs", + severity="error" if is_critical else "warning", + check="stub_detected" if is_stub else "placeholder_detected", + message=issue, + )) + layer["stubs"].append(issue) + + layer["passed"] = layer["stub_count"] == 0 + logger.info(" Stubs: %d stubs, %d placeholders", + layer["stub_count"], layer["placeholder_count"]) + return layer + + +# ============================================================================ +# Main Orchestrator +# ============================================================================ + +def run_smoke_test( + repo_path: Optional[Path] = None, + layers: Optional[List[str]] = None, +) -> SmokeResult: + """Run smoke tests on the 
def run_smoke_test(
    repo_path: Optional[Path] = None,
    layers: Optional[List[str]] = None,
) -> SmokeResult:
    """Run smoke tests on the generated repository.

    Args:
        repo_path: Path to the project repo. Defaults to common paths.
        layers: Which layers to run. None = all. Options: imports, entry, stubs

    Returns:
        SmokeResult with findings and per-layer details.
    """
    target = repo_path or REPO_DIR
    # An empty or missing selection means "run everything".
    selected = set(layers) if layers else {"imports", "entry", "stubs"}
    started_at = time.time()

    outcome = SmokeResult()

    # Each layer appends its findings to `outcome` and returns a detail dict.
    if "imports" in selected:
        outcome.layers["imports"] = check_imports(target, outcome)
    if "entry" in selected:
        outcome.layers["entry_point"] = check_entry_point(target, outcome)
    if "stubs" in selected:
        outcome.layers["stubs"] = check_stubs(target, outcome)

    outcome.duration = time.time() - started_at
    return outcome


# ============================================================================
# CLI
# ============================================================================

def main() -> int:
    """CLI wrapper: parse args, run the smoke test, render the report."""
    parser = argparse.ArgumentParser(
        description="Smoke Test — post-codegen integration sanity check",
    )
    parser.add_argument("--json", action="store_true", help="Output as JSON")
    parser.add_argument("--layer", choices=["imports", "entry", "stubs"],
                        action="append", help="Run specific layer(s) only")
    parser.add_argument("--repo", type=Path, help="Path to repo (default: auto)")

    args = parser.parse_args()

    # Human-readable runs are verbose; JSON runs keep the log channel quiet.
    logging.basicConfig(
        level=logging.WARNING if args.json else logging.DEBUG,
        format="%(asctime)s [%(name)s] %(levelname)s: %(message)s",
        datefmt="%H:%M:%S",
    )

    # File handler for persistent logging via the shared helper
    # (idempotent; degrades gracefully on read-only FS).
    from common.logging_setup import setup_file_logging
    setup_file_logging("smoke_test")

    outcome = run_smoke_test(
        repo_path=args.repo,
        layers=args.layer,
    )
    report = outcome.to_dict()

    if args.json:
        print(json.dumps(report, indent=2))
        return 0 if outcome.success else 1

    icon = "✅" if outcome.success else "❌"
    print(f"\n {icon} Smoke Test ({outcome.project_type}) — {outcome.duration:.1f}s")

    for layer_name, layer_data in outcome.layers.items():
        if isinstance(layer_data, dict) and layer_data.get("skipped"):
            print(f" ⏭ {layer_name}: skipped ({layer_data.get('reason','')})")
        elif isinstance(layer_data, dict):
            licon = "✅" if layer_data.get("passed", True) else "❌"
            print(f" {licon} {layer_name}")

    if outcome.findings:
        print(f"\n Findings ({len(outcome.findings)}):")
        for f in outcome.findings:
            sev_icon = "❌" if f.severity == "error" else "⚠️"
            print(f" {sev_icon} [{f.layer}] {f.message}")

    scripts = get_scripts_dir()
    if not outcome.success:
        print("\n Fix the issues above, then re-run:")
        print(f" python3 {scripts}/smoke_test.py --json")

    return 0 if outcome.success else 1


if __name__ == "__main__":
    sys.exit(main())
+This script produces a visual summary including: +- Status message +- Summary statistics table +- Component paths table (with file count and feature count) +- Directory structure tree with colored annotations + +Output uses ANSI colors: +- Component comments: Cyan +- Feature comments: Yellow +""" + +import json +import argparse +import sys +from pathlib import Path +from typing import Any, Dict, List, Tuple, TextIO + +# Import centralized paths +from common.paths import SKELETON_FILE, SKELETON_SUMMARY_FILE + +# ANSI color codes +class Colors: + RESET = "\033[0m" + BOLD = "\033[1m" + DIM = "\033[2m" + + # Directory names - Blue + DIR = "\033[34m" + + # File names - White (default, but bold) + FILE = "\033[97m" + + # Component comments (directories) - Cyan + Bold + COMPONENT = "\033[1;36m" + + # Feature comments (files) - Yellow + FEATURE = "\033[33m" + + # Feature tags - Magenta (for the brackets) + FEATURE_TAG = "\033[35m" + + # Status colors + GREEN = "\033[32m" + RED = "\033[31m" + + # Tree structure - Dim + TREE = "\033[2m" + + +# ============================================================================ +# Utility Functions +# ============================================================================ + + +def write_unicode_table( + output: TextIO, + headers: List[str], + rows: List[List[Any]], + title: str = "", + indent: int = 2 +) -> None: + """Write a table with Unicode box drawing characters to output stream.""" + col_widths = [len(str(h)) for h in headers] + for row in rows: + for i, cell in enumerate(row): + col_widths[i] = max(col_widths[i], len(str(cell))) + + col_widths = [w + 2 for w in col_widths] + prefix = " " * indent + + if title: + output.write(f"\n{prefix}{title}\n") + + # Top border + output.write(prefix + "โ”Œ" + "โ”ฌ".join("โ”€" * w for w in col_widths) + "โ”\n") + + # Header row + header_row = prefix + "โ”‚" + for i, header in enumerate(headers): + header_row += f" {str(header).ljust(col_widths[i] - 1)}โ”‚" + output.write(header_row + 
"\n") + + # Header separator + output.write(prefix + "โ”œ" + "โ”ผ".join("โ”€" * w for w in col_widths) + "โ”ค\n") + + # Data rows + for row in rows: + data_row = prefix + "โ”‚" + for i, cell in enumerate(row): + data_row += f" {str(cell).ljust(col_widths[i] - 1)}โ”‚" + output.write(data_row + "\n") + + if row != rows[-1]: + output.write(prefix + "โ”œ" + "โ”ผ".join("โ”€" * w for w in col_widths) + "โ”ค\n") + + # Bottom border + output.write(prefix + "โ””" + "โ”ด".join("โ”€" * w for w in col_widths) + "โ”˜\n") + + +def count_files_in_subtree(node: Dict[str, Any]) -> int: + """Count total files in a subtree.""" + if node.get("type") == "file": + return 1 + count = 0 + for child in node.get("children", []): + count += count_files_in_subtree(child) + return count + + +def count_features_in_subtree(node: Dict[str, Any]) -> int: + """Count total features in a subtree.""" + if node.get("type") == "file": + return len(node.get("feature_paths", [])) + count = 0 + for child in node.get("children", []): + count += count_features_in_subtree(child) + return count + + +def count_directories_in_tree(node: Dict[str, Any]) -> int: + """Count total directories in tree (excluding root).""" + if node.get("type") == "file": + return 0 + count = 1 # Count this directory + for child in node.get("children", []): + count += count_directories_in_tree(child) + return count + + +def find_component_directory_node( + root: Dict[str, Any], + component_path: str +) -> Dict[str, Any] | None: + """Find the node corresponding to a component directory path.""" + if root.get("path", ".").strip("./") == component_path.strip("./"): + return root + + for child in root.get("children", []): + result = find_component_directory_node(child, component_path) + if result: + return result + + return None + + +def get_component_stats( + root: Dict[str, Any], + component_directories: Dict[str, str] +) -> Dict[str, Tuple[int, int]]: + """Get file count and feature count for each component. 
def get_component_stats(
    root: Dict[str, Any],
    component_directories: Dict[str, str]
) -> Dict[str, Tuple[int, int]]:
    """Get file count and feature count for each component.

    Args:
        root: Root node of the skeleton directory tree.
        component_directories: Mapping of component name -> directory path.

    Returns:
        Dict mapping component name to (file_count, feature_count)
    """
    stats: Dict[str, Tuple[int, int]] = {}

    for comp_name, dir_path in component_directories.items():
        node = find_component_directory_node(root, dir_path)
        if node:
            file_count = count_files_in_subtree(node)
            feature_count = count_features_in_subtree(node)
            stats[comp_name] = (file_count, feature_count)
        else:
            # Directory missing from the tree: report zeros so the summary
            # still renders instead of failing.
            stats[comp_name] = (0, 0)

    return stats


# ============================================================================
# Tree Rendering
# ============================================================================


def render_tree(
    node: Dict[str, Any],
    component_directories: Dict[str, str],
    prefix: str = "",
    is_last: bool = True,
    is_root: bool = True,
    use_color: bool = True
) -> List[str]:
    """Render directory tree with annotations.

    Component directories get cyan comments.
    Files get yellow feature annotations with magenta tags.

    Args:
        node: Current tree node ({"type", "name", "path", "children"/"feature_paths"}).
        component_directories: Mapping of component name -> directory path.
        prefix: Accumulated indentation (including tree-color codes) for this depth.
        is_last: Whether this node is the last child of its parent.
        is_root: Whether this node is the tree root (root gets no connector).
        use_color: When False, all ANSI codes become empty strings.

    Returns:
        List of rendered lines (one per node, depth-first).
    """
    lines = []

    node_type = node.get("type", "directory")
    node_name = node.get("name", "")
    node_path = node.get("path", "").strip("./")

    # Color helpers — each collapses to "" when colors are disabled, which
    # also makes the .replace() stripping below a harmless no-op.
    c_tree = Colors.TREE if use_color else ""
    c_dir = Colors.DIR if use_color else ""
    c_file = Colors.FILE if use_color else ""
    c_comp = Colors.COMPONENT if use_color else ""
    c_feat = Colors.FEATURE if use_color else ""
    c_tag = Colors.FEATURE_TAG if use_color else ""
    c_reset = Colors.RESET if use_color else ""
    c_dim = Colors.DIM if use_color else ""  # NOTE(review): currently unused

    # Determine connector and child prefix
    if is_root:
        connector = ""
        child_prefix = ""
    else:
        connector = f"{c_tree}└── {c_reset}" if is_last else f"{c_tree}├── {c_reset}"
        child_prefix = prefix + (f"{c_tree}    {c_reset}" if is_last else f"{c_tree}│   {c_reset}")

    # Build the line
    if node_type == "directory":
        # Check if this is a component directory
        component_name = None
        for comp, path in component_directories.items():
            if path.strip("./") == node_path:
                component_name = comp
                break

        # Directory name with trailing slash (colored)
        dir_display = f"{c_dir}{node_name}/{c_reset}"
        line = f"{prefix}{connector}{dir_display}"

        # Calculate visible length for padding (without ANSI codes).
        # Stripping only c_tree/c_reset works because prefix/connector
        # never carry any other color codes; +1 accounts for the "/".
        visible_len = len(prefix.replace(c_tree, "").replace(c_reset, "")) + \
            len(connector.replace(c_tree, "").replace(c_reset, "")) + \
            len(node_name) + 1

        # Add component annotation if applicable
        if component_name:
            # Count features in this component; annotations align at col 42.
            feature_count = count_features_in_subtree(node)
            padding = " " * max(1, 42 - visible_len)
            comment = f"{c_comp}* {component_name.replace('_', ' ').title()} ({feature_count} features){c_reset}"
            line = f"{line}{padding}{comment}"

        lines.append(line)

        # Render children
        children = node.get("children", [])
        for i, child in enumerate(children):
            is_child_last = (i == len(children) - 1)
            lines.extend(render_tree(
                child,
                component_directories,
                child_prefix,
                is_child_last,
                is_root=False,
                use_color=use_color
            ))

    else:  # file
        # File name (colored)
        file_display = f"{c_file}{node_name}{c_reset}"
        line = f"{prefix}{connector}{file_display}"

        # Calculate visible length for padding (same stripping caveat as above)
        visible_len = len(prefix.replace(c_tree, "").replace(c_reset, "")) + \
            len(connector.replace(c_tree, "").replace(c_reset, "")) + \
            len(node_name)

        # Add feature annotation
        feature_paths = node.get("feature_paths", [])
        if feature_paths:
            # Extract just the feature names (last part of path)
            feature_names = [fp.split("/")[-1] for fp in feature_paths]
            # Format each feature as a magenta-bracketed yellow tag
            formatted_features = [f"{c_tag}[{c_feat}{name}{c_tag}]{c_reset}" for name in feature_names]
            padding = " " * max(1, 42 - visible_len)
            line = f"{line}{padding}{' '.join(formatted_features)}"

        lines.append(line)

    return lines
# ============================================================================
# Main Summary Function
# ============================================================================


def generate_summary(
    skeleton_data: Dict[str, Any],
    use_color: bool = True,
    output: TextIO | None = None
) -> None:
    """Generate and print the skeleton summary.

    Writes the full report (status header, statistics table, component
    table, and annotated directory tree) to *output*.

    Args:
        skeleton_data: Parsed skeleton.json contents.
        use_color: Emit ANSI color codes (disable when writing to a file).
        output: Destination stream; defaults to sys.stdout.
    """
    # Default to stdout if no output specified
    if output is None:
        output = sys.stdout

    def write(text: str = "") -> None:
        output.write(text + "\n")

    # NOTE(review): repo_name is currently unused — kept for a future header?
    repo_name = skeleton_data.get("repository_name", "project")
    root = skeleton_data.get("root", {})
    component_directories = skeleton_data.get("component_directories", {})
    statistics = skeleton_data.get("statistics", {})

    # Calculate statistics: prefer the precomputed values in the file,
    # falling back to counting the tree directly.
    total_components = statistics.get("total_components", len(component_directories))
    total_features = statistics.get("total_features", count_features_in_subtree(root))
    total_files = statistics.get("total_files", count_files_in_subtree(root))
    total_directories = count_directories_in_tree(root)

    # Color helpers
    c_green = Colors.GREEN if use_color else ""
    c_bold = Colors.BOLD if use_color else ""
    c_dim = Colors.DIM if use_color else ""
    c_cyan = Colors.COMPONENT if use_color else ""
    c_reset = Colors.RESET if use_color else ""

    # Print header
    write()
    write(f" {c_dim}{'═' * 70}{c_reset}")
    write(f" {c_bold}Skeleton Building Status: {c_green}[OK] Complete{c_reset}")
    write(f" {c_dim}{'═' * 70}{c_reset}")
    write()
    write(f" The skeleton is fully validated and consistent with the feature tree.")

    # Print summary table
    write_unicode_table(
        output,
        headers=["Metric", "Value"],
        rows=[
            ["Total Components", total_components],
            ["Total Features", total_features],
            ["Total Files", total_files],
            ["Total Directories", total_directories],
        ],
        title="Summary",
        indent=2
    )

    # Get component stats
    component_stats = get_component_stats(root, component_directories)

    # Print component paths table (before directory structure)
    comp_rows = []
    for comp_name in component_directories:
        dir_path = component_directories[comp_name]
        file_count, feature_count = component_stats.get(comp_name, (0, 0))
        comp_rows.append([comp_name, dir_path, file_count, feature_count])

    write_unicode_table(
        output,
        headers=["Component", "Directory Path", "Files", "Features"],
        rows=comp_rows,
        title="Component Paths",
        indent=2
    )

    # Print directory structure with separator
    write()
    write(f" {c_dim}{'─' * 70}{c_reset}")
    write(f" {c_bold}Directory Structure{c_reset}")
    write(f" {c_dim}{'─' * 70}{c_reset}")
    write()
    write(f" {c_dim}Legend: {c_cyan}* Component{c_reset} {Colors.FEATURE_TAG if use_color else ''}[{Colors.FEATURE if use_color else ''}feature{Colors.FEATURE_TAG if use_color else ''}]{c_reset}")
    write()

    tree_lines = render_tree(root, component_directories, use_color=use_color)
    for line in tree_lines:
        write(f" {line}")

    write()
    write(f" {c_dim}{'═' * 70}{c_reset}")


def load_skeleton(path: Path) -> Dict[str, Any] | None:
    """Load skeleton JSON file.

    Returns the parsed dict only when it looks like a valid skeleton
    (a dict containing a "root" key); otherwise None.
    """
    try:
        with path.open("r", encoding="utf-8") as f:
            data = json.load(f)
            if isinstance(data, dict) and "root" in data:
                return data
    except Exception as e:
        # Deliberately broad: any IO/parse problem is reported the same way
        # and the caller treats None as "invalid skeleton".
        print(f"Error loading skeleton: {e}")
    return None
def main() -> int:
    """CLI entry point: load the skeleton file and emit its summary.

    Returns:
        Process exit code: 0 on success, 1 when the skeleton file is
        missing or invalid.
    """
    parser = argparse.ArgumentParser(
        description="Generate summary report of skeleton.json"
    )
    parser.add_argument(
        "--input", "-i",
        type=str,
        default=str(SKELETON_FILE),
        help=f"Input skeleton file (default: {SKELETON_FILE})"
    )
    parser.add_argument(
        "--output", "-o",
        type=str,
        default=str(SKELETON_SUMMARY_FILE),
        help=f"Output summary file (default: {SKELETON_SUMMARY_FILE})"
    )
    parser.add_argument(
        "--stdout",
        action="store_true",
        help="Print to stdout instead of saving to file"
    )
    parser.add_argument(
        "--no-color",
        action="store_true",
        help="Disable colored output (automatically disabled when saving to file)"
    )

    args = parser.parse_args()

    # Load skeleton
    input_path = Path(args.input)
    if not input_path.exists():
        print(f"Error: Skeleton file not found: {input_path}")
        print("Please run /rpgkit.build_skeleton first.")
        return 1

    skeleton_data = load_skeleton(input_path)
    if not skeleton_data:
        print(f"Error: Invalid skeleton file: {input_path}")
        return 1

    # Determine output mode
    if args.stdout:
        # Interactive mode: honor --no-color, write straight to stdout.
        use_color = not args.no_color
        generate_summary(skeleton_data, use_color=use_color, output=sys.stdout)
    else:
        # File mode: never embed ANSI codes in the saved report.
        output_path = Path(args.output)
        output_path.parent.mkdir(parents=True, exist_ok=True)

        with open(output_path, "w", encoding="utf-8") as f:
            generate_summary(skeleton_data, use_color=False, output=f)

        print(f"Summary saved to: {output_path}")

    return 0


if __name__ == "__main__":
    # sys.exit instead of the site-provided builtin exit(): the builtin is
    # absent under `python -S`, and sys.exit matches the sibling scripts.
    sys.exit(main())
+""" + +import argparse +import atexit +import json +import os +import re +import signal +import sys +import time +import traceback +from contextlib import contextmanager +from pathlib import Path +from typing import Optional +from urllib.parse import urlparse + +# --------------------------------------------------------------------------- +# Constants +# --------------------------------------------------------------------------- + +DEFAULT_OUTPUT_DIR = ".rpgkit/tmp/screenshots" +DEFAULT_TIMEOUT = 10000 # 10s per Playwright operation +SCRIPT_TIMEOUT = 60 # 60s hard limit for run-script + +# Track browser PIDs for emergency cleanup +_active_browser_pids: list = [] + + +# --------------------------------------------------------------------------- +# Browser helpers +# --------------------------------------------------------------------------- + +@contextmanager +def open_browser(headless: bool = True): + """Context manager that yields (playwright, browser) and always cleans up. + + Safety guarantees: + - Browser is always closed, even on exception or SIGTERM + - Playwright server is always stopped + - atexit handler kills any leaked chromium processes + - All cleanup errors are logged (not silently swallowed) + """ + from playwright.sync_api import sync_playwright + pw = sync_playwright().start() + browser = pw.chromium.launch(headless=headless) + + # Track browser PID for emergency cleanup + try: + # Playwright exposes the process via internal API + process = browser._impl_obj._browser_process + if process and process.pid: + _active_browser_pids.append(process.pid) + except Exception: + pass # Not critical โ€” just a safety net + + try: + yield pw, browser + finally: + # Close browser + try: + browser.close() + except Exception as e: + print(f"[browser.py] Warning: browser.close() failed: {e}", + file=sys.stderr) + + # Stop Playwright + try: + pw.stop() + except Exception as e: + print(f"[browser.py] Warning: pw.stop() failed: {e}", + file=sys.stderr) + + # Remove 
from active PID tracking + _active_browser_pids.clear() + + +def _emergency_cleanup(): + """Atexit handler: kill any leaked chromium processes.""" + for pid in _active_browser_pids: + try: + os.kill(pid, signal.SIGTERM) + print(f"[browser.py] Emergency cleanup: killed chromium pid {pid}", + file=sys.stderr) + except (OSError, ProcessLookupError): + pass + + +atexit.register(_emergency_cleanup) + + +def open_page(browser, url: str, timeout: int = DEFAULT_TIMEOUT, + exit_on_error: bool = False): + """Open a page with fallback loading strategies. + + Args: + browser: Playwright browser instance. + url: URL to navigate to. + timeout: Page load timeout in ms. + exit_on_error: If True, sys.exit(1) on load failure. + """ + page = browser.new_page() + page.set_default_timeout(timeout) + try: + page.goto(url, wait_until="networkidle", timeout=timeout) + except Exception: + try: + page.goto(url, wait_until="domcontentloaded", timeout=timeout) + except Exception as e: + err_type = type(e).__name__ + print(f"[browser.py] Page load failed for {url}", file=sys.stderr) + print(f" Error: {err_type}: {e}", file=sys.stderr) + if "ERR_CONNECTION_REFUSED" in str(e): + print(" [HINT] Is the server running? Start it first.", file=sys.stderr) + elif "TIMEOUT" in str(e).upper(): + print(" [HINT] Server is slow or unresponsive. Try increasing --timeout.", file=sys.stderr) + elif "ERR_NAME_NOT_RESOLVED" in str(e): + print(" [HINT] Hostname not found. 
Check the URL.", file=sys.stderr) + + # Report if page failed to load + if "chrome-error" in page.url: + print(f"Error: could not load {url} (connection refused or unreachable)", + file=sys.stderr) + print(" [HINT] Make sure the web server is running on the correct port.", + file=sys.stderr) + if exit_on_error: + browser.close() + sys.exit(1) + + return page + + +def _ensure_dir(path: str): + """Create parent directories for an output path.""" + Path(path).parent.mkdir(parents=True, exist_ok=True) + + +def _auto_filename(url: str, label: str = "", ext: str = "png") -> str: + """Generate a unique screenshot filename from URL + timestamp. + + Naming: HHMMSS_