Skip to content
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -72,3 +72,6 @@ Thumbs.db
*.tmp
*.log
*.bak
# BMAD (local only)
.bmad-core/
.bmad-*/
55 changes: 41 additions & 14 deletions codewiki/cli/adapters/doc_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,34 +26,37 @@
class CLIDocumentationGenerator:
"""
CLI adapter for documentation generation with progress reporting.

This class wraps the backend documentation generator and adds
CLI-specific features like progress tracking and error handling.
"""

def __init__(
self,
repo_path: Path,
output_dir: Path,
config: Dict[str, Any],
verbose: bool = False,
generate_html: bool = False
generate_html: bool = False,
target_file: str = None
):
"""
Initialize the CLI documentation generator.

Args:
repo_path: Repository path
output_dir: Output directory
config: LLM configuration
verbose: Enable verbose output
generate_html: Whether to generate HTML viewer
target_file: Optional path to a single file for focused documentation
"""
self.repo_path = repo_path
self.output_dir = output_dir
self.config = config
self.verbose = verbose
self.generate_html = generate_html
self.target_file = target_file
self.progress_tracker = ProgressTracker(total_stages=5, verbose=verbose)
self.job = DocumentationJob()

Expand Down Expand Up @@ -141,7 +144,10 @@ def generate(self) -> DocumentationJob:
max_token_per_module=self.config.get('max_token_per_module', 36369),
max_token_per_leaf_module=self.config.get('max_token_per_leaf_module', 16000),
max_depth=self.config.get('max_depth', 2),
agent_instructions=self.config.get('agent_instructions')
agent_instructions=self.config.get('agent_instructions'),
target_file=self.target_file,
use_claude_code=self.config.get('use_claude_code', False),
use_gemini_code=self.config.get('use_gemini_code', False),
)

# Run backend documentation generation
Expand Down Expand Up @@ -196,29 +202,50 @@ async def _run_backend_generation(self, backend_config: BackendConfig):

# Stage 2: Module Clustering
self.progress_tracker.start_stage(2, "Module Clustering")
if self.verbose:
self.progress_tracker.update_stage(0.5, "Clustering modules with LLM...")

# Import clustering function

# Determine clustering method based on config
use_claude_code = backend_config.use_claude_code
use_gemini_code = backend_config.use_gemini_code
if use_claude_code:
if self.verbose:
self.progress_tracker.update_stage(0.5, "Clustering modules with Claude Code CLI...")
elif use_gemini_code:
if self.verbose:
self.progress_tracker.update_stage(0.5, "Clustering modules with Gemini CLI...")
else:
if self.verbose:
self.progress_tracker.update_stage(0.5, "Clustering modules with LLM...")

# Import clustering functions
from codewiki.src.be.cluster_modules import cluster_modules
from codewiki.src.utils import file_manager
from codewiki.src.config import FIRST_MODULE_TREE_FILENAME, MODULE_TREE_FILENAME

working_dir = str(self.output_dir.absolute())
file_manager.ensure_directory(working_dir)
first_module_tree_path = os.path.join(working_dir, FIRST_MODULE_TREE_FILENAME)
module_tree_path = os.path.join(working_dir, MODULE_TREE_FILENAME)

try:
if os.path.exists(first_module_tree_path):
module_tree = file_manager.load_json(first_module_tree_path)
else:
module_tree = cluster_modules(leaf_nodes, components, backend_config)
if use_claude_code:
# Use Claude Code CLI for clustering
from codewiki.src.be.claude_code_adapter import claude_code_cluster
module_tree = claude_code_cluster(leaf_nodes, components, backend_config)
elif use_gemini_code:
# Use Gemini CLI for clustering (larger context window)
from codewiki.src.be.gemini_code_adapter import gemini_code_cluster
module_tree = gemini_code_cluster(leaf_nodes, components, backend_config)
else:
# Use standard LLM clustering
module_tree = cluster_modules(leaf_nodes, components, backend_config)
file_manager.save_json(module_tree, first_module_tree_path)

file_manager.save_json(module_tree, module_tree_path)
self.job.module_count = len(module_tree)

if self.verbose:
self.progress_tracker.update_stage(1.0, f"Created {len(module_tree)} modules")
except Exception as e:
Expand Down
100 changes: 90 additions & 10 deletions codewiki/cli/commands/generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,13 @@ def parse_patterns(patterns_str: str) -> List[str]:
default="docs",
help="Output directory for generated documentation (default: ./docs)",
)
@click.option(
"--file",
"-f",
type=click.Path(exists=True),
default=None,
help="Generate documentation for a single file instead of the entire repository",
)
@click.option(
"--create-branch",
is_flag=True,
Expand Down Expand Up @@ -78,7 +85,6 @@ def parse_patterns(patterns_str: str) -> List[str]:
)
@click.option(
"--focus",
"-f",
type=str,
default=None,
help="Comma-separated modules/paths to focus on (e.g., 'src/core,src/api')",
Expand Down Expand Up @@ -126,10 +132,21 @@ def parse_patterns(patterns_str: str) -> List[str]:
default=None,
help="Maximum depth for hierarchical decomposition (overrides config)",
)
@click.option(
"--use-claude-code",
is_flag=True,
help="Use Claude Code CLI as the LLM backend instead of direct API calls",
)
@click.option(
"--use-gemini-code",
is_flag=True,
help="Use Gemini CLI as the LLM backend instead of direct API calls (supports larger context)",
)
@click.pass_context
def generate_command(
ctx,
output: str,
file: Optional[str],
create_branch: bool,
github_pages: bool,
no_cache: bool,
Expand All @@ -142,24 +159,30 @@ def generate_command(
max_tokens: Optional[int],
max_token_per_module: Optional[int],
max_token_per_leaf_module: Optional[int],
max_depth: Optional[int]
max_depth: Optional[int],
use_claude_code: bool,
use_gemini_code: bool,
):
"""
Generate comprehensive documentation for a code repository.

Analyzes the current repository and generates documentation using LLM-powered
analysis. Documentation is output to ./docs/ by default.

Examples:

\b
# Basic generation
$ codewiki generate


\b
# Generate documentation for a single file
$ codewiki generate --file src/main.py

\b
# With git branch creation and GitHub Pages
$ codewiki generate --create-branch --github-pages

\b
# Force full regeneration
$ codewiki generate --no-cache
Expand Down Expand Up @@ -187,6 +210,14 @@ def generate_command(
\b
# Override max depth for hierarchical decomposition
$ codewiki generate --max-depth 3

\b
# Use Claude Code CLI as the LLM backend
$ codewiki generate --use-claude-code

\b
# Use Gemini CLI as the LLM backend (larger context window)
$ codewiki generate --use-gemini-code
"""
logger = create_logger(verbose=verbose)
start_time = time.time()
Expand Down Expand Up @@ -216,9 +247,46 @@ def generate_command(

config = config_manager.get_config()
api_key = config_manager.get_api_key()

logger.success("Configuration valid")


# Validate that only one CLI backend is selected
if use_claude_code and use_gemini_code:
raise ConfigurationError(
"Cannot use both --use-claude-code and --use-gemini-code.\n\n"
"Please select only one CLI backend."
)

# Validate Claude Code CLI if flag is set
if use_claude_code:
import shutil
claude_path = shutil.which("claude")
if not claude_path:
raise ConfigurationError(
"Claude Code CLI not found.\n\n"
"The --use-claude-code flag requires Claude Code CLI to be installed.\n\n"
"To install Claude Code CLI, see: https://docs.anthropic.com/en/docs/claude-code\n"
"Make sure 'claude' is available in your PATH."
)
if verbose:
logger.debug(f"Claude Code CLI found: {claude_path}")
logger.success("Claude Code CLI available")

# Validate that the Gemini CLI is installed when --use-gemini-code is set.
# Failing here, before repository validation, gives the user an actionable
# message instead of an error from deep inside the clustering stage.
if use_gemini_code:
    import shutil
    gemini_path = shutil.which("gemini")
    if not gemini_path:
        # Gemini CLI is published by Google on npm as @google/gemini-cli;
        # the install hint must name that package.
        raise ConfigurationError(
            "Gemini CLI not found.\n\n"
            "The --use-gemini-code flag requires Gemini CLI to be installed.\n\n"
            "To install Gemini CLI: npm install -g @google/gemini-cli\n"
            "Make sure 'gemini' is available in your PATH."
        )
    if verbose:
        logger.debug(f"Gemini CLI found: {gemini_path}")
    logger.success("Gemini CLI available")

# Validate repository
logger.step("Validating repository...", 2, 4)

Expand Down Expand Up @@ -342,6 +410,14 @@ def generate_command(
elif config.agent_instructions and not config.agent_instructions.is_empty():
agent_instructions_dict = config.agent_instructions.to_dict()

# Log Claude Code mode if enabled
if use_claude_code and verbose:
logger.debug("Claude Code CLI mode enabled")

# Log Gemini Code mode if enabled
if use_gemini_code and verbose:
logger.debug("Gemini CLI mode enabled (large context window)")

# Create generator
generator = CLIDocumentationGenerator(
repo_path=repo_path,
Expand All @@ -359,9 +435,13 @@ def generate_command(
'max_token_per_leaf_module': max_token_per_leaf_module if max_token_per_leaf_module is not None else config.max_token_per_leaf_module,
# Max depth setting (runtime override takes precedence)
'max_depth': max_depth if max_depth is not None else config.max_depth,
# CLI integrations
'use_claude_code': use_claude_code,
'use_gemini_code': use_gemini_code,
},
verbose=verbose,
generate_html=github_pages
generate_html=github_pages,
target_file=str(file) if file else None
)

# Run generation
Expand Down
43 changes: 41 additions & 2 deletions codewiki/src/be/agent_tools/generate_sub_module_documentations.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from pydantic_ai import RunContext, Tool, Agent
from typing import Union, Any

from codewiki.src.be.agent_tools.deps import CodeWikiDeps
from codewiki.src.be.agent_tools.read_code_components import read_code_components_tool
Expand All @@ -12,16 +13,54 @@
logger = logging.getLogger(__name__)


def normalize_sub_module_specs(specs: Union[dict[str, list[str]], list[dict]]) -> dict[str, list[str]]:
    """Normalize sub_module_specs to dict format.

    Handles both formats:
    - Dict format (Claude): {"module_name": ["comp1", "comp2"], ...}
    - List format (GPT/Azure OpenAI): [{"name": "module_name", "components": ["comp1", "comp2"]}, ...]

    Also handles variations in key names that GPT models might use.

    Component values are coerced to lists in BOTH formats: a list is kept
    as-is, a missing/empty value becomes ``[]`` and any other single value is
    wrapped in a one-element list. This keeps callers from iterating over the
    characters of a bare string such as ``{"module": "comp1"}``.

    Args:
        specs: Sub-module specs in dict or list format.

    Returns:
        A dict mapping each sub-module name to its list of components. List
        entries that are not dicts, or that carry no recognizable name, are
        skipped with a warning. Any other input type yields an empty dict.
    """
    def _as_component_list(value):
        # Coerce one raw "components" value into a list.
        if isinstance(value, list):
            return value
        if not value:
            return []
        return [value]

    if isinstance(specs, dict):
        return {name: _as_component_list(components) for name, components in specs.items()}

    if isinstance(specs, list):
        # Key spellings seen in model output, in order of preference.
        name_keys = ('name', 'module_name', 'sub_module_name', 'submodule_name')
        component_keys = ('components', 'core_components', 'core_component_ids', 'files')

        result = {}
        for item in specs:
            if not isinstance(item, dict):
                logger.warning("Skipping sub_module_specs entry of type %s", type(item))
                continue

            # First truthy value wins, mirroring an `a or b or c` chain.
            name = next((item[key] for key in name_keys if item.get(key)), None)
            components = next((item[key] for key in component_keys if item.get(key)), None)

            if not name:
                logger.warning("Skipping sub_module_specs entry without a module name: %r", item)
                continue

            result[name] = _as_component_list(components)
        return result

    # Fallback: return empty dict
    logger.warning(f"Unexpected sub_module_specs format: {type(specs)}")
    return {}



async def generate_sub_module_documentation(
ctx: RunContext[CodeWikiDeps],
sub_module_specs: dict[str, list[str]]
sub_module_specs: dict[str, list[str]] | list[dict[str, Any]]
) -> str:
"""Generate detailed description of a given sub-module specs to the sub-agents

Args:
sub_module_specs: The specs of the sub-modules to generate documentation for. E.g. {"sub_module_1": ["core_component_1.1", "core_component_1.2"], "sub_module_2": ["core_component_2.1", "core_component_2.2"], ...}
sub_module_specs: The specs of the sub-modules to generate documentation for.
Accepts two formats:
- Dict format: {"sub_module_1": ["core_component_1.1", "core_component_1.2"], "sub_module_2": ["core_component_2.1", "core_component_2.2"], ...}
- List format: [{"name": "sub_module_1", "components": ["core_component_1.1", "core_component_1.2"]}, ...]
"""
# Normalize the input to dict format (handles both Claude and GPT model outputs)
sub_module_specs = normalize_sub_module_specs(sub_module_specs)

if not sub_module_specs:
logger.warning("No valid sub-module specs provided after normalization")
return "No valid sub-module specs provided."

deps = ctx.deps
previous_module_name = deps.current_module_name
Expand Down
Loading