import type {
  FileNode,
  FunctionNode,
  ClassNode,
  ImportNode,
  ExportNode,
} from "@codeboard/shared";
import type { LanguageParser } from "./base.js";

/**
 * Matches a `def` / `async def` header.
 * Groups: 1 = indentation, 2 = name, 3 = raw parameter list, 4 = return annotation.
 *
 * Indentation is `[ \t]*` (not `\s*`) so that blank lines preceding the header
 * are never counted as indentation. The parameter list tolerates one level of
 * nested parentheses so defaults such as `x=foo()` do not cut the match short.
 */
const FUNC_RE =
  /^([ \t]*)(?:async\s+)?def\s+(\w+)\s*\(((?:[^()]|\([^()]*\))*)\)(?:\s*->\s*([^:]+))?\s*:/gm;

/** Matches a `class` header. Groups: 1 = indentation, 2 = name, 3 = raw base list. */
const CLASS_RE = /^([ \t]*)class\s+(\w+)(?:\(([^)]*)\))?\s*:/gm;

/**
 * Matches a column-0 `import ...` / `from X import ...` statement.
 * Groups: 1 = module after `from` (optional), 2 = imported names, either the
 * rest of the line or a parenthesised list that may span several lines.
 */
const IMPORT_RE = /^(?:from[ \t]+([\w.]+)[ \t]+)?import[ \t]+(\([^)]*\)|.+)/gm;

/** Matches a triple-quoted string at the very start of a block body. */
const DOCSTRING_RE = /^\s*(?:"""([\s\S]*?)"""|'''([\s\S]*?)''')/;

/** A function definition found in the file together with its indentation width. */
interface ParsedDef {
  indent: number;
  node: FunctionNode;
}

/**
 * Splits `raw` on commas that are not nested inside brackets or quotes, so that
 * annotations like `Dict[str, int]` and defaults like `(1, 2)` stay in one piece.
 */
function splitTopLevel(raw: string): string[] {
  const parts: string[] = [];
  let depth = 0;
  let quote: string | null = null;
  let current = "";
  for (const ch of raw) {
    if (quote !== null) {
      if (ch === quote) quote = null;
    } else if (ch === '"' || ch === "'") {
      quote = ch;
    } else if (ch === "(" || ch === "[" || ch === "{") {
      depth++;
    } else if (ch === ")" || ch === "]" || ch === "}") {
      depth = Math.max(0, depth - 1);
    } else if (ch === "," && depth === 0) {
      parts.push(current);
      current = "";
      continue;
    }
    current += ch;
  }
  parts.push(current);
  return parts;
}

/**
 * Extracts parameter names from the raw text between a signature's parentheses.
 *
 * Annotations and default values are stripped; `self`, `cls` and the bare
 * `*` / `/` separators are omitted. Star prefixes (`*args`, `**kwargs`) are kept.
 *
 * @param raw - Text between the parentheses of a `def` header.
 * @returns Parameter names in declaration order.
 */
function parseParams(raw: string): string[] {
  if (!raw.trim()) return [];
  return splitTopLevel(raw)
    .map((p) => (p.split(/[:=]/, 1)[0] ?? "").trim())
    .filter((p) => p && p !== "self" && p !== "cls" && p !== "*" && p !== "/");
}

/** Returns the 1-based line number of the character at `index` in `content`. */
function lineNumberAt(content: string, index: number): number {
  let line = 1;
  for (
    let pos = content.indexOf("\n");
    pos !== -1 && pos < index;
    pos = content.indexOf("\n", pos + 1)
  ) {
    line++;
  }
  return line;
}

/** Number of leading space/tab characters on `line`. */
function indentWidth(line: string): number {
  return /^[ \t]*/.exec(line)?.[0].length ?? 0;
}

/**
 * Finds the last line (1-based) of the block whose header ends on `headerEndLine`.
 *
 * The block ends just before the first non-blank line indented no deeper than
 * the header; blank lines preceding that line are counted as part of the block.
 * When no such line exists the block runs to the end of `lines`.
 */
function findBlockEnd(lines: string[], headerEndLine: number, indent: number): number {
  let end = headerEndLine;
  // lines[] is 0-based, so index `headerEndLine` is the first line after the header.
  for (let i = headerEndLine; i < lines.length; i++) {
    const line = lines[i] ?? "";
    if (line.trim() && indentWidth(line) <= indent) return i;
    end = i + 1;
  }
  return end;
}

/** Returns the docstring opening the body spanning lines (headerEndLine, lineEnd], if any. */
function extractDocstring(
  lines: string[],
  headerEndLine: number,
  lineEnd: number,
): string | undefined {
  const body = lines.slice(headerEndLine, lineEnd).join("\n");
  const docMatch = DOCSTRING_RE.exec(body);
  return docMatch ? (docMatch[1] ?? docMatch[2] ?? "").trim() : undefined;
}

/**
 * Indentation width of the first non-blank, non-comment line of a block body,
 * or -1 when the body is empty (e.g. a one-line `class A: pass`).
 */
function bodyIndentOf(lines: string[], headerEndLine: number, lineEnd: number): number {
  for (let i = headerEndLine; i < lineEnd && i < lines.length; i++) {
    const line = lines[i] ?? "";
    const text = line.trim();
    if (text && !text.startsWith("#")) return indentWidth(line);
  }
  return -1;
}

/** Collects every `def` in the file, at any nesting depth, with its line span and docstring. */
function collectDefs(content: string, lines: string[]): ParsedDef[] {
  const defs: ParsedDef[] = [];
  let match: RegExpExecArray | null;
  FUNC_RE.lastIndex = 0; // the regex is global and shared, so reset before each scan
  while ((match = FUNC_RE.exec(content)) !== null) {
    const indent = (match[1] ?? "").length;
    const lineStart = lineNumberAt(content, match.index);
    // The signature may span several lines; the body starts after the closing ':'.
    const headerEndLine = lineNumberAt(content, match.index + match[0].length - 1);
    const lineEnd = findBlockEnd(lines, headerEndLine, indent);
    defs.push({
      indent,
      node: {
        name: match[2] ?? "",
        params: parseParams(match[3] ?? ""),
        returnType: match[4]?.trim(),
        lineStart,
        lineEnd,
        docstring: extractDocstring(lines, headerEndLine, lineEnd),
        calls: [],
      },
    });
  }
  return defs;
}

/**
 * Collects every `class` in the file. A class's methods are the `def`s that
 * lie inside the class's own line span at the indentation of the class body,
 * so functions nested in methods and members of nested classes are excluded.
 */
function collectClasses(content: string, lines: string[], defs: ParsedDef[]): ClassNode[] {
  const classes: ClassNode[] = [];
  let match: RegExpExecArray | null;
  CLASS_RE.lastIndex = 0;
  while ((match = CLASS_RE.exec(content)) !== null) {
    const indent = (match[1] ?? "").length;
    const headerEndLine = lineNumberAt(content, match.index + match[0].length - 1);
    const lineEnd = findBlockEnd(lines, headerEndLine, indent);
    const memberIndent = bodyIndentOf(lines, headerEndLine, lineEnd);
    const methods = defs
      .filter(
        (d) =>
          d.indent === memberIndent &&
          d.node.lineStart > headerEndLine &&
          d.node.lineStart <= lineEnd,
      )
      .map((d) => d.node);
    classes.push({ name: match[2] ?? "", methods, properties: [] });
  }
  return classes;
}

/**
 * Collects column-0 import statements. `from M import a, b` yields one entry
 * with source `M`; `import a, b` yields one entry per module. Aliases
 * (`as x`), trailing comments and surrounding parentheses are stripped.
 */
function collectImports(content: string): ImportNode[] {
  const imports: ImportNode[] = [];
  let match: RegExpExecArray | null;
  IMPORT_RE.lastIndex = 0;
  while ((match = IMPORT_RE.exec(content)) !== null) {
    const fromModule = match[1];
    const importedNames = (match[2] ?? "")
      .replace(/#.*$/gm, "")
      .replace(/[()]/g, "")
      .split(",")
      .map((s) => (s.trim().split(/\s+as\s+/)[0] ?? "").trim())
      .filter(Boolean);

    if (fromModule) {
      imports.push({ source: fromModule, specifiers: importedNames });
    } else {
      for (const name of importedNames) {
        imports.push({ source: name, specifiers: [name] });
      }
    }
  }
  return imports;
}

/** Reads the names listed in a column-0 `__all__ = [...]` assignment, if present. */
function collectExports(content: string): ExportNode[] {
  const allMatch = /^__all__\s*=\s*\[([^\]]*)\]/m.exec(content);
  if (!allMatch) return [];
  return (allMatch[1] ?? "")
    .split(",")
    .map((s) => s.trim().replace(/['"]/g, ""))
    .filter(Boolean)
    .map((name) => ({ name, isDefault: false }));
}

/**
 * Line-based complexity estimate: +1 for each line starting with `if`/`elif`,
 * +1 for `for`/`while`, +1 for `except`, and +1 for a line containing
 * ` and ` or ` or `. Comments and string contents are not excluded.
 */
function estimateComplexity(lines: string[]): number {
  let complexity = 0;
  for (const line of lines) {
    const trimmed = line.trim();
    if (trimmed.startsWith("if ") || trimmed.startsWith("elif ")) complexity++;
    if (trimmed.startsWith("for ") || trimmed.startsWith("while ")) complexity++;
    if (trimmed.startsWith("except")) complexity++;
    if (trimmed.includes(" and ") || trimmed.includes(" or ")) complexity++;
  }
  return complexity;
}

/**
 * Regex-based parser for Python source files.
 *
 * `parse` reports top-level functions (indentation 0), all classes with their
 * direct methods, column-0 imports, names listed in `__all__`, and a
 * line-based complexity estimate. Line numbers are 1-based.
 */
export const pythonParser: LanguageParser = {
  extensions: [".py"],

  parse(content: string, filePath: string): FileNode {
    const lines = content.split("\n");
    const defs = collectDefs(content, lines);
    const functions = defs.filter((d) => d.indent === 0).map((d) => d.node);
    const classes = collectClasses(content, lines, defs);
    const imports = collectImports(content);
    const exportNodes = collectExports(content);

    return {
      path: filePath,
      language: "python",
      size: content.length,
      functions,
      classes,
      imports,
      exports: exportNodes,
      complexity: estimateComplexity(lines),
    };
  },
};