feat: initial CodeBoard monorepo scaffold
Turborepo monorepo with npm workspaces:
- apps/web: Next.js 14 frontend with Tailwind v4, SSE progress, doc viewer
- apps/worker: BullMQ job processor (clone → parse → LLM generate)
- packages/shared: TypeScript types
- packages/parser: Babel-based AST parser (JS/TS) + regex (Python)
- packages/llm: OpenAI/Anthropic provider abstraction + prompt pipeline
- packages/diagrams: Mermaid architecture & dependency graph generators
- packages/database: Prisma schema (PostgreSQL)
- Docker multi-stage build (web + worker targets)

All packages compile successfully with tsc and next build.
This commit is contained in:
157
packages/parser/src/languages/python.ts
Normal file
157
packages/parser/src/languages/python.ts
Normal file
@@ -0,0 +1,157 @@
|
||||
import type {
|
||||
FileNode,
|
||||
FunctionNode,
|
||||
ClassNode,
|
||||
ImportNode,
|
||||
ExportNode,
|
||||
} from "@codeboard/shared";
|
||||
import type { LanguageParser } from "./base.js";
|
||||
|
||||
// Header patterns used by the Python parser below.
//
// The leading indent group is `[ \t]*`, not `\s*`: under the `m` flag `\s`
// also matches newlines, so a header preceded by a blank line would match
// starting at that blank line. That skews `match.index` (wrong line number)
// and makes `match[1].length` count newlines as indentation.

/** `def` / `async def` header: 1 = indent, 2 = name, 3 = raw params, 4 = return annotation. */
const FUNC_RE = /^([ \t]*)(?:async[ \t]+)?def\s+(\w+)\s*\(([^)]*)\)(?:\s*->\s*([^:]+))?\s*:/gm;
/** `class` header: 1 = indent, 2 = name, 3 = raw base-class list. */
const CLASS_RE = /^([ \t]*)class\s+(\w+)(?:\(([^)]*)\))?\s*:/gm;
/** Column-0 `import x` / `from m import x`: 1 = module after `from` (if any), 2 = rest of the line. */
const IMPORT_RE = /^(?:from\s+([\w.]+)\s+)?import\s+(.+)$/gm;
/** Triple-quoted string at the very start of a body: 1 = `"""` content, 2 = `'''` content. */
const DOCSTRING_RE = /^\s*(?:"""([\s\S]*?)"""|'''([\s\S]*?)''')/;
|
||||
|
||||
/**
 * Extracts parameter names from the text between a `def`'s parentheses.
 *
 * Annotations (`: T`) and defaults (`= v`) are stripped. `self`, `cls` and the
 * bare `*` / `/` separators are omitted because they are not caller-visible
 * parameters. Star prefixes are kept, so `*args` stays `"*args"`.
 *
 * @param raw - Raw parameter list, e.g. `"self, a: Dict[str, int], b=1"`.
 * @returns Parameter names in declaration order; empty for a blank list.
 */
function parseParams(raw: string): string[] {
  if (!raw.trim()) return [];

  // Split on top-level commas only: a comma nested in brackets or inside a
  // string literal belongs to an annotation/default, not to the list itself.
  const pieces: string[] = [];
  let current = "";
  let depth = 0;
  let quote = "";
  let escaped = false;

  for (const ch of raw) {
    if (quote) {
      if (escaped) {
        escaped = false;
      } else if (ch === "\\") {
        escaped = true;
      } else if (ch === quote) {
        quote = "";
      }
    } else if (ch === '"' || ch === "'") {
      quote = ch;
    } else if (ch === "(" || ch === "[" || ch === "{") {
      depth++;
    } else if (ch === ")" || ch === "]" || ch === "}") {
      depth = Math.max(0, depth - 1);
    } else if (ch === "," && depth === 0) {
      pieces.push(current);
      current = "";
      continue;
    }
    current += ch;
  }
  pieces.push(current);

  return pieces
    .map((p) => p.trim().split(":")[0].split("=")[0].trim())
    .filter((p) => p && p !== "self" && p !== "cls" && p !== "*" && p !== "/");
}
|
||||
|
||||
/** A `def` found in the source, paired with the indentation of its header line. */
interface LocatedDef {
  indent: number;
  node: FunctionNode;
}

/** Counts the leading space/tab characters of `line`. */
function indentWidth(line: string): number {
  let width = 0;
  while (line[width] === " " || line[width] === "\t") width++;
  return width;
}

/** Returns the 1-based number of the line that contains character `offset`. */
function lineNumberAt(content: string, offset: number): number {
  let line = 1;
  let pos = content.indexOf("\n");
  while (pos !== -1 && pos < offset) {
    line++;
    pos = content.indexOf("\n", pos + 1);
  }
  return line;
}

/**
 * Resolves where a header match really starts and how far it is indented.
 * If the indent group (match[1]) swallowed preceding blank lines, only the
 * whitespace after its last newline is treated as indentation.
 */
function locateHeader(match: RegExpExecArray): { offset: number; indent: number } {
  const lead = match[1] ?? "";
  const skipped = lead.lastIndexOf("\n") + 1;
  return { offset: match.index + skipped, indent: lead.length - skipped };
}

/**
 * Returns the 1-based number of the last line belonging to an indented block.
 * Blank lines and comment lines never end the block; the first other line
 * indented no deeper than the header does. The result is the last line that
 * is indented deeper than the header (or the header's own last line).
 */
function findBlockEnd(lines: readonly string[], headerEndLine: number, indent: number): number {
  let end = headerEndLine;
  // `lines` is 0-based, so index `headerEndLine` is the first line after the header.
  for (let i = headerEndLine; i < lines.length; i++) {
    const line = lines[i] ?? "";
    const text = line.trim();
    if (!text) continue;
    if (indentWidth(line) > indent) {
      end = i + 1;
      continue;
    }
    if (text.startsWith("#")) continue;
    break;
  }
  return end;
}

/** Indentation of the first statement in a block, or -1 when the block has no body lines. */
function firstBodyIndent(lines: readonly string[], headerEndLine: number, blockEnd: number): number {
  for (let i = headerEndLine; i < blockEnd; i++) {
    const line = lines[i] ?? "";
    const text = line.trim();
    if (text && !text.startsWith("#")) return indentWidth(line);
  }
  return -1;
}

/** Finds every `def` in the file and resolves its line range and docstring. */
function collectDefs(content: string, lines: readonly string[]): LocatedDef[] {
  const defs: LocatedDef[] = [];
  let match: RegExpExecArray | null;

  FUNC_RE.lastIndex = 0;
  while ((match = FUNC_RE.exec(content)) !== null) {
    const { offset, indent } = locateHeader(match);
    const lineStart = lineNumberAt(content, offset);
    // The header pattern ends on the signature's colon, which may sit on a
    // later line than `def` when the parameter list is wrapped.
    const headerEndLine = lineNumberAt(content, match.index + match[0].length - 1);
    const lineEnd = findBlockEnd(lines, headerEndLine, indent);

    // Only the function's own body is searched, so a long docstring is still
    // found and a string belonging to the following code is never picked up.
    const body = lines.slice(headerEndLine, lineEnd).join("\n");
    const docMatch = DOCSTRING_RE.exec(body);
    const docstring = docMatch ? (docMatch[1] ?? docMatch[2] ?? "").trim() : undefined;

    defs.push({
      indent,
      node: {
        name: match[2],
        params: parseParams(match[3]),
        returnType: match[4]?.trim(),
        lineStart,
        lineEnd,
        docstring,
        calls: [],
      },
    });
  }

  return defs;
}

/**
 * Regex-based (non-AST) parser for Python source files.
 *
 * `parse` reports:
 * - `functions`: `def`s at column 0, with 1-based `lineStart`/`lineEnd` and docstring;
 * - `classes`: every `class`, with the `def`s found at its body's indentation
 *   level and inside its own line range as `methods` (`properties` is always empty);
 * - `imports`: column-0 `import` / `from ... import` statements, aliases dropped;
 * - `exports`: the names listed in a bracketed `__all__ = [...]` assignment;
 * - `complexity`: a count of branching keywords and boolean operators per line.
 *
 * Matching is purely textual, so headers that appear inside string literals
 * are reported as well.
 */
export const pythonParser: LanguageParser = {
  extensions: [".py"],

  parse(content: string, filePath: string): FileNode {
    const lines = content.split("\n");
    const classes: ClassNode[] = [];
    const imports: ImportNode[] = [];
    const exports: ExportNode[] = [];

    let match: RegExpExecArray | null;

    const defs = collectDefs(content, lines);
    const functions: FunctionNode[] = defs
      .filter((def) => def.indent === 0)
      .map((def) => def.node);

    CLASS_RE.lastIndex = 0;
    while ((match = CLASS_RE.exec(content)) !== null) {
      const { indent } = locateHeader(match);
      const headerEndLine = lineNumberAt(content, match.index + match[0].length - 1);
      const classEnd = findBlockEnd(lines, headerEndLine, indent);
      const bodyIndent = firstBodyIndent(lines, headerEndLine, classEnd);

      // A method is a `def` inside this class's line range at the class-body
      // indentation; deeper `def`s are nested helpers or belong to inner classes.
      const methods = defs
        .filter(
          (def) =>
            def.indent === bodyIndent &&
            def.node.lineStart > headerEndLine &&
            def.node.lineStart <= classEnd,
        )
        .map((def) => def.node);

      classes.push({ name: match[2], methods, properties: [] });
    }

    const stripComments = (text: string): string => text.replace(/#.*$/gm, "");

    IMPORT_RE.lastIndex = 0;
    while ((match = IMPORT_RE.exec(content)) !== null) {
      const fromModule = match[1];
      let nameList = stripComments(match[2]);

      // `from m import (` continues on the following lines up to the closing paren.
      if (nameList.includes("(") && !nameList.includes(")")) {
        const close = content.indexOf(")", IMPORT_RE.lastIndex);
        if (close !== -1) {
          nameList += stripComments(content.slice(IMPORT_RE.lastIndex, close));
          IMPORT_RE.lastIndex = close + 1;
        }
      }

      const importedNames = nameList
        .replace(/[()\\]/g, " ")
        .split(",")
        .map((s) => s.trim().split(/\s+as\s+/)[0].trim())
        .filter(Boolean);

      if (fromModule) {
        imports.push({ source: fromModule, specifiers: importedNames });
      } else {
        // `import a, b` brings in each module separately.
        for (const name of importedNames) {
          imports.push({ source: name, specifiers: [name] });
        }
      }
    }

    const allRe = /^__all__\s*=\s*\[([^\]]*)\]/m;
    const allMatch = allRe.exec(content);
    if (allMatch) {
      const names = stripComments(allMatch[1])
        .split(",")
        .map((s) => s.trim().replace(/['"]/g, ""))
        .filter(Boolean);
      for (const name of names) {
        exports.push({ name, isDefault: false });
      }
    }

    // Rough cyclomatic-style score: one point per branching keyword at the
    // start of a line, plus one per line containing a boolean operator.
    let complexity = 0;
    for (const line of lines) {
      const trimmed = line.trim();
      if (trimmed.startsWith("if ") || trimmed.startsWith("elif ")) complexity++;
      if (trimmed.startsWith("for ") || trimmed.startsWith("while ")) complexity++;
      if (trimmed.startsWith("except")) complexity++;
      if (trimmed.includes(" and ") || trimmed.includes(" or ")) complexity++;
    }

    return {
      path: filePath,
      language: "python",
      size: content.length,
      functions,
      classes,
      imports,
      exports,
      complexity,
    };
  },
};
|
||||
Reference in New Issue
Block a user