feat: initial CodeBoard monorepo scaffold
Turborepo monorepo with npm workspaces:
- apps/web: Next.js 14 frontend with Tailwind v4, SSE progress, doc viewer
- apps/worker: BullMQ job processor (clone → parse → LLM generate)
- packages/shared: TypeScript types
- packages/parser: Babel-based AST parser (JS/TS) + regex (Python)
- packages/llm: OpenAI/Anthropic provider abstraction + prompt pipeline
- packages/diagrams: Mermaid architecture & dependency graph generators
- packages/database: Prisma schema (PostgreSQL)
- Docker multi-stage build (web + worker targets)

All packages compile successfully with tsc and next build.
This commit is contained in:
121
packages/parser/src/file-walker.ts
Normal file
121
packages/parser/src/file-walker.ts
Normal file
@@ -0,0 +1,121 @@
|
||||
import { readdir, stat, readFile } from "node:fs/promises";
|
||||
import { join, relative, extname, basename } from "node:path";
|
||||
|
||||
/**
 * Entry names the walker skips outright: dependency trees, build output,
 * caches, virtual environments and tooling metadata.
 */
const IGNORED_DIRS = new Set<string>([
  // Dependencies and version-control data
  "node_modules",
  ".git",
  "vendor",

  // Build output and caches
  "dist",
  "build",
  "target",
  "coverage",
  ".next",
  ".turbo",
  ".cache",

  // Python byte-code and virtual environments
  "__pycache__",
  ".venv",
  "venv",
  ".tox",

  // Editor settings
  ".idea",
  ".vscode",
]);
|
||||
|
||||
/**
 * Maps a file extension (leading dot included, matched case-sensitively)
 * to the language identifier reported for that file. Extensions that are
 * absent from this table are treated as "not a source file".
 */
const LANGUAGE_MAP: Record<string, string> = {
  // TypeScript
  ".ts": "typescript",
  ".tsx": "typescript",

  // JavaScript, including explicit ESM / CommonJS extensions
  ".js": "javascript",
  ".jsx": "javascript",
  ".mjs": "javascript",
  ".cjs": "javascript",

  // Other supported languages
  ".py": "python",
  ".go": "go",
  ".rs": "rust",
  ".java": "java",
  ".rb": "ruby",
  ".php": "php",
  ".cs": "csharp",

  // C family: ".h" headers are reported as C, ".hpp" headers as C++
  ".cpp": "cpp",
  ".c": "c",
  ".h": "c",
  ".hpp": "cpp",

  ".swift": "swift",
  ".kt": "kotlin",
};
|
||||
|
||||
/**
 * File names (extension removed) that mark a file as an entry point.
 * Matching is exact and case-sensitive.
 */
const ENTRY_POINT_NAMES = new Set<string>([
  "index",
  "main",
  "app",
  "server",
  "mod",
  "lib",
  "__init__",
  "manage",
]);
|
||||
|
||||
/** Metadata recorded for each source file found during a repository walk. */
export interface WalkedFile {
  /**
   * Path built by joining the containing directory with the file name;
   * it is absolute when the walk was started from an absolute path.
   */
  absolutePath: string;

  /** The same file expressed relative to the root the walk started from. */
  relativePath: string;

  /** Language identifier looked up from the file's extension. */
  language: string;

  /** File size in bytes, as reported by `stat`. */
  size: number;

  /** True when the file name without its extension is a known entry-point name. */
  isEntryPoint: boolean;
}
|
||||
|
||||
/** Files larger than this many bytes are left out of the walk results. */
const MAX_FILE_SIZE_BYTES = 500_000;

/**
 * Recursively collects recognised source files beneath `dir` into `results`.
 *
 * An entry is skipped when its name is in `IGNORED_DIRS`, when its name
 * starts with a dot, when it is neither a directory nor a regular file,
 * when its extension has no `LANGUAGE_MAP` entry, or when it is larger than
 * `MAX_FILE_SIZE_BYTES`.
 *
 * @param dir - Directory to scan on this call.
 * @param rootDir - Root of the walk; `relativePath` is computed against it.
 * @param results - Accumulator that matching files are appended to (mutated).
 * @returns A promise that resolves once `dir` and all of its descendants
 *   have been scanned.
 * @throws Rejects with the underlying file-system error if `readdir` or
 *   `stat` fails.
 */
async function walkDir(
  dir: string,
  rootDir: string,
  results: WalkedFile[]
): Promise<void> {
  const entries = await readdir(dir, { withFileTypes: true });

  for (const entry of entries) {
    if (IGNORED_DIRS.has(entry.name)) continue;
    if (entry.name.startsWith(".")) continue;

    const fullPath = join(dir, entry.name);

    if (entry.isDirectory()) {
      await walkDir(fullPath, rootDir, results);
      continue;
    }

    // Only regular files are candidates. A symbolic link reports neither
    // isDirectory() nor isFile() here, and `stat` below would follow it:
    // a dangling link would reject and abort the entire walk, and a link
    // such as `notes.py -> /etc/passwd` would expose a file outside the
    // repository under a source-file name. FIFOs, sockets and devices are
    // excluded for the same reason.
    if (!entry.isFile()) continue;

    const ext = extname(entry.name);
    const language = LANGUAGE_MAP[ext];
    if (!language) continue;

    const { size } = await stat(fullPath);
    if (size > MAX_FILE_SIZE_BYTES) continue;

    results.push({
      absolutePath: fullPath,
      relativePath: relative(rootDir, fullPath),
      language,
      size,
      isEntryPoint: ENTRY_POINT_NAMES.has(basename(entry.name, ext)),
    });
  }
}
|
||||
|
||||
/**
 * Walks the repository rooted at `repoPath` and returns every file the
 * walker collected, with entry-point files first. Within each group the
 * files are ordered by `relativePath` using `localeCompare`.
 *
 * @param repoPath - Root directory of the repository to scan.
 * @returns The collected files in the order described above.
 */
export async function walkFiles(repoPath: string): Promise<WalkedFile[]> {
  const collected: WalkedFile[] = [];
  await walkDir(repoPath, repoPath, collected);

  // Entry points sort ahead of everything else; ties fall back to the path.
  const byEntryPointThenPath = (left: WalkedFile, right: WalkedFile): number => {
    if (left.isEntryPoint === right.isEntryPoint) {
      return left.relativePath.localeCompare(right.relativePath);
    }
    return left.isEntryPoint ? -1 : 1;
  };

  return collected.sort(byEntryPointThenPath);
}
|
||||
|
||||
/**
 * Reads a file and returns its contents decoded as UTF-8 text.
 *
 * @param filePath - Path of the file to read.
 * @returns The file's full contents as a string.
 * @throws Rejects with the underlying file-system error if the read fails.
 */
export async function readFileContent(filePath: string): Promise<string> {
  const text = await readFile(filePath, { encoding: "utf-8" });
  return text;
}
|
||||
|
||||
/**
 * Looks up the language identifier for a path from its file extension.
 *
 * @param filePath - File name or path; only its extension is inspected.
 * @returns The language identifier, or `null` when the extension is not in
 *   `LANGUAGE_MAP` (including when the path has no extension).
 */
export function detectLanguage(filePath: string): string | null {
  const extension = extname(filePath);
  const language = LANGUAGE_MAP[extension];
  return language === undefined ? null : language;
}
|
||||
Reference in New Issue
Block a user