diff --git a/build.ts b/build.ts new file mode 100644 index 0000000..9adf48e --- /dev/null +++ b/build.ts @@ -0,0 +1,5 @@ +await Bun.build({ + entrypoints: ['./src/cli.ts'], + outdir: './dist', + target: 'node', +}); \ No newline at end of file diff --git a/bun.lockb b/bun.lockb new file mode 100755 index 0000000..b145f6c Binary files /dev/null and b/bun.lockb differ diff --git a/package.json b/package.json index fd248ea..7ebf628 100644 --- a/package.json +++ b/package.json @@ -10,7 +10,7 @@ "dist" ], "scripts": { - "build": "rm -rf dist && mkdir dist && cp src/*.js dist/", + "build": "rm -rf dist && bun run build.ts", "test": "echo \"No tests specified\" && exit 0", "prepublishOnly": "npm run build", "dev": "npx .", @@ -31,6 +31,10 @@ "@eslint/js": "^9.14.0", "eslint": "^9.14.0", "globals": "^15.12.0", - "prettier": "^3.3.3" + "prettier": "^3.3.3", + "bun": "latest", + "@types/bun": "latest", + "@types/node": "^22.9.1", + "@types/micromatch": "^4.0.9" } } diff --git a/src/MarkdownGenerator.js b/src/MarkdownGenerator.ts similarity index 65% rename from src/MarkdownGenerator.js rename to src/MarkdownGenerator.ts index e29a106..53f7292 100644 --- a/src/MarkdownGenerator.js +++ b/src/MarkdownGenerator.ts @@ -1,4 +1,4 @@ -// MarkdownGenerator.js +// MarkdownGenerator.ts import path from 'path'; import { execSync } from 'child_process'; @@ -6,17 +6,18 @@ import { readFile, writeFile } from 'fs/promises'; import llama3Tokenizer from 'llama3-tokenizer-js'; import { TokenCleaner } from './TokenCleaner.js'; import micromatch from 'micromatch'; +import fileTypeExclusions from './fileTypeExclusions.js'; +import fileExclusions from './fileExclusions.js'; -/** - * @typedef {Object} MarkdownGeneratorOptions - * @property {string} [dir='.'] - The directory to process files from - * @property {string} [outputFilePath='./prompt.md'] - Path where the output markdown file will be saved - * @property {Set} [fileTypeExclusions] - Set of file extensions to exclude - * @property 
{string[]} [fileExclusions] - Array of specific files or patterns to exclude - * @property {Object} [customPatterns] - Custom patterns for token cleaning - * @property {Object} [customSecretPatterns] - Custom patterns for identifying and redacting secrets - * @property {boolean} [verbose=true] - Whether to log detailed information during processing - */ +interface MarkdownGeneratorOptions { + dir?: string; + outputFilePath?: string; + fileTypeExclusions?: Set; + fileExclusions?: string[]; + customPatterns?: Record; + customSecretPatterns?: Record; + verbose?: boolean; +} /** * @class MarkdownGenerator @@ -24,148 +25,26 @@ import micromatch from 'micromatch'; * It can exclude specific file types and files, clean tokens, and include todo lists. */ export class MarkdownGenerator { + private dir: string; + private outputFilePath: string; + private fileTypeExclusions: Set; + private fileExclusions: string[]; + private tokenCleaner: TokenCleaner; + private verbose: boolean; + /** * Creates an instance of MarkdownGenerator. 
* @param {MarkdownGeneratorOptions} [options={}] - Configuration options for the generator */ - constructor(options = {}) { + constructor(options: MarkdownGeneratorOptions = {}) { this.dir = options.dir || '.'; this.outputFilePath = options.outputFilePath || './prompt.md'; this.fileTypeExclusions = new Set( - options.fileTypeExclusions || [ - // Images - '.jpg', - '.jpeg', - '.png', - '.gif', - '.bmp', - '.svg', - '.webp', - '.tiff', - '.ico', - - // Fonts - '.ttf', - '.woff', - '.woff2', - '.eot', - '.otf', - - // Lock files - '.lock', - '.lockb', - - // Config files - '.yaml', - '.yml', - '.toml', - '.conf', - - // Binary and compiled - '.exe', - '.dll', - '.so', - '.dylib', - '.bin', - '.dat', - '.pyc', - '.pyo', - '.class', - '.jar', - - // Archives - '.zip', - '.tar', - '.gz', - '.rar', - '.7z', - - // Media - '.mp3', - '.mp4', - '.avi', - '.mov', - '.wav', - - // Database - '.db', - '.sqlite', - '.sqlite3' - ] + options.fileTypeExclusions || fileTypeExclusions, ); - this.fileExclusions = options.fileExclusions || [ - // Config patterns - '**/.*rc', - '**/.*rc.{js,json,yaml,yml}', - '**/*.config.{js,ts}', - '**/tsconfig.json', - '**/tsconfig*.json', - '**/jsconfig.json', - '**/jsconfig*.json', - '**/package-lock.json', - '**/.prettierignore', - // Environment and variables - '**/.env*', - '**/*.vars', - '**/secrets.*', - - // Version control - '**/.git*', - '**/.hg*', - '**/.svn*', - '**/CVS', - '**/.github/', - - // CI/CD - '**/.gitlab-ci.yml', - '**/azure-pipelines.yml', - '**/jenkins*', - - // Dependency directories - '**/node_modules/', - '**/target/', - '**/__pycache__/', - '**/venv/', - '**/.venv/', - '**/env/', - '**/build/', - '**/dist/', - '**/out/', - '**/bin/', - '**/obj/', - - // Documentation - '**/README*', - '**/CHANGELOG*', - '**/CONTRIBUTING*', - '**/LICENSE*', - '**/docs/', - '**/documentation/', - - // IDE and editors - '**/.{idea,vscode,eclipse,settings,zed,cursor}/', - '**/.project', - '**/.classpath', - '**/.factorypath', - - // Test and 
data - '**/test{s,}/', - '**/spec/', - '**/fixtures/', - '**/testdata/', - '**/__tests__/', - '**/*.{test,spec}.*', - '**/coverage/', - '**/jest.config.*', - - // Logs and temporary files - '**/logs/', - '**/tmp/', - '**/temp/', - '**/*.log' - ]; + this.fileExclusions = options.fileExclusions || fileExclusions; this.tokenCleaner = new TokenCleaner(options.customPatterns, options.customSecretPatterns); this.verbose = options.verbose !== undefined ? options.verbose : true; @@ -177,7 +56,7 @@ export class MarkdownGenerator { * @returns {Promise} Array of tracked file paths that aren't excluded * @throws {Error} When unable to execute git command or access files */ - async getTrackedFiles() { + async getTrackedFiles(): Promise { try { const output = this.execCommand('git ls-files'); const trackedFiles = output.split('\n').filter(file => file.trim().length > 0); @@ -210,7 +89,7 @@ export class MarkdownGenerator { * @returns {Promise} Cleaned and redacted content of the file * @throws {Error} When unable to read or process the file */ - async readFileContent(filePath) { + async readFileContent(filePath: string): Promise { try { const content = await readFile(filePath, 'utf-8'); const cleanedAndRedactedContent = this.tokenCleaner.cleanAndRedact(content); @@ -233,7 +112,7 @@ export class MarkdownGenerator { * @returns {Promise} Generated markdown content containing all processed files * @throws {Error} When unable to generate markdown content */ - async generateMarkdown() { + async generateMarkdown(): Promise { const trackedFiles = await this.getTrackedFiles(); if (this.verbose) { console.log(`Generating markdown for ${trackedFiles.length} files`); @@ -258,18 +137,18 @@ export class MarkdownGenerator { * @returns {Promise} Content of the todo file * @throws {Error} When unable to read or create the todo file */ - async getTodo() { + async getTodo(): Promise { const todoPath = path.join(this.dir, 'todo'); try { if (this.verbose) { console.log('Reading todo file'); } return 
await readFile(todoPath, 'utf-8'); - } catch (error) { + } catch (error: any) { if (error.code === 'ENOENT') { // File does not exist if (this.verbose) { - console.log("File not found, creating a new 'todo' file."); + console.log('File not found, creating a new \'todo\' file.'); } await writeFile(todoPath, ''); // Create an empty 'todo' file return await this.getTodo(); // Await the recursive call @@ -290,7 +169,7 @@ export class MarkdownGenerator { * @returns {Error} [result.error] - Error object if operation failed * @throws {Error} When unable to create or write the markdown document */ - async createMarkdownDocument() { + async createMarkdownDocument(): Promise<{ success: boolean, tokenCount?: number, error?: Error }> { try { const codeMarkdown = await this.generateMarkdown(); const todos = await this.getTodo(); @@ -317,7 +196,7 @@ export class MarkdownGenerator { * @throws {Error} When command execution fails * @private */ - execCommand(command) { + private execCommand(command: string): string { try { return execSync(command, { cwd: this.dir, encoding: 'utf-8' }).toString().trim(); } catch (error) { @@ -327,4 +206,4 @@ export class MarkdownGenerator { throw error; } } -} +} \ No newline at end of file diff --git a/src/TokenCleaner.js b/src/TokenCleaner.ts similarity index 83% rename from src/TokenCleaner.js rename to src/TokenCleaner.ts index 88a6205..c4eb9eb 100644 --- a/src/TokenCleaner.js +++ b/src/TokenCleaner.ts @@ -1,5 +1,11 @@ export class TokenCleaner { - constructor(customPatterns = [], customSecretPatterns = []) { + patterns: { regex: RegExp; replacement: string }[]; + secretPatterns: { regex: RegExp; replacement: string }[]; + + constructor(customPatterns: { regex: RegExp; replacement: string }[] = [], customSecretPatterns: { + regex: RegExp; + replacement: string + }[] = []) { this.patterns = [ { regex: /\/\/.*$/gm, replacement: '' }, { regex: /\/\*[\s\S]*?\*\//gm, replacement: '' }, @@ -40,22 +46,22 @@ export class TokenCleaner { ]; } - 
clean(code) { + clean(code: string): string { return this.patterns.reduce( (cleanCode, pattern) => cleanCode.replace(pattern.regex, pattern.replacement), code, ); } - redactSecrets(code) { + redactSecrets(code: string): string { return this.secretPatterns.reduce( (redactedCode, pattern) => redactedCode.replace(pattern.regex, pattern.replacement), code, ); } - cleanAndRedact(code) { + cleanAndRedact(code: string): string { const cleanedCode = this.clean(code); return this.redactSecrets(cleanedCode); } -} +} \ No newline at end of file diff --git a/src/cli.js b/src/cli.ts old mode 100755 new mode 100644 similarity index 71% rename from src/cli.js rename to src/cli.ts index de1bb05..4446bfb --- a/src/cli.js +++ b/src/cli.ts @@ -1,16 +1,16 @@ #!/usr/bin/env node -console.log("RUNNING TOKENIZER") +console.log('RUNNING TOKENIZER'); import { MarkdownGenerator } from './MarkdownGenerator.js'; const generator = new MarkdownGenerator(); generator .createMarkdownDocument() - .then(result => { + .then((result: { success: boolean }) => { if (!result.success) { process.exit(1); } }) - .catch(error => { + .catch((error: any) => { console.error('Error:', error); process.exit(1); - }); + }); \ No newline at end of file diff --git a/src/fileExclusions.ts b/src/fileExclusions.ts new file mode 100644 index 0000000..0e8b006 --- /dev/null +++ b/src/fileExclusions.ts @@ -0,0 +1,71 @@ +export default [ + // Config patterns + '**/.*rc', + '**/.*rc.{js,json,yaml,yml}', + '**/*.config.{js,ts}', + '**/tsconfig.json', + '**/tsconfig*.json', + '**/jsconfig.json', + '**/jsconfig*.json', + '**/package-lock.json', + '**/.prettierignore', + // Environment and variables + '**/.env*', + '**/*.vars', + '**/secrets.*', + + // Version control + '**/.git*', + '**/.hg*', + '**/.svn*', + '**/CVS', + '**/.github/', + + // CI/CD + '**/.gitlab-ci.yml', + '**/azure-pipelines.yml', + '**/jenkins*', + + // Dependency directories + '**/node_modules/', + '**/target/', + '**/__pycache__/', + '**/venv/', + 
'**/.venv/', + '**/env/', + '**/build/', + '**/dist/', + '**/out/', + '**/bin/', + '**/obj/', + + // Documentation + '**/README*', + '**/CHANGELOG*', + '**/CONTRIBUTING*', + '**/LICENSE*', + '**/docs/', + '**/documentation/', + + // IDE and editors + '**/.{idea,vscode,eclipse,settings,zed,cursor}/', + '**/.project', + '**/.classpath', + '**/.factorypath', + + // Test and data + '**/test{s,}/', + '**/spec/', + '**/fixtures/', + '**/testdata/', + '**/__tests__/', + '**/*.{test,spec}.*', + '**/coverage/', + '**/jest.config.*', + + // Logs and temporary files + '**/logs/', + '**/tmp/', + '**/temp/', + '**/*.log' +] \ No newline at end of file diff --git a/src/fileTypeExclusions.ts b/src/fileTypeExclusions.ts new file mode 100644 index 0000000..b1d3ba0 --- /dev/null +++ b/src/fileTypeExclusions.ts @@ -0,0 +1,60 @@ +export default [ + // Images + '.jpg', + '.jpeg', + '.png', + '.gif', + '.bmp', + '.svg', + '.webp', + '.tiff', + '.ico', + + // Fonts + '.ttf', + '.woff', + '.woff2', + '.eot', + '.otf', + + // Lock files + '.lock', + '.lockb', + + // Config files + '.yaml', + '.yml', + '.toml', + '.conf', + + // Binary and compiled + '.exe', + '.dll', + '.so', + '.dylib', + '.bin', + '.dat', + '.pyc', + '.pyo', + '.class', + '.jar', + + // Archives + '.zip', + '.tar', + '.gz', + '.rar', + '.7z', + + // Media + '.mp3', + '.mp4', + '.avi', + '.mov', + '.wav', + + // Database + '.db', + '.sqlite', + '.sqlite3' +] \ No newline at end of file diff --git a/src/index.js b/src/index.js deleted file mode 100644 index f4237ff..0000000 --- a/src/index.js +++ /dev/null @@ -1,2 +0,0 @@ -export { TokenCleaner } from './TokenCleaner.js'; -export { MarkdownGenerator } from './MarkdownGenerator.js'; diff --git a/src/index.ts b/src/index.ts new file mode 100644 index 0000000..3675345 --- /dev/null +++ b/src/index.ts @@ -0,0 +1,2 @@ +export { TokenCleaner } from './TokenCleaner'; +export { MarkdownGenerator } from './MarkdownGenerator'; diff --git a/tsconfig.json b/tsconfig.json new file mode 
100644 index 0000000..fcbc19a --- /dev/null +++ b/tsconfig.json @@ -0,0 +1,27 @@ +{ + "compilerOptions": { + // Enable latest features + "lib": ["ESNext", "DOM"], + "target": "ESNext", + "module": "ESNext", + "moduleDetection": "force", + "jsx": "react-jsx", + "allowJs": true, + + // Bundler mode + "moduleResolution": "bundler", + "allowImportingTsExtensions": true, + "verbatimModuleSyntax": true, + "noEmit": true, + + // Best practices + "strict": true, + "skipLibCheck": true, + "noFallthroughCasesInSwitch": true, + + // Some stricter flags (disabled by default) + "noUnusedLocals": false, + "noUnusedParameters": false, + "noPropertyAccessFromIndexSignature": false + } +}