@@ -17,11 +17,11 @@ function getTokenCount(text: string): number {
1717}
1818
1919/**
20- * Configuration for JSON/YAML chunking (aligned with Chonkie standards)
20+ * Configuration for JSON/YAML chunking
2121 * Reduced limits to ensure we stay well under OpenAI's 8,191 token limit per embedding request
2222 */
2323const JSON_YAML_CHUNKING_CONFIG = {
24- TARGET_CHUNK_SIZE : 1024 , // Target tokens per chunk (aligned with Chonkie)
24+ TARGET_CHUNK_SIZE : 1024 , // Target tokens per chunk
2525 MIN_CHARACTERS_PER_CHUNK : 100 , // Minimum characters per chunk to filter tiny fragments
2626 MAX_CHUNK_SIZE : 1500 , // Maximum tokens per chunk
2727 MAX_DEPTH_FOR_SPLITTING : 5 , // Maximum depth to traverse for splitting
@@ -100,7 +100,7 @@ export class JsonYamlChunker {
100100 const content = JSON . stringify ( data , null , 2 )
101101 const tokenCount = getTokenCount ( content )
102102
103- // Filter tiny fragments using character count (Chonkie standard)
103+ // Filter tiny fragments using character count
104104 if ( content . length >= this . minCharactersPerChunk ) {
105105 chunks . push ( {
106106 text : content ,
@@ -320,7 +320,7 @@ export class JsonYamlChunker {
320320 }
321321 }
322322
323- // Filter tiny fragments using character count (Chonkie standard)
323+ // Filter tiny fragments using character count
324324 if ( currentChunk && currentChunk . length >= this . minCharactersPerChunk ) {
325325 chunks . push ( {
326326 text : currentChunk ,
0 commit comments