e53e7662e9
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
53 lines
1.5 KiB
Go
53 lines
1.5 KiB
Go
package parse_test
|
|
|
|
import (
|
|
"strings"
|
|
"testing"
|
|
|
|
"qbank/internal/parse"
|
|
)
|
|
|
|
func TestChunk(t *testing.T) {
|
|
t.Run("small text stays in one chunk", func(t *testing.T) {
|
|
text := "Para one.\n\nPara two.\n\nPara three."
|
|
chunks := parse.Chunk(text, 1000)
|
|
if len(chunks) != 1 {
|
|
t.Errorf("want 1 chunk, got %d: %v", len(chunks), chunks)
|
|
}
|
|
if !strings.Contains(chunks[0], "Para one") || !strings.Contains(chunks[0], "Para three") {
|
|
t.Errorf("content lost: %q", chunks[0])
|
|
}
|
|
})
|
|
|
|
t.Run("paragraphs split when over limit", func(t *testing.T) {
|
|
para := strings.Repeat("x", 600)
|
|
text := para + "\n\n" + para + "\n\n" + para
|
|
chunks := parse.Chunk(text, 1000)
|
|
if len(chunks) < 2 {
|
|
t.Errorf("want ≥2 chunks for 1800-rune input with 1000 limit, got %d", len(chunks))
|
|
}
|
|
// No chunk should combine paragraphs past the limit
|
|
for i, c := range chunks {
|
|
if len([]rune(c)) > 1200 {
|
|
t.Errorf("chunk %d is %d runes, too large", i, len([]rune(c)))
|
|
}
|
|
}
|
|
})
|
|
|
|
t.Run("single oversized paragraph kept as own chunk", func(t *testing.T) {
|
|
bigPara := strings.Repeat("x", 2000)
|
|
chunks := parse.Chunk(bigPara, 1000)
|
|
if len(chunks) != 1 {
|
|
t.Errorf("want 1 chunk for single oversized para, got %d", len(chunks))
|
|
}
|
|
})
|
|
|
|
t.Run("empty paragraphs ignored", func(t *testing.T) {
|
|
text := "\n\nPara one.\n\n\n\nPara two.\n\n"
|
|
chunks := parse.Chunk(text, 1000)
|
|
if len(chunks) != 1 {
|
|
t.Errorf("want 1 chunk after ignoring blanks, got %d", len(chunks))
|
|
}
|
|
})
|
|
}
|