Phase 3: PDF/DOCX extraction, chunking, LLM client with mock interface
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,37 @@
|
||||
package parse
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestIsGibberish(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
text string
|
||||
want bool
|
||||
}{
|
||||
{"empty", "", true},
|
||||
{"too short", "hello", true},
|
||||
{"exactly 50 letters", strings.Repeat("a", 50), false},
|
||||
{"49 letters", strings.Repeat("a", 49), true},
|
||||
{"all punctuation", strings.Repeat(".", 100), true},
|
||||
{"1% alpha", strings.Repeat(".", 99) + "a", true},
|
||||
{"2% alpha exactly", strings.Repeat(".", 49) + "a" + strings.Repeat(".", 49) + "a", false},
|
||||
{"normal text", "The quick brown fox jumps over the lazy dog. " + strings.Repeat("word ", 10), false},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if got := isGibberish(tt.text); got != tt.want {
|
||||
t.Errorf("isGibberish(%q…) = %v, want %v", tt.text[:min(len(tt.text), 20)], got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func min(a, b int) int {
|
||||
if a < b {
|
||||
return a
|
||||
}
|
||||
return b
|
||||
}
|
||||
Reference in New Issue
Block a user