Phase 3: PDF/DOCX extraction, chunking, LLM client with mock interface

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Jānis Kacēns
2026-05-11 13:03:04 +03:00
parent d9de37d3d8
commit e53e7662e9
13 changed files with 628 additions and 0 deletions
+37
View File
@@ -0,0 +1,37 @@
package parse
import (
"strings"
"testing"
)
// TestIsGibberish is a table-driven test for isGibberish. Each case is run
// as its own subtest so a failure is reported under the case name.
//
// The table exercises empty and very short input, inputs of 49 and 50
// letters, and 100-character inputs made almost entirely of punctuation.
// NOTE(review): the expectations suggest a 50-character minimum length and a
// 2% letter-ratio cutoff — confirm against the isGibberish implementation.
func TestIsGibberish(t *testing.T) {
	type testCase struct {
		name string
		text string
		want bool
	}

	// dots builds a run of n '.' characters for the punctuation-heavy cases.
	dots := func(n int) string { return strings.Repeat(".", n) }

	cases := []testCase{
		{name: "empty", text: "", want: true},
		{name: "too short", text: "hello", want: true},
		{name: "exactly 50 letters", text: strings.Repeat("a", 50), want: false},
		{name: "49 letters", text: strings.Repeat("a", 49), want: true},
		{name: "all punctuation", text: dots(100), want: true},
		{name: "1% alpha", text: dots(99) + "a", want: true},
		// 100 characters in total, exactly two of which are letters.
		{name: "2% alpha exactly", text: dots(49) + "a" + dots(49) + "a", want: false},
		{name: "normal text", text: "The quick brown fox jumps over the lazy dog. " + strings.Repeat("word ", 10), want: false},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := isGibberish(tc.text)
			if got == tc.want {
				return
			}
			// Only the first 20 bytes of the input are shown to keep the
			// failure message short.
			preview := tc.text[:min(len(tc.text), 20)]
			t.Errorf("isGibberish(%q…) = %v, want %v", preview, got, tc.want)
		})
	}
}
// min returns the smaller of a and b; when they are equal, that shared value
// is returned.
//
// NOTE(review): on Go 1.21+ this declaration shadows the built-in min within
// the package — confirm the module's Go version before removing it.
func min(a, b int) int {
	if b < a {
		return b
	}
	return a
}