Phase 3: PDF/DOCX extraction, chunking, LLM client with mock interface
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,53 @@
|
||||
package parse_test
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"qbank/internal/parse"
|
||||
)
|
||||
|
||||
// TestAcceptanceDOCXPipeline verifies the full DOCX → text → chunk pipeline
|
||||
// using a handcrafted in-memory docx with known content.
|
||||
func TestAcceptanceDOCXPipeline(t *testing.T) {
|
||||
const docXML = `<?xml version="1.0" encoding="UTF-8"?>
|
||||
<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
|
||||
<w:body>
|
||||
<w:p><w:r><w:t>1. Which keyword declares a variable in Go?</w:t></w:r></w:p>
|
||||
<w:p><w:r><w:t>A) var</w:t></w:r></w:p>
|
||||
<w:p><w:r><w:t>B) let</w:t></w:r></w:p>
|
||||
<w:p><w:r><w:t>C) dim</w:t></w:r></w:p>
|
||||
<w:p><w:r><w:t>Correct: A</w:t></w:r></w:p>
|
||||
<w:p><w:r><w:t>2. What does fmt.Println return?</w:t></w:r></w:p>
|
||||
<w:p><w:r><w:t>A) Nothing</w:t></w:r></w:p>
|
||||
<w:p><w:r><w:t>B) n int, err error</w:t></w:r></w:p>
|
||||
<w:p><w:r><w:t>Correct: B</w:t></w:r></w:p>
|
||||
</w:body>
|
||||
</w:document>`
|
||||
|
||||
docx := buildDocx(t, docXML)
|
||||
|
||||
text, err := parse.ExtractDOCX(bytes.NewReader(docx))
|
||||
if err != nil {
|
||||
t.Fatalf("ExtractDOCX: %v", err)
|
||||
}
|
||||
|
||||
wantPhrases := []string{
|
||||
"Which keyword declares a variable",
|
||||
"fmt.Println",
|
||||
"n int, err error",
|
||||
}
|
||||
for _, phrase := range wantPhrases {
|
||||
if !strings.Contains(text, phrase) {
|
||||
t.Errorf("text missing %q\nfull text:\n%s", phrase, text)
|
||||
}
|
||||
}
|
||||
|
||||
// Chunking should produce at least 1 chunk.
|
||||
chunks := parse.Chunk(text, 10_000)
|
||||
if len(chunks) == 0 {
|
||||
t.Error("Chunk returned 0 chunks for non-empty text")
|
||||
}
|
||||
t.Logf("extracted %d chars, %d chunk(s)", len(text), len(chunks))
|
||||
}
|
||||
Reference in New Issue
Block a user