package parse_test

import (
	"bytes"
	"strings"
	"testing"

	"qbank/internal/parse"
)

// TestAcceptanceDOCXPipeline is an acceptance check for the DOCX pipeline:
// a fixture document is built in memory, its text is extracted with
// parse.ExtractDOCX, and the extracted text is split with parse.Chunk.
//
// The test fails immediately if extraction returns an error, reports every
// expected phrase that is absent from the extracted text, and reports an
// error if chunking yields no chunks at all.
func TestAcceptanceDOCXPipeline(t *testing.T) {
	// Fixture body handed to buildDocx (a helper defined elsewhere in this
	// test package; presumably it wraps the content in a docx archive).
	const docXML = ` 1. Which keyword declares a variable in Go? A) var B) let C) dim Correct: A 2. What does fmt.Println return? A) Nothing B) n int, err error Correct: B `

	archive := buildDocx(t, docXML)
	reader := bytes.NewReader(archive)

	extracted, err := parse.ExtractDOCX(reader)
	if err != nil {
		t.Fatalf("ExtractDOCX: %v", err)
	}

	// Each of these fragments appears in the fixture and must survive
	// extraction verbatim.
	for _, want := range [...]string{
		"Which keyword declares a variable",
		"fmt.Println",
		"n int, err error",
	} {
		if strings.Contains(extracted, want) {
			continue
		}
		t.Errorf("text missing %q\nfull text:\n%s", want, extracted)
	}

	// Non-empty text must yield at least one chunk.
	pieces := parse.Chunk(extracted, 10000)
	if len(pieces) < 1 {
		t.Error("Chunk returned 0 chunks for non-empty text")
	}

	t.Logf("extracted %d chars, %d chunk(s)", len(extracted), len(pieces))
}