Phase 3: PDF/DOCX extraction, chunking, LLM client with mock interface
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,62 @@
|
||||
package parse_test
|
||||
|
||||
import (
|
||||
"archive/zip"
|
||||
"bytes"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"qbank/internal/parse"
|
||||
)
|
||||
|
||||
func TestExtractDOCX(t *testing.T) {
|
||||
const docXML = `<?xml version="1.0" encoding="UTF-8"?>
|
||||
<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
|
||||
<w:body>
|
||||
<w:p><w:r><w:t>Question 1: What is Go?</w:t></w:r></w:p>
|
||||
<w:p><w:r><w:t>A) A compiled language</w:t></w:r></w:p>
|
||||
<w:p><w:r><w:t>B) An interpreted language</w:t></w:r></w:p>
|
||||
<w:p><w:r><w:t>C) A markup language</w:t></w:r></w:p>
|
||||
</w:body>
|
||||
</w:document>`
|
||||
|
||||
docx := buildDocx(t, docXML)
|
||||
|
||||
text, err := parse.ExtractDOCX(bytes.NewReader(docx))
|
||||
if err != nil {
|
||||
t.Fatalf("ExtractDOCX: %v", err)
|
||||
}
|
||||
for _, want := range []string{"Question 1", "compiled language", "interpreted language"} {
|
||||
if !strings.Contains(text, want) {
|
||||
t.Errorf("output missing %q; got:\n%s", want, text)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractDOCX_MissingXML(t *testing.T) {
|
||||
var buf bytes.Buffer
|
||||
w := zip.NewWriter(&buf)
|
||||
w.Close()
|
||||
|
||||
_, err := parse.ExtractDOCX(bytes.NewReader(buf.Bytes()))
|
||||
if err == nil {
|
||||
t.Error("expected error for docx without document.xml")
|
||||
}
|
||||
}
|
||||
|
||||
// buildDocx returns the bytes of an in-memory zip archive containing a single
// entry, word/document.xml, whose contents are xmlContent. Any failure while
// assembling the archive aborts the calling test via t.Fatal.
func buildDocx(t *testing.T, xmlContent string) []byte {
	t.Helper()

	// failOn stops the test on the first archive-construction error.
	failOn := func(err error) {
		t.Helper()
		if err != nil {
			t.Fatal(err)
		}
	}

	archive := new(bytes.Buffer)
	zw := zip.NewWriter(archive)

	entry, err := zw.Create("word/document.xml")
	failOn(err)

	_, err = entry.Write([]byte(xmlContent))
	failOn(err)

	// Closing the writer emits the central directory; the archive is not
	// readable until this has succeeded.
	failOn(zw.Close())

	return archive.Bytes()
}
|
||||
Reference in New Issue
Block a user