Phase 4: upload, LLM extraction, import review flow
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -24,7 +24,7 @@ func Load() *Config {
|
||||
OpenAIAPIKey: os.Getenv("OPENAI_API_KEY"),
|
||||
SessionSecret: os.Getenv("SESSION_SECRET"),
|
||||
DataDir: envOr("DATA_DIR", "./data"),
|
||||
Port: envOr("PORT", "8080"),
|
||||
Port: envOr("PORT", "8079"),
|
||||
LLMModel: envOr("LLM_MODEL", "gpt-4o-mini"),
|
||||
}
|
||||
cfg.AdminUsers = parseAdminUsers(os.Getenv("ADMIN_USERS"))
|
||||
|
||||
@@ -1,8 +1,10 @@
|
||||
package db
|
||||
|
||||
import (
|
||||
"crypto/rand"
|
||||
"crypto/sha256"
|
||||
"database/sql"
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"strings"
|
||||
@@ -373,3 +375,49 @@ func (r *Repo) GetStatsForUser(userID int64, questionIDs []string) (map[string]*
|
||||
}
|
||||
return result, rows.Err()
|
||||
}
|
||||
|
||||
// ── Draft (import review) ────────────────────────────────────────────────────
|
||||
|
||||
// newDraftID returns a fresh random identifier for an import draft: 16 bytes
// from crypto/rand rendered as 32 lowercase hexadecimal characters.
//
// It panics if the secure random source cannot be read. Carrying on would
// hex-encode a zeroed (or partially filled) buffer and hand out predictable,
// colliding IDs, which is worse than failing loudly.
func newDraftID() string {
	b := make([]byte, 16)
	if _, err := rand.Read(b); err != nil {
		panic("db: reading random bytes for draft id: " + err.Error())
	}
	return hex.EncodeToString(b)
}
|
||||
|
||||
func (r *Repo) CreateDraft(userID int64, source string, questions []models.DraftQuestion) (string, error) {
|
||||
data, err := json.Marshal(questions)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
id := newDraftID()
|
||||
_, err = r.db.Exec(
|
||||
"INSERT INTO import_drafts (id, user_id, source, questions) VALUES (?, ?, ?, ?)",
|
||||
id, userID, source, string(data),
|
||||
)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return id, nil
|
||||
}
|
||||
|
||||
func (r *Repo) GetDraftForUser(id string, userID int64) (*models.Draft, error) {
|
||||
d := &models.Draft{}
|
||||
var questionsJSON, createdAt string
|
||||
err := r.db.QueryRow(
|
||||
"SELECT id, user_id, source, questions, created_at FROM import_drafts WHERE id = ? AND user_id = ?",
|
||||
id, userID,
|
||||
).Scan(&d.ID, &d.UserID, &d.Source, &questionsJSON, &createdAt)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
d.CreatedAt = parseTime(createdAt)
|
||||
if err := json.Unmarshal([]byte(questionsJSON), &d.Questions); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return d, nil
|
||||
}
|
||||
|
||||
func (r *Repo) DeleteDraft(id string) error {
|
||||
_, err := r.db.Exec("DELETE FROM import_drafts WHERE id = ?", id)
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -54,6 +54,14 @@ CREATE TABLE IF NOT EXISTS sessions (
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS idx_sessions_expiry ON sessions(expiry);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS import_drafts (
|
||||
id TEXT PRIMARY KEY,
|
||||
user_id INTEGER NOT NULL REFERENCES users(id),
|
||||
source TEXT NOT NULL DEFAULT '',
|
||||
questions TEXT NOT NULL,
|
||||
created_at DATETIME DEFAULT CURRENT_TIMESTAMP
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_test_answers_test ON test_answers(test_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_answers_question ON answers(question_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_stats_user ON user_question_stats(user_id);
|
||||
|
||||
@@ -9,6 +9,10 @@ import (
|
||||
"qbank/internal/auth"
|
||||
)
|
||||
|
||||
// tmplFuncs is the set of helper functions made available to templates.
var tmplFuncs = template.FuncMap{
	// inc returns its argument plus one.
	"inc": func(n int) int {
		return n + 1
	},
}
|
||||
|
||||
// Renderer parses and executes HTML templates from a directory.
|
||||
type Renderer struct {
|
||||
dir string
|
||||
@@ -18,7 +22,7 @@ func NewRenderer(dir string) *Renderer { return &Renderer{dir: dir} }
|
||||
|
||||
// Render executes layout.html + <name>.html, passing data to the "layout" template.
|
||||
func (r *Renderer) Render(w http.ResponseWriter, status int, name string, data any) {
|
||||
t, err := template.ParseFiles(
|
||||
t, err := template.New("").Funcs(tmplFuncs).ParseFiles(
|
||||
filepath.Join(r.dir, "layout.html"),
|
||||
filepath.Join(r.dir, name+".html"),
|
||||
)
|
||||
|
||||
@@ -0,0 +1,233 @@
|
||||
package handlers
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/go-chi/chi/v5"
|
||||
|
||||
"qbank/internal/auth"
|
||||
"qbank/internal/db"
|
||||
"qbank/internal/llm"
|
||||
"qbank/internal/models"
|
||||
"qbank/internal/parse"
|
||||
)
|
||||
|
||||
type UploadHandler struct {
|
||||
auth *auth.Manager
|
||||
repo *db.Repo
|
||||
llm *llm.Client
|
||||
render *Renderer
|
||||
dataDir string
|
||||
}
|
||||
|
||||
func NewUploadHandler(a *auth.Manager, repo *db.Repo, llmClient *llm.Client, r *Renderer, dataDir string) *UploadHandler {
|
||||
return &UploadHandler{auth: a, repo: repo, llm: llmClient, render: r, dataDir: dataDir}
|
||||
}
|
||||
|
||||
func (h *UploadHandler) UploadGet(w http.ResponseWriter, r *http.Request) {
|
||||
data := BaseData(h.auth, r)
|
||||
h.render.Render(w, http.StatusOK, "upload", data)
|
||||
}
|
||||
|
||||
func (h *UploadHandler) UploadPost(w http.ResponseWriter, r *http.Request) {
|
||||
r.Body = http.MaxBytesReader(w, r.Body, 20<<20) // 20 MB
|
||||
|
||||
renderErr := func(msg string) {
|
||||
data := BaseData(h.auth, r)
|
||||
data["Error"] = msg
|
||||
h.render.Render(w, http.StatusUnprocessableEntity, "upload", data)
|
||||
}
|
||||
|
||||
if !h.auth.CheckCSRF(r) {
|
||||
HTTPError(w, http.StatusForbidden)
|
||||
return
|
||||
}
|
||||
if err := r.ParseMultipartForm(20 << 20); err != nil {
|
||||
renderErr("File too large (max 20 MB).")
|
||||
return
|
||||
}
|
||||
|
||||
file, header, err := r.FormFile("file")
|
||||
if err != nil {
|
||||
renderErr("No file selected.")
|
||||
return
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
ext := strings.ToLower(filepath.Ext(header.Filename))
|
||||
if ext != ".pdf" && ext != ".docx" {
|
||||
renderErr("Unsupported file type. Upload a .pdf or .docx file.")
|
||||
return
|
||||
}
|
||||
|
||||
// Read file once; use for both saving and extraction.
|
||||
data, err := io.ReadAll(file)
|
||||
if err != nil {
|
||||
renderErr("Failed to read uploaded file.")
|
||||
return
|
||||
}
|
||||
|
||||
// Save to disk.
|
||||
uploadsDir := filepath.Join(h.dataDir, "uploads")
|
||||
if err := os.MkdirAll(uploadsDir, 0755); err != nil {
|
||||
renderErr("Server error: could not create uploads directory.")
|
||||
return
|
||||
}
|
||||
safeName := filepath.Base(header.Filename)
|
||||
savedPath := filepath.Join(uploadsDir, fmt.Sprintf("%d_%s", time.Now().UnixMilli(), safeName))
|
||||
if err := os.WriteFile(savedPath, data, 0644); err != nil {
|
||||
renderErr("Server error: could not save file.")
|
||||
return
|
||||
}
|
||||
|
||||
// Extract text.
|
||||
var text string
|
||||
switch ext {
|
||||
case ".pdf":
|
||||
text, err = parse.ExtractPDF(bytes.NewReader(data))
|
||||
if errors.Is(err, parse.ErrScanPDF) {
|
||||
renderErr("This PDF appears to be image-only (scan-based). Please convert it to a text PDF first.")
|
||||
return
|
||||
}
|
||||
case ".docx":
|
||||
text, err = parse.ExtractDOCX(bytes.NewReader(data))
|
||||
}
|
||||
if err != nil {
|
||||
renderErr("Could not extract text from the document: " + err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
// Chunk and call LLM.
|
||||
chunks := parse.Chunk(text, 10_000)
|
||||
seen := make(map[string]bool)
|
||||
var draftQs []models.DraftQuestion
|
||||
|
||||
for _, chunk := range chunks {
|
||||
qs, err := h.llm.ExtractQuestions(r.Context(), chunk)
|
||||
if err != nil {
|
||||
slog.Warn("llm chunk failed", "err", err)
|
||||
continue
|
||||
}
|
||||
for _, q := range qs {
|
||||
key := db.QuestionID(q.Question)
|
||||
if seen[key] {
|
||||
continue
|
||||
}
|
||||
seen[key] = true
|
||||
dq := models.DraftQuestion{Text: q.Question}
|
||||
for _, a := range q.Answers {
|
||||
dq.Answers = append(dq.Answers, models.DraftAnswer{Text: a.Text, IsCorrect: a.Correct})
|
||||
}
|
||||
draftQs = append(draftQs, dq)
|
||||
}
|
||||
}
|
||||
|
||||
source := strings.TrimSpace(r.FormValue("source"))
|
||||
if source == "" {
|
||||
source = strings.TrimSuffix(safeName, ext)
|
||||
}
|
||||
|
||||
userID := auth.UserFromCtx(r.Context()).ID
|
||||
draftID, err := h.repo.CreateDraft(userID, source, draftQs)
|
||||
if err != nil {
|
||||
renderErr("Server error: could not create import draft.")
|
||||
return
|
||||
}
|
||||
|
||||
http.Redirect(w, r, "/import/"+draftID, http.StatusSeeOther)
|
||||
}
|
||||
|
||||
func (h *UploadHandler) ImportGet(w http.ResponseWriter, r *http.Request) {
|
||||
draftID := chi.URLParam(r, "id")
|
||||
userID := auth.UserFromCtx(r.Context()).ID
|
||||
|
||||
draft, err := h.repo.GetDraftForUser(draftID, userID)
|
||||
if err != nil {
|
||||
HTTPError(w, http.StatusNotFound)
|
||||
return
|
||||
}
|
||||
|
||||
data := BaseData(h.auth, r)
|
||||
data["Draft"] = draft
|
||||
h.render.Render(w, http.StatusOK, "import", data)
|
||||
}
|
||||
|
||||
func (h *UploadHandler) ImportPost(w http.ResponseWriter, r *http.Request) {
|
||||
draftID := chi.URLParam(r, "id")
|
||||
userID := auth.UserFromCtx(r.Context()).ID
|
||||
|
||||
if !h.auth.CheckCSRF(r) {
|
||||
HTTPError(w, http.StatusForbidden)
|
||||
return
|
||||
}
|
||||
|
||||
draft, err := h.repo.GetDraftForUser(draftID, userID)
|
||||
if err != nil {
|
||||
HTTPError(w, http.StatusNotFound)
|
||||
return
|
||||
}
|
||||
|
||||
source := strings.TrimSpace(r.FormValue("source"))
|
||||
|
||||
var imported, skipped int
|
||||
for i, dq := range draft.Questions {
|
||||
if r.FormValue(fmt.Sprintf("delete_%d", i)) == "on" {
|
||||
skipped++
|
||||
continue
|
||||
}
|
||||
|
||||
text := strings.TrimSpace(r.FormValue(fmt.Sprintf("q_text_%d", i)))
|
||||
if text == "" {
|
||||
skipped++
|
||||
continue
|
||||
}
|
||||
|
||||
correctIdx := r.FormValue(fmt.Sprintf("correct_%d", i))
|
||||
var answers []*models.Answer
|
||||
for j := range dq.Answers {
|
||||
aText := strings.TrimSpace(r.FormValue(fmt.Sprintf("a_text_%d_%d", i, j)))
|
||||
if aText == "" {
|
||||
continue
|
||||
}
|
||||
answers = append(answers, &models.Answer{
|
||||
Text: aText,
|
||||
IsCorrect: fmt.Sprintf("%d", j) == correctIdx,
|
||||
})
|
||||
}
|
||||
|
||||
var nCorrect int
|
||||
for _, a := range answers {
|
||||
if a.IsCorrect {
|
||||
nCorrect++
|
||||
}
|
||||
}
|
||||
if len(answers) < 2 || nCorrect != 1 {
|
||||
skipped++
|
||||
continue
|
||||
}
|
||||
|
||||
q := &models.Question{Text: text, Source: source}
|
||||
if err := h.repo.InsertQuestion(q, answers); err != nil {
|
||||
slog.Error("insert question", "err", err)
|
||||
skipped++
|
||||
continue
|
||||
}
|
||||
imported++
|
||||
}
|
||||
|
||||
if err := h.repo.DeleteDraft(draftID); err != nil {
|
||||
slog.Error("delete draft", "id", draftID, "err", err)
|
||||
}
|
||||
|
||||
h.auth.SetFlash(r, fmt.Sprintf("Imported %d question(s), %d skipped.", imported, skipped))
|
||||
http.Redirect(w, r, "/", http.StatusSeeOther)
|
||||
}
|
||||
@@ -44,6 +44,25 @@ type TestAnswer struct {
|
||||
AnsweredAt sql.NullTime
|
||||
}
|
||||
|
||||
type (
	// Draft is a set of LLM-extracted questions waiting for the user to
	// review them before they are imported.
	Draft struct {
		ID        string
		UserID    int64
		Source    string
		Questions []DraftQuestion
		CreatedAt time.Time
	}

	// DraftQuestion is one extracted question with its candidate answers.
	// It is serialised to JSON under the keys "text" and "answers".
	DraftQuestion struct {
		Text    string        `json:"text"`
		Answers []DraftAnswer `json:"answers"`
	}

	// DraftAnswer is one candidate answer of a DraftQuestion. It is
	// serialised to JSON under the keys "text" and "is_correct".
	DraftAnswer struct {
		Text      string `json:"text"`
		IsCorrect bool   `json:"is_correct"`
	}
)
|
||||
|
||||
type UserQuestionStat struct {
|
||||
UserID int64
|
||||
QuestionID string
|
||||
|
||||
Reference in New Issue
Block a user