Phase 4: upload, LLM extraction, import review flow

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Jānis Kacēns
2026-05-11 13:15:04 +03:00
parent e53e7662e9
commit 5199c1fa16
10 changed files with 447 additions and 6 deletions
+1 -1
View File
@@ -24,7 +24,7 @@ func Load() *Config {
OpenAIAPIKey: os.Getenv("OPENAI_API_KEY"),
SessionSecret: os.Getenv("SESSION_SECRET"),
DataDir: envOr("DATA_DIR", "./data"),
Port: envOr("PORT", "8080"),
Port: envOr("PORT", "8079"),
LLMModel: envOr("LLM_MODEL", "gpt-4o-mini"),
}
cfg.AdminUsers = parseAdminUsers(os.Getenv("ADMIN_USERS"))
+48
View File
@@ -1,8 +1,10 @@
package db
import (
"crypto/rand"
"crypto/sha256"
"database/sql"
"encoding/hex"
"encoding/json"
"fmt"
"strings"
@@ -373,3 +375,49 @@ func (r *Repo) GetStatsForUser(userID int64, questionIDs []string) (map[string]*
}
return result, rows.Err()
}
// ── Draft (import review) ────────────────────────────────────────────────────
// newDraftID returns a fresh identifier for an import draft: 16 bytes read
// from crypto/rand, hex-encoded into a 32-character lowercase string.
//
// It panics if the random source reports an error. Carrying on with an
// unfilled (all-zero) buffer would hand out predictable, colliding IDs, and
// the ID is the only thing naming a draft in the import_drafts table.
func newDraftID() string {
	b := make([]byte, 16)
	if _, err := rand.Read(b); err != nil {
		panic("db: reading random bytes for draft id: " + err.Error())
	}
	return hex.EncodeToString(b)
}
// CreateDraft encodes questions as JSON and inserts them as a new
// import_drafts row owned by userID and labelled with source.
//
// It returns the generated draft ID. If encoding or the insert fails, the
// returned ID is empty and the underlying error is passed through unchanged.
func (r *Repo) CreateDraft(userID int64, source string, questions []models.DraftQuestion) (string, error) {
	payload, err := json.Marshal(questions)
	if err != nil {
		return "", err
	}

	const insertDraft = "INSERT INTO import_drafts (id, user_id, source, questions) VALUES (?, ?, ?, ?)"

	draftID := newDraftID()
	if _, err := r.db.Exec(insertDraft, draftID, userID, source, string(payload)); err != nil {
		return "", err
	}
	return draftID, nil
}
// GetDraftForUser loads the draft with the given id, but only when it belongs
// to userID: ownership is part of the WHERE clause, so a draft owned by
// someone else yields the same scan error as a draft that does not exist.
//
// Errors from scanning the row or from decoding the stored questions JSON are
// returned unchanged together with a nil draft.
func (r *Repo) GetDraftForUser(id string, userID int64) (*models.Draft, error) {
	const selectDraft = "SELECT id, user_id, source, questions, created_at FROM import_drafts WHERE id = ? AND user_id = ?"

	var (
		draft      models.Draft
		rawQs      string
		rawCreated string
	)

	row := r.db.QueryRow(selectDraft, id, userID)
	if err := row.Scan(&draft.ID, &draft.UserID, &draft.Source, &rawQs, &rawCreated); err != nil {
		return nil, err
	}

	draft.CreatedAt = parseTime(rawCreated)
	if err := json.Unmarshal([]byte(rawQs), &draft.Questions); err != nil {
		return nil, err
	}
	return &draft, nil
}
// DeleteDraft removes the import_drafts row with the given id and returns any
// error from executing the statement.
//
// The DELETE is keyed on id alone and is not scoped to a user.
// NOTE(review): callers are presumably expected to verify ownership first
// (e.g. via GetDraftForUser) — confirm at every call site.
func (r *Repo) DeleteDraft(id string) error {
	if _, err := r.db.Exec("DELETE FROM import_drafts WHERE id = ?", id); err != nil {
		return err
	}
	return nil
}
+8
View File
@@ -54,6 +54,14 @@ CREATE TABLE IF NOT EXISTS sessions (
);
CREATE INDEX IF NOT EXISTS idx_sessions_expiry ON sessions(expiry);
CREATE TABLE IF NOT EXISTS import_drafts (
id TEXT PRIMARY KEY,
user_id INTEGER NOT NULL REFERENCES users(id),
source TEXT NOT NULL DEFAULT '',
questions TEXT NOT NULL,
created_at DATETIME DEFAULT CURRENT_TIMESTAMP
);
CREATE INDEX IF NOT EXISTS idx_test_answers_test ON test_answers(test_id);
CREATE INDEX IF NOT EXISTS idx_answers_question ON answers(question_id);
CREATE INDEX IF NOT EXISTS idx_stats_user ON user_question_stats(user_id);
+5 -1
View File
@@ -9,6 +9,10 @@ import (
"qbank/internal/auth"
)
// tmplFuncs is the set of helper functions made available to templates.
//
//	inc  returns its integer argument plus one.
var tmplFuncs = template.FuncMap{
	"inc": func(n int) int {
		return n + 1
	},
}
// Renderer parses and executes HTML templates from a directory.
type Renderer struct {
dir string
@@ -18,7 +22,7 @@ func NewRenderer(dir string) *Renderer { return &Renderer{dir: dir} }
// Render executes layout.html + <name>.html, passing data to the "layout" template.
func (r *Renderer) Render(w http.ResponseWriter, status int, name string, data any) {
t, err := template.ParseFiles(
t, err := template.New("").Funcs(tmplFuncs).ParseFiles(
filepath.Join(r.dir, "layout.html"),
filepath.Join(r.dir, name+".html"),
)
+233
View File
@@ -0,0 +1,233 @@
package handlers
import (
"bytes"
"errors"
"fmt"
"io"
"log/slog"
"net/http"
"os"
"path/filepath"
"strings"
"time"
"github.com/go-chi/chi/v5"
"qbank/internal/auth"
"qbank/internal/db"
"qbank/internal/llm"
"qbank/internal/models"
"qbank/internal/parse"
)
// UploadHandler bundles the dependencies shared by the upload form handlers
// (UploadGet, UploadPost) and the import-review handlers (ImportGet,
// ImportPost).
type UploadHandler struct {
// auth performs CSRF checks, sets flash messages and feeds BaseData.
auth *auth.Manager
// repo stores import drafts and inserts the reviewed questions.
repo *db.Repo
// llm extracts questions from chunks of document text.
llm *llm.Client
// render executes the "upload" and "import" page templates.
render *Renderer
// dataDir is the directory under which the "uploads" folder is created.
dataDir string
}
// NewUploadHandler builds an UploadHandler from its collaborators: the auth
// manager, the repository, the LLM client, the template renderer and the data
// directory that uploaded files are written under.
func NewUploadHandler(a *auth.Manager, repo *db.Repo, llmClient *llm.Client, r *Renderer, dataDir string) *UploadHandler {
	h := new(UploadHandler)
	h.auth = a
	h.repo = repo
	h.llm = llmClient
	h.render = r
	h.dataDir = dataDir
	return h
}
// UploadGet renders the "upload" page with status 200, passing only the base
// template data for the current request.
func (h *UploadHandler) UploadGet(w http.ResponseWriter, r *http.Request) {
	h.render.Render(w, http.StatusOK, "upload", BaseData(h.auth, r))
}
// UploadPost handles a submitted upload form.
//
// Steps, in order:
//  1. Cap the request body at 20 MB and verify the CSRF token (403 on failure).
//  2. Read the "file" form field; only .pdf and .docx extensions are accepted.
//  3. Write the raw bytes to <dataDir>/uploads/<unix-millis>_<basename>.
//  4. Extract the document text, split it into chunks and ask the LLM for
//     questions in each chunk, dropping questions whose db.QuestionID was
//     already seen in this upload.
//  5. Store the result as an import draft and redirect (303) to /import/<id>.
//
// User-correctable problems re-render the "upload" page with an "Error" entry
// and status 422. A failing chunk is logged and skipped, but if every chunk
// fails the page is re-rendered with status 502 rather than creating an empty
// draft that would hide the failure. If the request context is already done
// (client gone or deadline hit) the handler stops without creating a draft,
// since the remaining LLM calls would fail the same way.
func (h *UploadHandler) UploadPost(w http.ResponseWriter, r *http.Request) {
	r.Body = http.MaxBytesReader(w, r.Body, 20<<20) // 20 MB

	// renderPage re-renders the upload form with an error message.
	renderPage := func(status int, msg string) {
		data := BaseData(h.auth, r)
		data["Error"] = msg
		h.render.Render(w, status, "upload", data)
	}
	renderErr := func(msg string) {
		renderPage(http.StatusUnprocessableEntity, msg)
	}

	if !h.auth.CheckCSRF(r) {
		HTTPError(w, http.StatusForbidden)
		return
	}

	if err := r.ParseMultipartForm(20 << 20); err != nil {
		renderErr("File too large (max 20 MB).")
		return
	}

	file, header, err := r.FormFile("file")
	if err != nil {
		renderErr("No file selected.")
		return
	}
	defer file.Close()

	ext := strings.ToLower(filepath.Ext(header.Filename))
	if ext != ".pdf" && ext != ".docx" {
		renderErr("Unsupported file type. Upload a .pdf or .docx file.")
		return
	}

	// Read file once; use for both saving and extraction.
	data, err := io.ReadAll(file)
	if err != nil {
		renderErr("Failed to read uploaded file.")
		return
	}

	// Save to disk.
	uploadsDir := filepath.Join(h.dataDir, "uploads")
	if err := os.MkdirAll(uploadsDir, 0755); err != nil {
		renderErr("Server error: could not create uploads directory.")
		return
	}
	// Base strips any directory components the client put in the filename.
	safeName := filepath.Base(header.Filename)
	savedPath := filepath.Join(uploadsDir, fmt.Sprintf("%d_%s", time.Now().UnixMilli(), safeName))
	if err := os.WriteFile(savedPath, data, 0644); err != nil {
		renderErr("Server error: could not save file.")
		return
	}

	// Extract text.
	var text string
	switch ext {
	case ".pdf":
		text, err = parse.ExtractPDF(bytes.NewReader(data))
		if errors.Is(err, parse.ErrScanPDF) {
			renderErr("This PDF appears to be image-only (scan-based). Please convert it to a text PDF first.")
			return
		}
	case ".docx":
		text, err = parse.ExtractDOCX(bytes.NewReader(data))
	}
	if err != nil {
		renderErr("Could not extract text from the document: " + err.Error())
		return
	}

	// Chunk and call LLM.
	chunks := parse.Chunk(text, 10_000)
	seen := make(map[string]bool)
	var draftQs []models.DraftQuestion
	failedChunks := 0
	for _, chunk := range chunks {
		qs, err := h.llm.ExtractQuestions(r.Context(), chunk)
		if err != nil {
			// A finished request context makes every further call fail too,
			// and nobody is left to look at a draft: stop here.
			if ctxErr := r.Context().Err(); ctxErr != nil {
				slog.Warn("upload aborted: request context done", "err", ctxErr)
				return
			}
			slog.Warn("llm chunk failed", "err", err)
			failedChunks++
			continue
		}
		for _, q := range qs {
			// De-duplicate questions repeated across chunks.
			key := db.QuestionID(q.Question)
			if seen[key] {
				continue
			}
			seen[key] = true
			dq := models.DraftQuestion{Text: q.Question}
			for _, a := range q.Answers {
				dq.Answers = append(dq.Answers, models.DraftAnswer{Text: a.Text, IsCorrect: a.Correct})
			}
			draftQs = append(draftQs, dq)
		}
	}

	// Partial failures still yield a usable draft; a total failure does not.
	if len(chunks) > 0 && failedChunks == len(chunks) {
		renderPage(http.StatusBadGateway, "Could not extract questions from the document: the extraction service failed. Please try again later.")
		return
	}

	// Default the source label to the file name without its extension.
	source := strings.TrimSpace(r.FormValue("source"))
	if source == "" {
		source = strings.TrimSuffix(safeName, ext)
	}

	userID := auth.UserFromCtx(r.Context()).ID
	draftID, err := h.repo.CreateDraft(userID, source, draftQs)
	if err != nil {
		renderErr("Server error: could not create import draft.")
		return
	}

	http.Redirect(w, r, "/import/"+draftID, http.StatusSeeOther)
}
// ImportGet renders the "import" review page for the draft named by the "id"
// URL parameter. The draft is looked up for the current user only; any lookup
// error is answered with 404.
func (h *UploadHandler) ImportGet(w http.ResponseWriter, r *http.Request) {
	id := chi.URLParam(r, "id")
	owner := auth.UserFromCtx(r.Context()).ID

	draft, err := h.repo.GetDraftForUser(id, owner)
	if err != nil {
		HTTPError(w, http.StatusNotFound)
		return
	}

	page := BaseData(h.auth, r)
	page["Draft"] = draft
	h.render.Render(w, http.StatusOK, "import", page)
}
// ImportPost applies the user's review of a draft and imports the result.
//
// After the CSRF check (403 on failure) the draft is loaded for the current
// user (404 on any lookup error). For each draft question i the form is read:
//
//	delete_<i>      "on" skips the question
//	q_text_<i>      edited question text; blank skips the question
//	a_text_<i>_<j>  edited text of answer j; blank answers are dropped
//	correct_<i>     index j of the answer marked correct
//
// A question is inserted only if at least two answers remain and exactly one
// of them is correct; otherwise, or if the insert fails, it counts as
// skipped. The "source" form value labels the imported questions; when it is
// blank the source stored on the draft is used, so questions never lose the
// label chosen at upload time.
//
// The draft is then deleted (a failure is only logged), a flash message
// reports the counts, and the client is redirected (303) to "/".
func (h *UploadHandler) ImportPost(w http.ResponseWriter, r *http.Request) {
	draftID := chi.URLParam(r, "id")
	userID := auth.UserFromCtx(r.Context()).ID

	if !h.auth.CheckCSRF(r) {
		HTTPError(w, http.StatusForbidden)
		return
	}

	draft, err := h.repo.GetDraftForUser(draftID, userID)
	if err != nil {
		HTTPError(w, http.StatusNotFound)
		return
	}

	source := strings.TrimSpace(r.FormValue("source"))
	if source == "" {
		// Fall back to the label recorded when the draft was created.
		source = draft.Source
	}

	var imported, skipped int
	for i, dq := range draft.Questions {
		if r.FormValue(fmt.Sprintf("delete_%d", i)) == "on" {
			skipped++
			continue
		}
		text := strings.TrimSpace(r.FormValue(fmt.Sprintf("q_text_%d", i)))
		if text == "" {
			skipped++
			continue
		}

		correctIdx := r.FormValue(fmt.Sprintf("correct_%d", i))
		var answers []*models.Answer
		var nCorrect int
		for j := range dq.Answers {
			aText := strings.TrimSpace(r.FormValue(fmt.Sprintf("a_text_%d_%d", i, j)))
			if aText == "" {
				// A blanked-out answer is dropped, even if it was marked correct.
				continue
			}
			isCorrect := fmt.Sprintf("%d", j) == correctIdx
			if isCorrect {
				nCorrect++
			}
			answers = append(answers, &models.Answer{
				Text:      aText,
				IsCorrect: isCorrect,
			})
		}

		if len(answers) < 2 || nCorrect != 1 {
			skipped++
			continue
		}

		q := &models.Question{Text: text, Source: source}
		if err := h.repo.InsertQuestion(q, answers); err != nil {
			slog.Error("insert question", "err", err)
			skipped++
			continue
		}
		imported++
	}

	if err := h.repo.DeleteDraft(draftID); err != nil {
		slog.Error("delete draft", "id", draftID, "err", err)
	}

	h.auth.SetFlash(r, fmt.Sprintf("Imported %d question(s), %d skipped.", imported, skipped))
	http.Redirect(w, r, "/", http.StatusSeeOther)
}
+19
View File
@@ -44,6 +44,25 @@ type TestAnswer struct {
AnsweredAt sql.NullTime
}
// Draft holds LLM-extracted questions pending user review before import.
// It mirrors one row of the import_drafts table.
type Draft struct {
// ID is the draft's primary key.
ID string
// UserID is the ID of the user who owns the draft.
UserID int64
// Source is the label recorded for the draft's questions.
Source string
// Questions are the extracted questions, stored as JSON in the row.
Questions []DraftQuestion
// CreatedAt is parsed from the row's created_at column.
CreatedAt time.Time
}
// Question and answer shapes as they are serialized to JSON inside a draft.
type (
	// DraftQuestion is one extracted question together with its candidate
	// answers.
	DraftQuestion struct {
		Text    string        `json:"text"`
		Answers []DraftAnswer `json:"answers"`
	}

	// DraftAnswer is a single answer option; IsCorrect flags whether it is
	// marked as the correct one.
	DraftAnswer struct {
		Text      string `json:"text"`
		IsCorrect bool   `json:"is_correct"`
	}
)
type UserQuestionStat struct {
UserID int64
QuestionID string