package llm import ( "context" "crypto/sha256" "encoding/json" "fmt" openai "github.com/sashabaranov/go-openai" ) // ParsedQuestion is a question extracted from a document chunk by the LLM. type ParsedQuestion struct { Question string Answers []ParsedAnswer } // ParsedAnswer is one answer choice for a ParsedQuestion. type ParsedAnswer struct { Text string Correct bool } // ChatClient is the interface for creating chat completions. // The concrete *openai.Client satisfies this interface. type ChatClient interface { CreateChatCompletion(ctx context.Context, req openai.ChatCompletionRequest) (openai.ChatCompletionResponse, error) } // Client wraps a ChatClient with question-extraction logic. type Client struct { cc ChatClient model string } // New creates a Client backed by the real OpenAI API. func New(apiKey, model string) *Client { if model == "" { model = "gpt-4o-mini" } return &Client{cc: openai.NewClient(apiKey), model: model} } // NewWithClient creates a Client with an injected ChatClient (useful for tests). func NewWithClient(cc ChatClient, model string) *Client { return &Client{cc: cc, model: model} } const systemPrompt = `You extract multiple-choice questions from study material. Return every question found. Exactly one answer per question must be marked correct. If the source doesn't clearly mark a correct answer, omit that question entirely. Do not invent questions not present in the text. Respond with JSON matching this schema exactly: {"questions":[{"question":"","answers":[{"text":"","correct":false},{"text":"","correct":true}]}]}` type llmResponse struct { Questions []struct { Question string `json:"question"` Answers []struct { Text string `json:"text"` Correct bool `json:"correct"` } `json:"answers"` } `json:"questions"` } // ExtractQuestions sends chunk to the LLM and returns validated, deduplicated questions. // Questions that do not have exactly one correct answer are silently dropped. func (c *Client) ExtractQuestions(ctx context.Context, chunk string) ([]ParsedQuestion, error) { resp, err := c.cc.CreateChatCompletion(ctx, openai.ChatCompletionRequest{ Model: c.model, Messages: []openai.ChatCompletionMessage{ {Role: openai.ChatMessageRoleSystem, Content: systemPrompt}, {Role: openai.ChatMessageRoleUser, Content: chunk}, }, ResponseFormat: &openai.ChatCompletionResponseFormat{ Type: openai.ChatCompletionResponseFormatTypeJSONObject, }, }) if err != nil { return nil, fmt.Errorf("openai: %w", err) } if len(resp.Choices) == 0 { return nil, fmt.Errorf("openai: empty response") } var raw llmResponse if err := json.Unmarshal([]byte(resp.Choices[0].Message.Content), &raw); err != nil { return nil, fmt.Errorf("parse llm response: %w", err) } seen := make(map[string]bool) var out []ParsedQuestion for _, q := range raw.Questions { var nCorrect int for _, a := range q.Answers { if a.Correct { nCorrect++ } } if nCorrect != 1 { continue } key := textHash(q.Question) if seen[key] { continue } seen[key] = true pq := ParsedQuestion{Question: q.Question} for _, a := range q.Answers { pq.Answers = append(pq.Answers, ParsedAnswer{Text: a.Text, Correct: a.Correct}) } out = append(out, pq) } return out, nil } func textHash(s string) string { h := sha256.Sum256([]byte(s)) return fmt.Sprintf("%x", h[:8]) }