e53e7662e9
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
32 lines
719 B
Go
32 lines
719 B
Go
package parse
|
|
|
|
import "strings"
|
|
|
|
// Chunk splits text into paragraphs on blank lines (the literal separator
// "\n\n") and greedily packs consecutive paragraphs into chunks whose length,
// measured in runes, does not exceed maxRunes.
//
// Each paragraph is trimmed of surrounding whitespace, and paragraphs that are
// empty after trimming are dropped. Paragraphs packed into the same chunk are
// re-joined with "\n\n", and that separator counts toward the chunk length.
//
// A single paragraph longer than maxRunes is never split: it is emitted as a
// chunk of its own. Consequently, when maxRunes is zero or negative every
// paragraph becomes its own chunk.
//
// Chunk returns nil when text contains no non-blank paragraph.
func Chunk(text string, maxRunes int) []string {
	var (
		chunks   []string
		cur      strings.Builder
		curRunes int // rune length of cur, tracked incrementally so the chunk is not recounted per paragraph
	)

	for _, p := range strings.Split(text, "\n\n") {
		p = strings.TrimSpace(p)
		if p == "" {
			continue
		}
		pLen := len([]rune(p))

		// Flush the chunk in progress if appending the separator plus this
		// paragraph would push it past the limit. An empty chunk is never
		// flushed, which is what lets an oversized paragraph through intact.
		if curRunes > 0 && curRunes+2+pLen > maxRunes {
			chunks = append(chunks, cur.String())
			cur.Reset()
			curRunes = 0
		}

		if curRunes > 0 {
			cur.WriteString("\n\n")
			curRunes += 2
		}
		cur.WriteString(p)
		curRunes += pLen
	}

	// Emit whatever is left in the final, partially filled chunk.
	if curRunes > 0 {
		chunks = append(chunks, cur.String())
	}
	return chunks
}
|