doc: cleaned up a bit.

This commit is contained in:
Hayden Hargreaves 2026-04-07 20:21:41 -07:00
parent 76f949a6b2
commit 760770c564
2 changed files with 290 additions and 154 deletions

View File

@ -3,7 +3,6 @@ package syntax
import (
"bytes"
"sort"
"strings"
"git.gophernest.net/azpect/TextEditor/internal/core"
"git.gophernest.net/azpect/TextEditor/internal/style"
@ -11,6 +10,15 @@ import (
sitter "github.com/tree-sitter/go-tree-sitter"
)
// TreeSitterEngine provides syntax highlighting using Tree-sitter queries.
//
// The engine stores per-buffer parser state and a cached style map so redraws
// can reuse prior work. It supports both full rebuilds and incremental edits:
// - full rebuilds when a buffer is first seen, language changes, or state is invalid
// - incremental updates when ApplyEdit provides enough information to reparse
// only changed regions
//
// Cached styles are represented as one style per rune for each line.
type TreeSitterEngine struct {
styles style.Styles
registry *languageRegistry
@ -18,6 +26,11 @@ type TreeSitterEngine struct {
cache map[*core.Buffer]*bufferCache
}
// bufferCache stores all derived highlighting state for a single buffer.
//
// It contains both style output (`lines`) and parse/query state (`parser`,
// `tree`, `source`, language/query bindings) so the engine can incrementally
// update only dirty lines instead of recomputing the whole file each frame.
type bufferCache struct {
built bool
lines map[int][]lipgloss.Style
@ -34,11 +47,19 @@ type bufferCache struct {
query *sitter.Query
}
// lineRange is a closed interval of line indexes: both start and end belong
// to the range.
//
// It is the unit used by dirty tracking and partial restyling to describe
// which rows need work.
type lineRange struct {
	start, end int
}
// captureRange describes one Tree-sitter capture span.
//
// Coordinates are in row/byte-column space, matching Tree-sitter node
// positions. The range is later converted to rune indexes for style writes.
type captureRange struct {
startRow uint
startCol uint
@ -47,11 +68,10 @@ type captureRange struct {
name string
}
// NewTreeSitterEngine: Creates a new tree sitter engine with the styles
// provided attached.
// NewTreeSitterEngine constructs a TreeSitterEngine with the provided style set.
//
// Currently, this engine only supports Go, but more languages can be
// added with ease.
// Language support is resolved through the language registry, so the engine can
// work with any language/query pair registered there.
func NewTreeSitterEngine(styles style.Styles) *TreeSitterEngine {
return &TreeSitterEngine{
styles: styles,
@ -60,6 +80,14 @@ func NewTreeSitterEngine(styles style.Styles) *TreeSitterEngine {
}
}
// PrepareBuffer ensures highlighting data for buf is ready to read.
//
// This method is idempotent: if cached styles are already valid (`built`), it
// returns immediately. Otherwise it resolves language support and performs a
// rebuild pass (full or dirty-range-based) to refresh `bc.lines`.
//
// If the buffer language is unsupported or resolution fails, it still marks the
// cache as built with an empty style map so callers can safely continue.
func (e *TreeSitterEngine) PrepareBuffer(buf *core.Buffer) {
// Cannot prepare a nil buffer
if buf == nil {
@ -90,6 +118,11 @@ func (e *TreeSitterEngine) PrepareBuffer(buf *core.Buffer) {
e.buildFullBuffer(buf, bc)
}
// LineStyleMap returns the style row for a specific line in buf.
//
// It first guarantees buffer preparation, then returns cached styles when
// available. Missing lines are lazily initialized to the base line style and
// stored in cache to keep downstream rendering logic simple.
func (e *TreeSitterEngine) LineStyleMap(buf *core.Buffer, line int) []lipgloss.Style {
if buf == nil {
return nil
@ -111,6 +144,17 @@ func (e *TreeSitterEngine) LineStyleMap(buf *core.Buffer, line int) []lipgloss.S
return out
}
// ApplyEdit applies an incremental buffer edit to parser and style cache state.
//
// Workflow:
// - validate buffer and language support
// - apply the edit to the current parse tree (InputEdit)
// - reparse using the previous tree as incremental context
// - collect changed line ranges from both the user edit and parser changes
// - mark cache as unbuilt so the next PrepareBuffer restyles only dirty areas
//
// If incremental parsing cannot proceed (missing parser/tree/source or parse
// failure), it falls back to a full-dirty rebuild on the next preparation.
func (e *TreeSitterEngine) ApplyEdit(buf *core.Buffer, edit *core.BufferEdit) {
if buf == nil || edit == nil {
return
@ -172,8 +216,9 @@ func (e *TreeSitterEngine) ApplyEdit(buf *core.Buffer, edit *core.BufferEdit) {
bc.built = false
}
// TreeSitterEngine.InvalidateBuffer: Deletes the entire buffers cache from the engine. If the
// buffer provided is nil, this function does nothing.
// InvalidateBuffer marks all cached highlighting data for buf as stale.
//
// The next PrepareBuffer call will rebuild styles from scratch for the buffer.
func (e *TreeSitterEngine) InvalidateBuffer(buf *core.Buffer) {
if buf == nil {
return
@ -184,9 +229,11 @@ func (e *TreeSitterEngine) InvalidateBuffer(buf *core.Buffer) {
bc.dirty = nil
}
// TreeSitterEngine.InvalidateLines: Deletes lines between start and end (inclusive) from the
// buffers cache. Then marks the cache as "unbuilt." If the buffer provided is nil, this function
// does nothing.
// InvalidateLines marks a line interval in buf as dirty.
//
// The range is inclusive and normalized by addDirtyRange. On the next
// preparation pass, those lines (plus capture-context neighbors) are
// recalculated while unchanged lines are preserved.
func (e *TreeSitterEngine) InvalidateLines(buf *core.Buffer, startLine, endLine int) {
if buf == nil {
return
@ -196,8 +243,14 @@ func (e *TreeSitterEngine) InvalidateLines(buf *core.Buffer, startLine, endLine
bc.built = false
}
// TreeSitterEngine.supportsBuffer: Returns whether the buffer can be parsed and highlighted
// by the engine. When false, there should be a fallback.
// resolveBufferLanguage resolves and applies language/query config for buf.
//
// It asks the registry to resolve filetype/filename to a concrete language id,
// language object, and highlight query. When the resolved language id changes,
// parser/query bindings are updated and the cache is marked dirty for rebuild.
//
// Returns (resolved, true, nil) on success. When unsupported it returns
// (nil, false, nil). Resolution errors are returned as the third value.
func (e *TreeSitterEngine) resolveBufferLanguage(buf *core.Buffer, bc *bufferCache) (*resolvedLanguage, bool, error) {
if e.registry == nil {
e.registry = newLanguageRegistry()
@ -222,8 +275,10 @@ func (e *TreeSitterEngine) resolveBufferLanguage(buf *core.Buffer, bc *bufferCac
return resolved, true, nil
}
// TreeSitterEngine.getCache: Returns the buffers cache. If the cache does not exist, a new one
// is created and applied to the engines cache map.
// getCache returns the cache object associated with buf, creating it if needed.
//
// New caches start with an initialized lines map and default zero-values for
// parse/highlight state.
func (e *TreeSitterEngine) getCache(buf *core.Buffer) *bufferCache {
if bc, ok := e.cache[buf]; ok {
return bc
@ -233,6 +288,15 @@ func (e *TreeSitterEngine) getCache(buf *core.Buffer) *bufferCache {
return bc
}
// buildFullBuffer rebuilds highlight styles for buf using current cache state.
//
// Despite the name, this method handles both full and partial updates:
// - full rebuild: reset every line to base style, query entire file
// - partial rebuild: reset only dirty lines, query around dirty ranges
//
// It (re)parses source when needed, collects query captures, sorts captures by
// precedence order, then writes styles onto per-rune line slices. After a
// successful pass it clears dirty flags and marks the cache as built.
func (e *TreeSitterEngine) buildFullBuffer(buf *core.Buffer, bc *bufferCache) {
lineCount := buf.LineCount()
@ -378,143 +442,3 @@ func (e *TreeSitterEngine) buildFullBuffer(buf *core.Buffer, bc *bufferCache) {
bc.count = lineCount
bc.built = true
}
// addDirtyRange records the line span [start, end] as dirty on bc.
//
// The span is normalized before it is stored:
//   - a nil cache is ignored
//   - reversed bounds are swapped so that start <= end
//   - negative bounds are raised to 0
//
// The dirty list is passed through mergeRanges after the append, so it is
// re-sorted and coalesced on every call.
func addDirtyRange(bc *bufferCache, start, end int) {
	if bc == nil {
		return
	}

	lo, hi := start, end
	if lo > hi {
		lo, hi = hi, lo
	}

	// lo <= hi holds here, so a negative hi implies a negative lo as well.
	switch {
	case hi < 0:
		lo, hi = 0, 0
	case lo < 0:
		lo = 0
	}

	bc.dirty = mergeRanges(append(bc.dirty, lineRange{start: lo, end: hi}))
}
// normalizedDirtyRanges clamps ranges to the rows of a buffer that has
// lineCount lines and returns them merged.
//
// It returns nil when lineCount is not positive or when ranges is empty.
// Each range is clamped to [0, lineCount-1]; a range that ends up with
// start > end (it lies entirely outside the buffer) is dropped. The surviving
// ranges are handed to mergeRanges. The input slice is not modified.
func normalizedDirtyRanges(ranges []lineRange, lineCount int) []lineRange {
	if len(ranges) == 0 || lineCount <= 0 {
		return nil
	}

	lastRow := lineCount - 1
	kept := make([]lineRange, 0, len(ranges))
	for i := range ranges {
		lo, hi := ranges[i].start, ranges[i].end
		if lo < 0 {
			lo = 0
		}
		if hi > lastRow {
			hi = lastRow
		}
		if lo <= hi {
			kept = append(kept, lineRange{start: lo, end: hi})
		}
	}
	return mergeRanges(kept)
}
// mergeRanges returns ranges coalesced into a sorted list in which no two
// entries overlap or touch.
//
// Ranges are ordered by start and then by end. A range is folded into its
// predecessor when it begins at or before predecessor.end+1, so [1,3] and
// [4,6] become [1,6]. An empty input yields nil.
//
// Note that the input slice is sorted in place; the merged result is a
// separately allocated slice.
func mergeRanges(ranges []lineRange) []lineRange {
	if len(ranges) == 0 {
		return nil
	}

	sort.Slice(ranges, func(a, b int) bool {
		if ranges[a].start != ranges[b].start {
			return ranges[a].start < ranges[b].start
		}
		return ranges[a].end < ranges[b].end
	})

	out := make([]lineRange, 0, len(ranges))
	out = append(out, ranges[0])
	for _, next := range ranges[1:] {
		last := &out[len(out)-1]
		if next.start > last.end+1 {
			// Gap of at least one line: start a new merged range.
			out = append(out, next)
			continue
		}
		if next.end > last.end {
			last.end = next.end
		}
	}
	return out
}
// rowInRanges reports whether row lies inside at least one of ranges.
//
// Range bounds are inclusive. The list is scanned linearly and does not need
// to be sorted or merged. An empty or nil list contains no rows.
func rowInRanges(row int, ranges []lineRange) bool {
	for i := range ranges {
		if row < ranges[i].start || row > ranges[i].end {
			continue
		}
		return true
	}
	return false
}
// defaultLineStyles returns a slice holding one copy of base for every rune
// in line.
//
// The length is the rune count of line, not its byte length, so a multibyte
// UTF-8 character occupies exactly one entry. An empty line yields an empty,
// non-nil slice.
func defaultLineStyles(line string, base lipgloss.Style) []lipgloss.Style {
	// Ranging over a string advances one rune per iteration, which gives the
	// same count as converting the string to []rune.
	n := 0
	for range line {
		n++
	}

	styles := make([]lipgloss.Style, 0, n)
	for len(styles) < n {
		styles = append(styles, base)
	}
	return styles
}
// collectCaptures drains iter and returns one captureRange per accepted
// capture.
//
// For every capture the iterator yields, the capture name is looked up in
// query.CaptureNames() and the node's start and end positions (row and
// column) are recorded.
//
// Special cases:
//   - a nil query returns nil without touching iter
//   - a capture whose index is outside the name table is skipped
//   - captures named "spell" are skipped
//
// With a non-nil query the result is never nil; it is an empty slice when
// nothing was accepted.
func collectCaptures(iter sitter.QueryCaptures, query *sitter.Query) []captureRange {
	if query == nil {
		return nil
	}

	captureNames := query.CaptureNames()
	ranges := []captureRange{}
	for {
		match, idx := iter.Next()
		if match == nil {
			break
		}

		c := match.Captures[idx]
		if int(c.Index) >= len(captureNames) {
			continue
		}
		name := captureNames[c.Index]
		if name == "spell" {
			continue
		}

		from, to := c.Node.StartPosition(), c.Node.EndPosition()
		ranges = append(ranges, captureRange{
			startRow: from.Row,
			startCol: from.Column,
			endRow:   to.Row,
			endCol:   to.Column,
			name:     name,
		})
	}
	return ranges
}
// buildBufferSource flattens buf into a single byte slice with its lines
// joined by '\n'.
//
// No newline is added after the last line. A buffer that reports zero lines
// yields an empty, non-nil slice.
func buildBufferSource(buf *core.Buffer) []byte {
	total := buf.LineCount()
	if total == 0 {
		return []byte{}
	}

	rows := make([]string, 0, total)
	for i := range total {
		rows = append(rows, buf.Line(i))
	}

	joined := strings.Join(rows, "\n")
	return []byte(joined)
}
// byteColToRuneIndex converts a byte offset within line into the number of
// runes that precede that offset.
//
// Boundary behavior:
//   - byteCol <= 0 returns 0
//   - byteCol >= len(line) returns the rune count of the whole line
//
// Bytes that do not form valid UTF-8 (including a prefix cut in the middle of
// a multibyte sequence) count as one rune each.
func byteColToRuneIndex(line []byte, byteCol int) int {
	switch {
	case byteCol <= 0:
		return 0
	case byteCol < len(line):
		line = line[:byteCol]
	}

	// Ranging over a string yields one iteration per rune, which matches the
	// length of the equivalent []rune conversion.
	count := 0
	for range string(line) {
		count++
	}
	return count
}

View File

@ -0,0 +1,212 @@
package syntax
import (
"sort"
"strings"
"git.gophernest.net/azpect/TextEditor/internal/core"
"github.com/charmbracelet/lipgloss"
sitter "github.com/tree-sitter/go-tree-sitter"
)
// addDirtyRange marks the line span [start, end] on bc as needing a restyle.
//
// The incoming span is made safe before it is stored:
//   - a nil cache makes the call a no-op
//   - bounds given in reverse order are swapped
//   - negative bounds are clamped to 0
//
// After the append, the whole dirty list is passed through mergeRanges so
// overlapping and adjacent spans collapse into one entry.
func addDirtyRange(bc *bufferCache, start, end int) {
	if bc == nil {
		return
	}

	lo, hi := start, end
	if hi < lo {
		lo, hi = hi, lo
	}

	span := lineRange{start: max(lo, 0), end: max(hi, 0)}
	bc.dirty = mergeRanges(append(bc.dirty, span))
}
// normalizedDirtyRanges converts arbitrary dirty ranges into ranges that are
// valid for a buffer of lineCount lines.
//
// Steps:
//   - return nil when the buffer has no lines or there is nothing to process
//   - clamp each range to [0, lineCount-1]
//   - drop ranges left with start > end after clamping
//   - pass the remainder to mergeRanges
//
// The caller's slice is left untouched; clamping works on per-element copies.
func normalizedDirtyRanges(ranges []lineRange, lineCount int) []lineRange {
	if lineCount <= 0 {
		return nil
	}
	if len(ranges) == 0 {
		return nil
	}

	maxRow := lineCount - 1
	out := make([]lineRange, 0, len(ranges))
	for _, span := range ranges {
		// span is a copy of the element, so the input is not modified.
		span.start = max(span.start, 0)
		span.end = min(span.end, maxRow)
		if span.start > span.end {
			continue
		}
		out = append(out, span)
	}
	return mergeRanges(out)
}
// mergeRanges sorts ranges and collapses them into a non-overlapping list.
//
// Two ranges are combined when they overlap or are directly adjacent: [1,3]
// and [4,6] become [1,6], while [1,3] and [5,6] stay separate. An empty input
// yields nil.
//
// Note: the provided slice is sorted in place (by start, then end) as a side
// effect. The returned slice is freshly allocated.
func mergeRanges(ranges []lineRange) []lineRange {
	n := len(ranges)
	if n == 0 {
		return nil
	}

	byStartThenEnd := func(i, j int) bool {
		a, b := ranges[i], ranges[j]
		if a.start == b.start {
			return a.end < b.end
		}
		return a.start < b.start
	}
	sort.Slice(ranges, byStartThenEnd)

	result := make([]lineRange, 1, n)
	result[0] = ranges[0]
	for _, r := range ranges[1:] {
		tail := len(result) - 1
		switch {
		case r.start > result[tail].end+1:
			// Not touching the current tail: open a new range.
			result = append(result, r)
		case r.end > result[tail].end:
			result[tail].end = r.end
		}
	}
	return result
}
// rowInRanges reports whether any range in ranges covers row.
//
// Both ends of a range count as covered. This is a plain linear scan that
// stops at the first hit.
func rowInRanges(row int, ranges []lineRange) bool {
	for _, span := range ranges {
		inside := span.start <= row && row <= span.end
		if inside {
			return true
		}
	}
	return false
}
// defaultLineStyles builds the baseline style row for line: one entry per
// rune, each set to base.
//
// Sizing by runes rather than bytes means a multibyte UTF-8 character maps to
// a single style entry.
func defaultLineStyles(line string, base lipgloss.Style) []lipgloss.Style {
	count := len([]rune(line))
	out := make([]lipgloss.Style, count)
	for i := 0; i < count; i++ {
		out[i] = base
	}
	return out
}
// collectCaptures consumes a Tree-sitter capture iterator and converts each
// accepted capture into a captureRange.
//
// Each captureRange carries the capture's name (resolved through
// query.CaptureNames()) and the start/end row and column of the captured
// node.
//
// Special handling:
//   - a nil query yields nil output and iter is not advanced
//   - capture indexes outside query.CaptureNames() are ignored defensively
//   - captures named "spell" are not recorded
//
// For a non-nil query the returned slice is non-nil even when it is empty.
func collectCaptures(iter sitter.QueryCaptures, query *sitter.Query) []captureRange {
	if query == nil {
		return nil
	}

	names := query.CaptureNames()
	collected := []captureRange{}
	for m, i := iter.Next(); m != nil; m, i = iter.Next() {
		hit := m.Captures[i]

		nameIdx := int(hit.Index)
		if nameIdx >= len(names) {
			continue
		}

		label := names[nameIdx]
		if label != "spell" {
			node := hit.Node
			begin := node.StartPosition()
			finish := node.EndPosition()
			collected = append(collected, captureRange{
				startRow: begin.Row,
				startCol: begin.Column,
				endRow:   finish.Row,
				endCol:   finish.Column,
				name:     label,
			})
		}
	}
	return collected
}
// buildBufferSource serializes buf into one contiguous byte slice.
//
// Lines are written in order with a single '\n' between consecutive lines and
// nothing after the last one. A buffer with no lines produces an empty,
// non-nil slice.
func buildBufferSource(buf *core.Buffer) []byte {
	n := buf.LineCount()
	if n == 0 {
		return []byte{}
	}

	var sb strings.Builder
	for i := range n {
		if i > 0 {
			// Separator only between lines, matching a join on "\n".
			sb.WriteByte('\n')
		}
		sb.WriteString(buf.Line(i))
	}
	return []byte(sb.String())
}
// byteColToRuneIndex maps a byte-based column within line to a rune index.
//
// The result is the number of runes in the first byteCol bytes of line.
//
// Boundary behavior:
//   - byteCol <= 0          -> 0
//   - byteCol >= len(line)  -> rune length of the entire line
//
// A byteCol that lands inside a multibyte sequence leaves incomplete bytes in
// the prefix; each such byte is counted as one rune.
func byteColToRuneIndex(line []byte, byteCol int) int {
	if byteCol <= 0 {
		return 0
	}

	head := line
	if byteCol < len(line) {
		head = line[:byteCol]
	}
	return len([]rune(string(head)))
}