doc: cleaned up a bit.

2026-04-07 20:21:41 -07:00 · 2026-04-07 20:21:41 -07:00 · 760770c564
commit 760770c564
parent 76f949a6b2
2 changed files with 290 additions and 154 deletions
--- a/internal/syntax/treesitter.go
+++ b/internal/syntax/treesitter.go
@@ -3,7 +3,6 @@ package syntax
 import (
 	"bytes"
 	"sort"
-	"strings"

 	"git.gophernest.net/azpect/TextEditor/internal/core"
 	"git.gophernest.net/azpect/TextEditor/internal/style"
@@ -11,6 +10,15 @@ import (
 	sitter "github.com/tree-sitter/go-tree-sitter"
 )

+// TreeSitterEngine provides syntax highlighting using Tree-sitter queries.
+//
+// The engine stores per-buffer parser state and a cached style map so redraws
+// can reuse prior work. It supports both full rebuilds and incremental edits:
+//   - full rebuilds when a buffer is first seen, language changes, or state is invalid
+//   - incremental updates when ApplyEdit provides enough information to reparse
+//     only changed regions
+//
+// Cached styles are represented as one style per rune for each line.
 type TreeSitterEngine struct {
 	styles   style.Styles
 	registry *languageRegistry
@@ -18,6 +26,11 @@ type TreeSitterEngine struct {
 	cache map[*core.Buffer]*bufferCache
 }

+// bufferCache stores all derived highlighting state for a single buffer.
+//
+// It contains both style output (`lines`) and parse/query state (`parser`,
+// `tree`, `source`, language/query bindings) so the engine can incrementally
+// update only dirty lines instead of recomputing the whole file each frame.
 type bufferCache struct {
 	built bool
 	lines map[int][]lipgloss.Style
@@ -34,11 +47,19 @@ type bufferCache struct {
 	query    *sitter.Query
 }

+// lineRange is an inclusive line interval [start, end].
+//
+// Dirty tracking and partial restyling use this type to represent which rows
+// need work.
 type lineRange struct {
 	start int
 	end   int
 }

+// captureRange describes one Tree-sitter capture span.
+//
+// Coordinates are in row/byte-column space, matching Tree-sitter node
+// positions. The range is later converted to rune indexes for style writes.
 type captureRange struct {
 	startRow uint
 	startCol uint
@@ -47,11 +68,10 @@ type captureRange struct {
 	name     string
 }

-// NewTreeSitterEngine: Creates a new tree sitter engine with the styles
-// provided attached.
+// NewTreeSitterEngine constructs a TreeSitterEngine with the provided style set.
 //
-// Currently, this engine only support GoLang. But more languages can be
-// added with easy.
+// Language support is resolved through the language registry, so the engine can
+// work with any language/query pair registered there.
 func NewTreeSitterEngine(styles style.Styles) *TreeSitterEngine {
 	return &TreeSitterEngine{
 		styles:   styles,
@@ -60,6 +80,14 @@ func NewTreeSitterEngine(styles style.Styles) *TreeSitterEngine {
 	}
 }

+// PrepareBuffer ensures highlighting data for buf is ready to read.
+//
+// This method is idempotent: if cached styles are already valid (`built`), it
+// returns immediately. Otherwise it resolves language support and performs a
+// rebuild pass (full or dirty-range-based) to refresh `bc.lines`.
+//
+// If the buffer language is unsupported or resolution fails, it still marks the
+// cache as built with an empty style map so callers can safely continue.
 func (e *TreeSitterEngine) PrepareBuffer(buf *core.Buffer) {
 	// Cannot prepare a nil buffer
 	if buf == nil {
@@ -90,6 +118,11 @@ func (e *TreeSitterEngine) PrepareBuffer(buf *core.Buffer) {
 	e.buildFullBuffer(buf, bc)
 }

+// LineStyleMap returns the style row for a specific line in buf.
+//
+// It first guarantees buffer preparation, then returns cached styles when
+// available. Missing lines are lazily initialized to the base line style and
+// stored in cache to keep downstream rendering logic simple.
 func (e *TreeSitterEngine) LineStyleMap(buf *core.Buffer, line int) []lipgloss.Style {
 	if buf == nil {
 		return nil
@@ -111,6 +144,17 @@ func (e *TreeSitterEngine) LineStyleMap(buf *core.Buffer, line int) []lipgloss.S
 	return out
 }

+// ApplyEdit applies an incremental buffer edit to parser and style cache state.
+//
+// Workflow:
+//   - validate buffer and language support
+//   - apply the edit to the current parse tree (InputEdit)
+//   - reparse using the previous tree as incremental context
+//   - collect changed line ranges from both the user edit and parser changes
+//   - mark cache as unbuilt so the next PrepareBuffer restyles only dirty areas
+//
+// If incremental parsing cannot proceed (missing parser/tree/source or parse
+// failure), it falls back to a full-dirty rebuild on the next preparation.
 func (e *TreeSitterEngine) ApplyEdit(buf *core.Buffer, edit *core.BufferEdit) {
 	if buf == nil || edit == nil {
 		return
@@ -172,8 +216,9 @@ func (e *TreeSitterEngine) ApplyEdit(buf *core.Buffer, edit *core.BufferEdit) {
 	bc.built = false
 }

-// TreeSitterEngine.InvalidateBuffer: Deletes the entire buffers cache from the engine. If the
-// buffer provided is nil, this function does nothing.
+// InvalidateBuffer marks all cached highlighting data for buf as stale.
+//
+// The next PrepareBuffer call will rebuild styles from scratch for the buffer.
 func (e *TreeSitterEngine) InvalidateBuffer(buf *core.Buffer) {
 	if buf == nil {
 		return
@@ -184,9 +229,11 @@ func (e *TreeSitterEngine) InvalidateBuffer(buf *core.Buffer) {
 	bc.dirty = nil
 }

-// TreeSitterEngine.InvalidateLines: Deletes lines between start and end (inclusive) from the
-// buffers cache. Then marks the cache as "unbuilt." If the buffer provided is nil, this function
-// does nothing.
+// InvalidateLines marks a line interval in buf as dirty.
+//
+// The range is inclusive and normalized by addDirtyRange. On the next
+// preparation pass, those lines (plus capture-context neighbors) are
+// recalculated while unchanged lines are preserved.
 func (e *TreeSitterEngine) InvalidateLines(buf *core.Buffer, startLine, endLine int) {
 	if buf == nil {
 		return
@@ -196,8 +243,14 @@ func (e *TreeSitterEngine) InvalidateLines(buf *core.Buffer, startLine, endLine
 	bc.built = false
 }

-// TreeSitterEngine.supportsBuffer: Returns whether the buffer can be parsed and highlighted
-// by the engine. When false, there should be a fallback.
+// resolveBufferLanguage resolves and applies language/query config for buf.
+//
+// It asks the registry to resolve filetype/filename to a concrete language id,
+// language object, and highlight query. When the resolved language id changes,
+// parser/query bindings are updated and the cache is marked dirty for rebuild.
+//
+// Returns (resolved, true, nil) on success. When unsupported it returns
+// (nil, false, nil). Resolution errors are returned as the third value.
 func (e *TreeSitterEngine) resolveBufferLanguage(buf *core.Buffer, bc *bufferCache) (*resolvedLanguage, bool, error) {
 	if e.registry == nil {
 		e.registry = newLanguageRegistry()
@@ -222,8 +275,10 @@ func (e *TreeSitterEngine) resolveBufferLanguage(buf *core.Buffer, bc *bufferCac
 	return resolved, true, nil
 }

-// TreeSitterEngine.getCache: Returns the buffers cache. If the cache does not exist, a new one
-// is created and applied to the engines cache map.
+// getCache returns the cache object associated with buf, creating it if needed.
+//
+// New caches start with an initialized lines map and default zero-values for
+// parse/highlight state.
 func (e *TreeSitterEngine) getCache(buf *core.Buffer) *bufferCache {
 	if bc, ok := e.cache[buf]; ok {
 		return bc
@@ -233,6 +288,15 @@ func (e *TreeSitterEngine) getCache(buf *core.Buffer) *bufferCache {
 	return bc
 }

+// buildFullBuffer rebuilds highlight styles for buf using current cache state.
+//
+// Despite the name, this method handles both full and partial updates:
+//   - full rebuild: reset every line to base style, query entire file
+//   - partial rebuild: reset only dirty lines, query around dirty ranges
+//
+// It (re)parses source when needed, collects query captures, sorts captures by
+// precedence order, then writes styles onto per-rune line slices. After a
+// successful pass it clears dirty flags and marks the cache as built.
 func (e *TreeSitterEngine) buildFullBuffer(buf *core.Buffer, bc *bufferCache) {
 	lineCount := buf.LineCount()

@@ -378,143 +442,3 @@ func (e *TreeSitterEngine) buildFullBuffer(buf *core.Buffer, bc *bufferCache) {
 	bc.count = lineCount
 	bc.built = true
 }
-
-func addDirtyRange(bc *bufferCache, start, end int) {
-	if bc == nil {
-		return
-	}
-	if end < start {
-		start, end = end, start
-	}
-	if start < 0 {
-		start = 0
-	}
-	if end < 0 {
-		end = 0
-	}
-	bc.dirty = append(bc.dirty, lineRange{start: start, end: end})
-	bc.dirty = mergeRanges(bc.dirty)
-}
-
-func normalizedDirtyRanges(ranges []lineRange, lineCount int) []lineRange {
-	if lineCount <= 0 || len(ranges) == 0 {
-		return nil
-	}
-
-	clamped := make([]lineRange, 0, len(ranges))
-	for _, r := range ranges {
-		start := max(0, r.start)
-		end := min(lineCount-1, r.end)
-		if start > end {
-			continue
-		}
-		clamped = append(clamped, lineRange{start: start, end: end})
-	}
-
-	return mergeRanges(clamped)
-}
-
-func mergeRanges(ranges []lineRange) []lineRange {
-	if len(ranges) == 0 {
-		return nil
-	}
-
-	sort.Slice(ranges, func(i, j int) bool {
-		if ranges[i].start == ranges[j].start {
-			return ranges[i].end < ranges[j].end
-		}
-		return ranges[i].start < ranges[j].start
-	})
-
-	merged := make([]lineRange, 0, len(ranges))
-	cur := ranges[0]
-	for i := 1; i < len(ranges); i++ {
-		n := ranges[i]
-		if n.start <= cur.end+1 {
-			if n.end > cur.end {
-				cur.end = n.end
-			}
-			continue
-		}
-		merged = append(merged, cur)
-		cur = n
-	}
-	merged = append(merged, cur)
-	return merged
-}
-
-func rowInRanges(row int, ranges []lineRange) bool {
-	for _, r := range ranges {
-		if row >= r.start && row <= r.end {
-			return true
-		}
-	}
-	return false
-}
-
-func defaultLineStyles(line string, base lipgloss.Style) []lipgloss.Style {
-	runes := []rune(line)
-	row := make([]lipgloss.Style, len(runes))
-	for i := range row {
-		row[i] = base
-	}
-	return row
-}
-
-func collectCaptures(iter sitter.QueryCaptures, query *sitter.Query) []captureRange {
-	if query == nil {
-		return nil
-	}
-
-	names := query.CaptureNames()
-	out := []captureRange{}
-	for match, captureIdx := iter.Next(); match != nil; match, captureIdx = iter.Next() {
-		capture := match.Captures[captureIdx]
-		if int(capture.Index) >= len(names) {
-			continue
-		}
-		name := names[capture.Index]
-		if name == "spell" {
-			continue
-		}
-
-		node := capture.Node
-		start := node.StartPosition()
-		end := node.EndPosition()
-		out = append(out, captureRange{
-			startRow: start.Row,
-			startCol: start.Column,
-			endRow:   end.Row,
-			endCol:   end.Column,
-			name:     name,
-		})
-	}
-
-	return out
-}
-
-func buildBufferSource(buf *core.Buffer) []byte {
-	lineCount := buf.LineCount()
-	if lineCount == 0 {
-		return []byte{}
-	}
-
-	lines := make([]string, lineCount)
-	for i := range lineCount {
-		lines[i] = buf.Line(i)
-	}
-
-	return []byte(strings.Join(lines, "\n"))
-}
-
-func byteColToRuneIndex(line []byte, byteCol int) int {
-	if byteCol <= 0 {
-		return 0
-	}
-	if byteCol >= len(line) {
-		return len([]rune(string(line)))
-	}
-
-	prefix := line[:byteCol]
-	return len([]rune(string(prefix)))
-}
--- a/internal/syntax/treesitter_utils.go
+++ b/internal/syntax/treesitter_utils.go
@@ -0,0 +1,212 @@
+package syntax
+
+import (
+	"sort"
+	"strings"
+
+	"git.gophernest.net/azpect/TextEditor/internal/core"
+	"github.com/charmbracelet/lipgloss"
+	sitter "github.com/tree-sitter/go-tree-sitter"
+)
+
+// addDirtyRange records a potentially changed line span in the buffer cache.
+//
+// The parser/highlighter keeps a list of "dirty" line ranges that must be
+// reparsed or restyled after edits. This helper makes sure the incoming range
+// is safe and normalized before storing it:
+//   - nil cache is ignored (defensive early-return)
+//   - start/end are swapped if the caller passed them in reverse order
+//   - negative values are clamped to 0
+//
+// After appending the new range, it merges overlapping/adjacent ranges so the
+// dirty list stays compact and avoids duplicate work during incremental updates.
+func addDirtyRange(bc *bufferCache, start, end int) {
+	if bc == nil {
+		return
+	}
+	if end < start {
+		start, end = end, start
+	}
+	start = max(0, start)
+	end = max(0, end)
+	bc.dirty = append(bc.dirty, lineRange{start: start, end: end})
+	bc.dirty = mergeRanges(bc.dirty)
+}
+
+// normalizedDirtyRanges clamps, filters, and merges dirty ranges for a buffer.
+//
+// Tree-sitter and styling operations expect valid row bounds. This function
+// takes arbitrary line ranges and converts them into a clean canonical form
+// based on the current buffer size:
+//   - returns nil if there are no lines or no input ranges
+//   - clamps each range to [0, lineCount-1]
+//   - drops invalid ranges where start > end after clamping
+//   - merges overlapping or adjacent ranges
+//
+// The returned slice is safe to iterate directly for reparse/restyle passes.
+func normalizedDirtyRanges(ranges []lineRange, lineCount int) []lineRange {
+	if lineCount <= 0 || len(ranges) == 0 {
+		return nil
+	}
+
+	clamped := make([]lineRange, 0, len(ranges))
+	for _, r := range ranges {
+		start := max(0, r.start)
+		end := min(lineCount-1, r.end)
+		if start > end {
+			continue
+		}
+		clamped = append(clamped, lineRange{start: start, end: end})
+	}
+
+	return mergeRanges(clamped)
+}
+
+// mergeRanges sorts and coalesces line ranges into a minimal non-overlapping set.
+//
+// Two ranges are merged when they overlap or touch (for example [1,3] and [4,6]
+// become [1,6]). Treating adjacent ranges as one avoids unnecessary splits in
+// later highlighting logic.
+//
+// Note: this function sorts the provided slice in place before building and
+// returning a merged result.
+func mergeRanges(ranges []lineRange) []lineRange {
+	if len(ranges) == 0 {
+		return nil
+	}
+
+	sort.Slice(ranges, func(i, j int) bool {
+		if ranges[i].start == ranges[j].start {
+			return ranges[i].end < ranges[j].end
+		}
+		return ranges[i].start < ranges[j].start
+	})
+
+	merged := make([]lineRange, 0, len(ranges))
+	cur := ranges[0]
+	for i := 1; i < len(ranges); i++ {
+		n := ranges[i]
+		if n.start <= cur.end+1 {
+			if n.end > cur.end {
+				cur.end = n.end
+			}
+			continue
+		}
+		merged = append(merged, cur)
+		cur = n
+	}
+	merged = append(merged, cur)
+	return merged
+}
+
+// rowInRanges reports whether a row index is covered by any range.
+//
+// This is a simple membership check used by update paths that need to decide
+// whether a specific line should be recomputed.
+func rowInRanges(row int, ranges []lineRange) bool {
+	for _, r := range ranges {
+		if row >= r.start && row <= r.end {
+			return true
+		}
+	}
+	return false
+}
+
+// defaultLineStyles creates a style-per-rune slice initialized with base.
+//
+// The highlighter applies styles at rune granularity (not byte granularity), so
+// each code point gets exactly one style entry however many bytes it occupies
+// (a character built from several code points gets one entry per rune). This
+// function produces the baseline row before captures overwrite specific spans.
+func defaultLineStyles(line string, base lipgloss.Style) []lipgloss.Style {
+	runes := []rune(line)
+	row := make([]lipgloss.Style, len(runes))
+	for i := range row {
+		row[i] = base
+	}
+	return row
+}
+
+// collectCaptures consumes a Tree-sitter capture iterator into local ranges.
+//
+// For each capture returned by the query iterator, it resolves the capture name
+// and records start/end row+column coordinates as a captureRange. These ranges
+// are then used by the renderer to map syntax names to concrete styles.
+//
+// Special handling:
+//   - nil query yields nil output
+//   - capture indexes outside query.CaptureNames() are ignored defensively
+//   - captures named "spell" are skipped, presumably because that name marks
+//     spell-checkable regions (nvim-treesitter convention), not a highlight
+func collectCaptures(iter sitter.QueryCaptures, query *sitter.Query) []captureRange {
+	if query == nil {
+		return nil
+	}
+
+	names := query.CaptureNames()
+	out := []captureRange{}
+	for match, captureIdx := iter.Next(); match != nil; match, captureIdx = iter.Next() {
+		capture := match.Captures[captureIdx]
+		if int(capture.Index) >= len(names) {
+			continue
+		}
+		name := names[capture.Index]
+		if name == "spell" {
+			continue
+		}
+
+		node := capture.Node
+		start := node.StartPosition()
+		end := node.EndPosition()
+		out = append(out, captureRange{
+			startRow: start.Row,
+			startCol: start.Column,
+			endRow:   end.Row,
+			endCol:   end.Column,
+			name:     name,
+		})
+	}
+
+	return out
+}
+
+// buildBufferSource flattens the editor buffer into a single newline-delimited
+// byte slice suitable for Tree-sitter parsing.
+//
+// The buffer stores text as lines, while Tree-sitter expects one contiguous
+// source blob. This helper joins all lines with '\n' separators, preserving row
+// structure expected by parser positions.
+func buildBufferSource(buf *core.Buffer) []byte {
+	lineCount := buf.LineCount()
+	if lineCount == 0 {
+		return []byte{}
+	}
+
+	lines := make([]string, lineCount)
+	for i := range lineCount {
+		lines[i] = buf.Line(i)
+	}
+
+	return []byte(strings.Join(lines, "\n"))
+}
+
+// byteColToRuneIndex converts a byte-based column offset to a rune index.
+//
+// Tree-sitter positions use byte columns, while the renderer/highlighter often
+// indexes text by runes so multibyte UTF-8 characters are handled correctly.
+// This conversion keeps style slicing aligned with displayed characters.
+//
+// Boundary behavior:
+//   - byteCol <= 0 -> 0
+//   - byteCol >= len(line) -> rune length of the entire line
+func byteColToRuneIndex(line []byte, byteCol int) int {
+	if byteCol <= 0 {
+		return 0
+	}
+	if byteCol >= len(line) {
+		return len([]rune(string(line)))
+	}
+
+	prefix := line[:byteCol]
+	return len([]rune(string(prefix)))
+}