diff --git a/internal/syntax/treesitter.go b/internal/syntax/treesitter.go index 723b545..258f948 100644 --- a/internal/syntax/treesitter.go +++ b/internal/syntax/treesitter.go @@ -3,7 +3,6 @@ package syntax import ( "bytes" "sort" - "strings" "git.gophernest.net/azpect/TextEditor/internal/core" "git.gophernest.net/azpect/TextEditor/internal/style" @@ -11,6 +10,15 @@ import ( sitter "github.com/tree-sitter/go-tree-sitter" ) +// TreeSitterEngine provides syntax highlighting using Tree-sitter queries. +// +// The engine stores per-buffer parser state and a cached style map so redraws +// can reuse prior work. It supports both full rebuilds and incremental edits: +// - full rebuilds when a buffer is first seen, language changes, or state is invalid +// - incremental updates when ApplyEdit provides enough information to reparse +// only changed regions +// +// Cached styles are represented as one style per rune for each line. type TreeSitterEngine struct { styles style.Styles registry *languageRegistry @@ -18,6 +26,11 @@ type TreeSitterEngine struct { cache map[*core.Buffer]*bufferCache } +// bufferCache stores all derived highlighting state for a single buffer. +// +// It contains both style output (`lines`) and parse/query state (`parser`, +// `tree`, `source`, language/query bindings) so the engine can incrementally +// update only dirty lines instead of recomputing the whole file each frame. type bufferCache struct { built bool lines map[int][]lipgloss.Style @@ -34,11 +47,19 @@ type bufferCache struct { query *sitter.Query } +// lineRange is an inclusive line interval [start, end]. +// +// Dirty tracking and partial restyling use this type to represent which rows +// need work. type lineRange struct { start int end int } +// captureRange describes one Tree-sitter capture span. +// +// Coordinates are in row/byte-column space, matching Tree-sitter node +// positions. The range is later converted to rune indexes for style writes. 
type captureRange struct { startRow uint startCol uint @@ -47,11 +68,10 @@ type captureRange struct { name string } -// NewTreeSitterEngine: Creates a new tree sitter engine with the styles -// provided attached. +// NewTreeSitterEngine constructs a TreeSitterEngine with the provided style set. // -// Currently, this engine only support GoLang. But more languages can be -// added with easy. +// Language support is resolved through the language registry, so the engine can +// work with any language/query pair registered there. func NewTreeSitterEngine(styles style.Styles) *TreeSitterEngine { return &TreeSitterEngine{ styles: styles, @@ -60,6 +80,14 @@ func NewTreeSitterEngine(styles style.Styles) *TreeSitterEngine { } } +// PrepareBuffer ensures highlighting data for buf is ready to read. +// +// This method is idempotent: if cached styles are already valid (`built`), it +// returns immediately. Otherwise it resolves language support and performs a +// rebuild pass (full or dirty-range-based) to refresh `bc.lines`. +// +// If the buffer language is unsupported or resolution fails, it still marks the +// cache as built with an empty style map so callers can safely continue. func (e *TreeSitterEngine) PrepareBuffer(buf *core.Buffer) { // Cannot prepare a nil buffer if buf == nil { @@ -90,6 +118,11 @@ func (e *TreeSitterEngine) PrepareBuffer(buf *core.Buffer) { e.buildFullBuffer(buf, bc) } +// LineStyleMap returns the style row for a specific line in buf. +// +// It first guarantees buffer preparation, then returns cached styles when +// available. Missing lines are lazily initialized to the base line style and +// stored in cache to keep downstream rendering logic simple. 
func (e *TreeSitterEngine) LineStyleMap(buf *core.Buffer, line int) []lipgloss.Style { if buf == nil { return nil @@ -111,6 +144,17 @@ func (e *TreeSitterEngine) LineStyleMap(buf *core.Buffer, line int) []lipgloss.S return out } +// ApplyEdit applies an incremental buffer edit to parser and style cache state. +// +// Workflow: +// - validate buffer and language support +// - apply the edit to the current parse tree (InputEdit) +// - reparse using the previous tree as incremental context +// - collect changed line ranges from both the user edit and parser changes +// - mark cache as unbuilt so the next PrepareBuffer restyles only dirty areas +// +// If incremental parsing cannot proceed (missing parser/tree/source or parse +// failure), it falls back to a full-dirty rebuild on the next preparation. func (e *TreeSitterEngine) ApplyEdit(buf *core.Buffer, edit *core.BufferEdit) { if buf == nil || edit == nil { return @@ -172,8 +216,9 @@ func (e *TreeSitterEngine) ApplyEdit(buf *core.Buffer, edit *core.BufferEdit) { bc.built = false } -// TreeSitterEngine.InvalidateBuffer: Deletes the entire buffers cache from the engine. If the -// buffer provided is nil, this function does nothing. +// InvalidateBuffer marks all cached highlighting data for buf as stale. +// +// The next PrepareBuffer call will rebuild styles from scratch for the buffer. func (e *TreeSitterEngine) InvalidateBuffer(buf *core.Buffer) { if buf == nil { return @@ -184,9 +229,11 @@ func (e *TreeSitterEngine) InvalidateBuffer(buf *core.Buffer) { bc.dirty = nil } -// TreeSitterEngine.InvalidateLines: Deletes lines between start and end (inclusive) from the -// buffers cache. Then marks the cache as "unbuilt." If the buffer provided is nil, this function -// does nothing. +// InvalidateLines marks a line interval in buf as dirty. +// +// The range is inclusive and normalized by addDirtyRange. 
On the next +// preparation pass, those lines (plus capture-context neighbors) are +// recalculated while unchanged lines are preserved. func (e *TreeSitterEngine) InvalidateLines(buf *core.Buffer, startLine, endLine int) { if buf == nil { return @@ -196,8 +243,14 @@ func (e *TreeSitterEngine) InvalidateLines(buf *core.Buffer, startLine, endLine bc.built = false } -// TreeSitterEngine.supportsBuffer: Returns whether the buffer can be parsed and highlighted -// by the engine. When false, there should be a fallback. +// resolveBufferLanguage resolves and applies language/query config for buf. +// +// It asks the registry to resolve filetype/filename to a concrete language id, +// language object, and highlight query. When the resolved language id changes, +// parser/query bindings are updated and the cache is marked dirty for rebuild. +// +// Returns (resolved, true, nil) on success. When unsupported it returns +// (nil, false, nil). Resolution errors are returned as the third value. func (e *TreeSitterEngine) resolveBufferLanguage(buf *core.Buffer, bc *bufferCache) (*resolvedLanguage, bool, error) { if e.registry == nil { e.registry = newLanguageRegistry() @@ -222,8 +275,10 @@ func (e *TreeSitterEngine) resolveBufferLanguage(buf *core.Buffer, bc *bufferCac return resolved, true, nil } -// TreeSitterEngine.getCache: Returns the buffers cache. If the cache does not exist, a new one -// is created and applied to the engines cache map. +// getCache returns the cache object associated with buf, creating it if needed. +// +// New caches start with an initialized lines map and default zero-values for +// parse/highlight state. func (e *TreeSitterEngine) getCache(buf *core.Buffer) *bufferCache { if bc, ok := e.cache[buf]; ok { return bc @@ -233,6 +288,15 @@ func (e *TreeSitterEngine) getCache(buf *core.Buffer) *bufferCache { return bc } +// buildFullBuffer rebuilds highlight styles for buf using current cache state. 
+// +// Despite the name, this method handles both full and partial updates: +// - full rebuild: reset every line to base style, query entire file +// - partial rebuild: reset only dirty lines, query around dirty ranges +// +// It (re)parses source when needed, collects query captures, sorts captures by +// precedence order, then writes styles onto per-rune line slices. After a +// successful pass it clears dirty flags and marks the cache as built. func (e *TreeSitterEngine) buildFullBuffer(buf *core.Buffer, bc *bufferCache) { lineCount := buf.LineCount() @@ -378,143 +442,3 @@ func (e *TreeSitterEngine) buildFullBuffer(buf *core.Buffer, bc *bufferCache) { bc.count = lineCount bc.built = true } - -func addDirtyRange(bc *bufferCache, start, end int) { - if bc == nil { - return - } - if end < start { - start, end = end, start - } - if start < 0 { - start = 0 - } - if end < 0 { - end = 0 - } - bc.dirty = append(bc.dirty, lineRange{start: start, end: end}) - bc.dirty = mergeRanges(bc.dirty) -} - -func normalizedDirtyRanges(ranges []lineRange, lineCount int) []lineRange { - if lineCount <= 0 || len(ranges) == 0 { - return nil - } - - clamped := make([]lineRange, 0, len(ranges)) - for _, r := range ranges { - start := max(0, r.start) - end := min(lineCount-1, r.end) - if start > end { - continue - } - clamped = append(clamped, lineRange{start: start, end: end}) - } - - return mergeRanges(clamped) -} - -func mergeRanges(ranges []lineRange) []lineRange { - if len(ranges) == 0 { - return nil - } - - sort.Slice(ranges, func(i, j int) bool { - if ranges[i].start == ranges[j].start { - return ranges[i].end < ranges[j].end - } - return ranges[i].start < ranges[j].start - }) - - merged := make([]lineRange, 0, len(ranges)) - cur := ranges[0] - for i := 1; i < len(ranges); i++ { - n := ranges[i] - if n.start <= cur.end+1 { - if n.end > cur.end { - cur.end = n.end - } - continue - } - merged = append(merged, cur) - cur = n - } - merged = append(merged, cur) - return merged -} - 
-func rowInRanges(row int, ranges []lineRange) bool { - for _, r := range ranges { - if row >= r.start && row <= r.end { - return true - } - } - return false -} - -func defaultLineStyles(line string, base lipgloss.Style) []lipgloss.Style { - runes := []rune(line) - row := make([]lipgloss.Style, len(runes)) - for i := range row { - row[i] = base - } - return row -} - -func collectCaptures(iter sitter.QueryCaptures, query *sitter.Query) []captureRange { - if query == nil { - return nil - } - - names := query.CaptureNames() - out := []captureRange{} - for match, captureIdx := iter.Next(); match != nil; match, captureIdx = iter.Next() { - capture := match.Captures[captureIdx] - if int(capture.Index) >= len(names) { - continue - } - name := names[capture.Index] - if name == "spell" { - continue - } - - node := capture.Node - start := node.StartPosition() - end := node.EndPosition() - out = append(out, captureRange{ - startRow: start.Row, - startCol: start.Column, - endRow: end.Row, - endCol: end.Column, - name: name, - }) - } - - return out -} - -func buildBufferSource(buf *core.Buffer) []byte { - lineCount := buf.LineCount() - if lineCount == 0 { - return []byte{} - } - - lines := make([]string, lineCount) - for i := range lineCount { - lines[i] = buf.Line(i) - } - - return []byte(strings.Join(lines, "\n")) -} - -func byteColToRuneIndex(line []byte, byteCol int) int { - if byteCol <= 0 { - return 0 - } - if byteCol >= len(line) { - return len([]rune(string(line))) - } - - prefix := line[:byteCol] - return len([]rune(string(prefix))) -} diff --git a/internal/syntax/treesitter_utils.go b/internal/syntax/treesitter_utils.go new file mode 100644 index 0000000..52ebc35 --- /dev/null +++ b/internal/syntax/treesitter_utils.go @@ -0,0 +1,212 @@ +package syntax + +import ( + "sort" + "strings" + + "git.gophernest.net/azpect/TextEditor/internal/core" + "github.com/charmbracelet/lipgloss" + sitter "github.com/tree-sitter/go-tree-sitter" +) + +// addDirtyRange records a 
potentially changed line span in the buffer cache. +// +// The parser/highlighter keeps a list of "dirty" line ranges that must be +// reparsed or restyled after edits. This helper makes sure the incoming range +// is safe and normalized before storing it: +// - nil cache is ignored (defensive early-return) +// - start/end are swapped if the caller passed them in reverse order +// - negative values are clamped to 0 +// +// After appending the new range, it merges overlaps/adjacent ranges so the +// dirty list stays compact and avoids duplicate work during incremental updates. +func addDirtyRange(bc *bufferCache, start, end int) { + if bc == nil { + return + } + if end < start { + start, end = end, start + } + start = max(0, start) + end = max(0, end) + bc.dirty = append(bc.dirty, lineRange{start: start, end: end}) + bc.dirty = mergeRanges(bc.dirty) +} + +// normalizedDirtyRanges clamps, filters, and merges dirty ranges for a buffer. +// +// Tree-sitter and styling operations expect valid row bounds. This function +// takes arbitrary line ranges and converts them into a clean canonical form +// based on the current buffer size: +// - returns nil if there are no lines or no input ranges +// - clamps each range to [0, lineCount-1] +// - drops invalid ranges where start > end after clamping +// - merges overlapping or adjacent ranges +// +// The returned slice is safe to iterate directly for reparse/restyle passes. +func normalizedDirtyRanges(ranges []lineRange, lineCount int) []lineRange { + if lineCount <= 0 || len(ranges) == 0 { + return nil + } + + clamped := make([]lineRange, 0, len(ranges)) + for _, r := range ranges { + start := max(0, r.start) + end := min(lineCount-1, r.end) + if start > end { + continue + } + clamped = append(clamped, lineRange{start: start, end: end}) + } + + return mergeRanges(clamped) +} + +// mergeRanges sorts and coalesces line ranges into a minimal non-overlapping set. 
+// +// Two ranges are merged when they overlap or touch (for example [1,3] and [4,6] +// become [1,6]). Treating adjacent ranges as one avoids unnecessary splits in +// later highlighting logic. +// +// Note: this function sorts the provided slice in place before building and +// returning a merged result. +func mergeRanges(ranges []lineRange) []lineRange { + if len(ranges) == 0 { + return nil + } + + sort.Slice(ranges, func(i, j int) bool { + if ranges[i].start == ranges[j].start { + return ranges[i].end < ranges[j].end + } + return ranges[i].start < ranges[j].start + }) + + merged := make([]lineRange, 0, len(ranges)) + cur := ranges[0] + for i := 1; i < len(ranges); i++ { + n := ranges[i] + if n.start <= cur.end+1 { + if n.end > cur.end { + cur.end = n.end + } + continue + } + merged = append(merged, cur) + cur = n + } + merged = append(merged, cur) + return merged +} + +// rowInRanges reports whether a row index is covered by any range. +// +// This is a simple membership check used by update paths that need to decide +// whether a specific line should be recomputed. +func rowInRanges(row int, ranges []lineRange) bool { + for _, r := range ranges { + if row >= r.start && row <= r.end { + return true + } + } + return false +} + +// defaultLineStyles creates a style-per-rune slice initialized with base. +// +// The highlighter applies styles at rune granularity (not byte granularity) so +// multibyte UTF-8 characters still map to exactly one style entry per visible +// character. This function produces the baseline style row before syntax +// captures overwrite specific spans. +func defaultLineStyles(line string, base lipgloss.Style) []lipgloss.Style { + runes := []rune(line) + row := make([]lipgloss.Style, len(runes)) + for i := range row { + row[i] = base + } + return row +} + +// collectCaptures consumes a Tree-sitter capture iterator into local ranges. 
+// +// For each capture returned by the query iterator, it resolves the capture name +// and records start/end row+column coordinates as a captureRange. These ranges +// are then used by the renderer to map syntax names to concrete styles. +// +// Special handling: +// - nil query yields nil output +// - capture indexes outside query.CaptureNames() are ignored defensively +// - captures named "spell" are skipped, because spell-check is handled by a +// separate pass and should not be treated as a syntax-highlight capture here +func collectCaptures(iter sitter.QueryCaptures, query *sitter.Query) []captureRange { + if query == nil { + return nil + } + + names := query.CaptureNames() + out := []captureRange{} + for match, captureIdx := iter.Next(); match != nil; match, captureIdx = iter.Next() { + capture := match.Captures[captureIdx] + if int(capture.Index) >= len(names) { + continue + } + name := names[capture.Index] + if name == "spell" { + continue + } + + node := capture.Node + start := node.StartPosition() + end := node.EndPosition() + out = append(out, captureRange{ + startRow: start.Row, + startCol: start.Column, + endRow: end.Row, + endCol: end.Column, + name: name, + }) + } + + return out +} + +// buildBufferSource flattens the editor buffer into a single newline-delimited +// byte slice suitable for Tree-sitter parsing. +// +// The buffer stores text as lines, while Tree-sitter expects one contiguous +// source blob. This helper joins all lines with '\n' separators, preserving row +// structure expected by parser positions. +func buildBufferSource(buf *core.Buffer) []byte { + lineCount := buf.LineCount() + if lineCount == 0 { + return []byte{} + } + + lines := make([]string, lineCount) + for i := range lineCount { + lines[i] = buf.Line(i) + } + + return []byte(strings.Join(lines, "\n")) +} + +// byteColToRuneIndex converts a byte-based column offset to a rune index. 
//
// The result is the number of runes contained in the first byteCol bytes of
// line, which is the position to use when indexing a one-entry-per-rune slice.
//
// Boundary behavior:
//   - byteCol <= 0         -> 0
//   - byteCol >= len(line) -> rune length of the entire line
func byteColToRuneIndex(line []byte, byteCol int) int {
	switch {
	case byteCol <= 0:
		return 0
	case byteCol > len(line):
		// Past the end: clamp so the whole line is counted.
		byteCol = len(line)
	}

	return len([]rune(string(line[:byteCol])))
}