Gim/internal/syntax/treesitter_utils.go
2026-04-07 20:21:41 -07:00

213 lines
6.3 KiB
Go

package syntax
import (
	"sort"
	"strings"
	"unicode/utf8"

	"git.gophernest.net/azpect/TextEditor/internal/core"
	"github.com/charmbracelet/lipgloss"
	sitter "github.com/tree-sitter/go-tree-sitter"
)
// addDirtyRange marks the line span [start, end] as dirty on bc.
//
// The span is normalized before it is stored:
//   - a nil bc turns the call into a no-op
//   - the bounds are swapped when end < start
//   - negative bounds are raised to 0
//
// The normalized span is appended to bc.dirty and the whole list is then run
// through mergeRanges, so overlapping or touching spans end up stored as one.
func addDirtyRange(bc *bufferCache, start, end int) {
	if bc == nil {
		return
	}

	lo, hi := start, end
	if hi < lo {
		lo, hi = hi, lo
	}

	span := lineRange{start: max(lo, 0), end: max(hi, 0)}
	bc.dirty = mergeRanges(append(bc.dirty, span))
}
// normalizedDirtyRanges fits ranges to a buffer of lineCount lines and returns
// them in merged form.
//
// Processing steps:
//   - a non-positive lineCount or an empty input yields nil
//   - each range start is raised to at least 0 and each end is lowered to at
//     most lineCount-1
//   - a range whose start is greater than its end after clamping is dropped
//   - the surviving ranges are passed to mergeRanges, which coalesces
//     overlapping and adjacent entries
//
// The input slice itself is not modified; clamped copies are collected into a
// new slice before merging.
func normalizedDirtyRanges(ranges []lineRange, lineCount int) []lineRange {
	if len(ranges) == 0 || lineCount <= 0 {
		return nil
	}

	lastRow := lineCount - 1
	kept := make([]lineRange, 0, len(ranges))
	for i := range ranges {
		lo := max(ranges[i].start, 0)
		hi := min(ranges[i].end, lastRow)
		if lo <= hi {
			kept = append(kept, lineRange{start: lo, end: hi})
		}
	}
	return mergeRanges(kept)
}
// mergeRanges returns the given line ranges as a sorted list in which no two
// entries overlap or touch.
//
// Ranges are ordered by start (ties broken by end) and then folded left to
// right: a range whose start is at most one past the end of the previous
// result entry is absorbed into it, extending that entry's end when needed.
// For example [1,3] and [4,6] collapse into [1,6].
//
// An empty input yields nil. Note that the input slice is sorted in place;
// the merged result is built in a separately allocated slice.
func mergeRanges(ranges []lineRange) []lineRange {
	if len(ranges) == 0 {
		return nil
	}

	sort.Slice(ranges, func(a, b int) bool {
		if ranges[a].start != ranges[b].start {
			return ranges[a].start < ranges[b].start
		}
		return ranges[a].end < ranges[b].end
	})

	out := make([]lineRange, 0, len(ranges))
	out = append(out, ranges[0])
	for _, next := range ranges[1:] {
		last := &out[len(out)-1]
		if next.start > last.end+1 {
			// Gap of at least one row: start a new output entry.
			out = append(out, next)
			continue
		}
		// Overlapping or adjacent: grow the current entry if next reaches further.
		last.end = max(last.end, next.end)
	}
	return out
}
// rowInRanges reports whether row lies inside at least one of ranges.
//
// Both bounds of a range are inclusive. The ranges are scanned linearly and
// need not be sorted or merged; an empty or nil list always yields false.
func rowInRanges(row int, ranges []lineRange) bool {
	for i := range ranges {
		if r := ranges[i]; r.start <= row && row <= r.end {
			return true
		}
	}
	return false
}
// defaultLineStyles returns a slice holding one copy of base per rune of line.
//
// The slice length is the rune count of line, not its byte length, so a
// multibyte UTF-8 character maps to exactly one style entry. Invalid UTF-8
// bytes each count as one rune. An empty line yields an empty, non-nil slice.
//
// The rune count is taken with utf8.RuneCountInString, which avoids
// materializing a []rune copy of the line only to read its length.
func defaultLineStyles(line string, base lipgloss.Style) []lipgloss.Style {
	row := make([]lipgloss.Style, utf8.RuneCountInString(line))
	for i := range row {
		row[i] = base
	}
	return row
}
// collectCaptures drains iter and returns one captureRange per usable capture.
//
// Each call to iter.Next yields a match together with the index of the capture
// within that match. The capture's name is looked up in query.CaptureNames()
// and stored alongside the start/end row and column reported by the captured
// node.
//
// Special cases:
//   - a nil query returns nil without touching iter
//   - a capture whose Index is outside query.CaptureNames() is skipped
//   - captures named "spell" are skipped and never appear in the result
//
// When the query is non-nil the result is always a non-nil slice, possibly
// empty.
func collectCaptures(iter sitter.QueryCaptures, query *sitter.Query) []captureRange {
	if query == nil {
		return nil
	}

	captureNames := query.CaptureNames()
	found := make([]captureRange, 0)
	for {
		match, idx := iter.Next()
		if match == nil {
			break
		}

		hit := match.Captures[idx]
		nameIdx := int(hit.Index)
		if nameIdx >= len(captureNames) {
			continue
		}

		label := captureNames[nameIdx]
		if label == "spell" {
			continue
		}

		node := hit.Node
		from, to := node.StartPosition(), node.EndPosition()
		found = append(found, captureRange{
			startRow: from.Row,
			startCol: from.Column,
			endRow:   to.Row,
			endCol:   to.Column,
			name:     label,
		})
	}
	return found
}
// buildBufferSource concatenates every line of buf into one byte slice, with a
// single '\n' between consecutive lines and no trailing newline.
//
// Lines are read through buf.Line(i) for i in [0, buf.LineCount()). A buffer
// that reports zero lines yields an empty, non-nil slice.
func buildBufferSource(buf *core.Buffer) []byte {
	total := buf.LineCount()
	if total == 0 {
		return []byte{}
	}

	var sb strings.Builder
	for row := 0; row < total; row++ {
		if row > 0 {
			sb.WriteByte('\n')
		}
		sb.WriteString(buf.Line(row))
	}
	return []byte(sb.String())
}
// byteColToRuneIndex converts a byte offset within line into the number of
// runes that precede that offset.
//
// Boundary behavior:
//   - byteCol <= 0 returns 0
//   - byteCol >= len(line) returns the rune count of the whole line
//
// Invalid or truncated UTF-8 sequences count as one rune per byte, so an
// offset that falls inside a multibyte character counts the partial sequence
// before it as a rune of its own.
//
// The count is taken directly on the byte prefix with utf8.RuneCount, which
// avoids copying the line into a string and then into a []rune.
func byteColToRuneIndex(line []byte, byteCol int) int {
	if byteCol <= 0 {
		return 0
	}
	// Offsets past the end are treated as "end of line".
	if byteCol > len(line) {
		byteCol = len(line)
	}
	return utf8.RuneCount(line[:byteCol])
}