445 lines
13 KiB
Go
445 lines
13 KiB
Go
package syntax
|
|
|
|
import (
|
|
"bytes"
|
|
"sort"
|
|
|
|
"git.gophernest.net/azpect/TextEditor/internal/core"
|
|
"git.gophernest.net/azpect/TextEditor/internal/theme"
|
|
"github.com/charmbracelet/lipgloss"
|
|
sitter "github.com/tree-sitter/go-tree-sitter"
|
|
)
|
|
|
|
// TreeSitterEngine provides syntax highlighting using Tree-sitter queries.
|
|
//
|
|
// The engine stores per-buffer parser state and a cached style map so redraws
|
|
// can reuse prior work. It supports both full rebuilds and incremental edits:
|
|
// - full rebuilds when a buffer is first seen, language changes, or state is invalid
|
|
// - incremental updates when ApplyEdit provides enough information to reparse
|
|
// only changed regions
|
|
//
|
|
// Cached styles are represented as one style per rune for each line.
|
|
type TreeSitterEngine struct {
|
|
editorTheme theme.EditorTheme
|
|
registry *languageRegistry
|
|
|
|
cache map[*core.Buffer]*bufferCache
|
|
}
|
|
|
|
// bufferCache stores all derived highlighting state for a single buffer.
|
|
//
|
|
// It contains both style output (`lines`) and parse/query state (`parser`,
|
|
// `tree`, `source`, language/query bindings) so the engine can incrementally
|
|
// update only dirty lines instead of recomputing the whole file each frame.
|
|
type bufferCache struct {
|
|
built bool
|
|
lines map[int][]lipgloss.Style
|
|
count int
|
|
|
|
parser *sitter.Parser
|
|
tree *sitter.Tree
|
|
source []byte
|
|
dirtyAll bool
|
|
dirty []lineRange
|
|
|
|
langID string
|
|
language *sitter.Language
|
|
query *sitter.Query
|
|
}
|
|
|
|
// lineRange names a run of lines by its first and last index; both ends are
// included, i.e. the interval is [start, end].
//
// It is the unit used for dirty tracking and for partial restyling.
type lineRange struct {
	start, end int
}
|
|
|
|
// captureRange is the span covered by a single Tree-sitter capture, tagged
// with the capture's name.
//
// Positions use Tree-sitter's own coordinates: a row plus a byte column.
// They are translated to rune indexes only when styles are written.
type captureRange struct {
	startRow, startCol uint
	endRow, endCol     uint
	name               string
}
|
|
|
|
// NewTreeSitterEngine constructs a TreeSitterEngine with the provided style set.
|
|
//
|
|
// Language support is resolved through the language registry, so the engine can
|
|
// work with any language/query pair registered there.
|
|
func NewTreeSitterEngine(t theme.EditorTheme) *TreeSitterEngine {
|
|
return &TreeSitterEngine{
|
|
editorTheme: t,
|
|
registry: newLanguageRegistry(),
|
|
cache: map[*core.Buffer]*bufferCache{},
|
|
}
|
|
}
|
|
|
|
// PrepareBuffer ensures highlighting data for buf is ready to read.
|
|
//
|
|
// This method is idempotent: if cached styles are already valid (`built`), it
|
|
// returns immediately. Otherwise it resolves language support and performs a
|
|
// rebuild pass (full or dirty-range-based) to refresh `bc.lines`.
|
|
//
|
|
// If the buffer language is unsupported or resolution fails, it still marks the
|
|
// cache as built with an empty style map so callers can safely continue.
|
|
func (e *TreeSitterEngine) PrepareBuffer(buf *core.Buffer) {
|
|
// Cannot prepare a nil buffer
|
|
if buf == nil {
|
|
return
|
|
}
|
|
|
|
// Get the buffers cache and return if we are already "built" (ready to render).
|
|
bc := e.getCache(buf)
|
|
if bc.count != buf.LineCount() {
|
|
bc.dirtyAll = true
|
|
}
|
|
if bc.dirtyAll {
|
|
bc.built = false
|
|
}
|
|
if bc.built {
|
|
return
|
|
}
|
|
|
|
// If we do no support the buffer, load empty styles into the cache
|
|
lang, ok, err := e.resolveBufferLanguage(buf, bc)
|
|
if err != nil || !ok {
|
|
bc.lines = map[int][]lipgloss.Style{}
|
|
bc.built = true
|
|
return
|
|
}
|
|
_ = lang
|
|
|
|
e.buildFullBuffer(buf, bc)
|
|
}
|
|
|
|
// LineStyleMap returns the style row for a specific line in buf.
|
|
//
|
|
// It first guarantees buffer preparation, then returns cached styles when
|
|
// available. Missing lines are lazily initialized to the base line style and
|
|
// stored in cache to keep downstream rendering logic simple.
|
|
func (e *TreeSitterEngine) LineStyleMap(buf *core.Buffer, line int) []lipgloss.Style {
|
|
if buf == nil {
|
|
return nil
|
|
}
|
|
|
|
e.PrepareBuffer(buf)
|
|
bc := e.getCache(buf)
|
|
|
|
if s, ok := bc.lines[line]; ok {
|
|
return s
|
|
}
|
|
|
|
runes := []rune(buf.Line(line))
|
|
out := make([]lipgloss.Style, len(runes))
|
|
for i := range out {
|
|
out[i] = e.editorTheme.Line
|
|
}
|
|
bc.lines[line] = out
|
|
return out
|
|
}
|
|
|
|
// ApplyEdit applies an incremental buffer edit to parser and style cache state.
|
|
//
|
|
// Workflow:
|
|
// - validate buffer and language support
|
|
// - apply the edit to the current parse tree (InputEdit)
|
|
// - reparse using the previous tree as incremental context
|
|
// - collect changed line ranges from both the user edit and parser changes
|
|
// - mark cache as unbuilt so the next PrepareBuffer restyles only dirty areas
|
|
//
|
|
// If incremental parsing cannot proceed (missing parser/tree/source or parse
|
|
// failure), it falls back to a full-dirty rebuild on the next preparation.
|
|
func (e *TreeSitterEngine) ApplyEdit(buf *core.Buffer, edit *core.BufferEdit) {
|
|
if buf == nil || edit == nil {
|
|
return
|
|
}
|
|
|
|
bc := e.getCache(buf)
|
|
lang, ok, err := e.resolveBufferLanguage(buf, bc)
|
|
if err != nil || !ok {
|
|
bc.built = false
|
|
bc.dirtyAll = true
|
|
return
|
|
}
|
|
_ = lang
|
|
|
|
if bc.parser == nil {
|
|
bc.parser = sitter.NewParser()
|
|
bc.parser.SetLanguage(bc.language)
|
|
}
|
|
|
|
if bc.tree == nil || len(bc.source) == 0 {
|
|
bc.dirtyAll = true
|
|
return
|
|
}
|
|
|
|
bc.tree.Edit(&sitter.InputEdit{
|
|
StartByte: edit.StartByte,
|
|
OldEndByte: edit.OldEndByte,
|
|
NewEndByte: edit.NewEndByte,
|
|
StartPosition: sitter.NewPoint(edit.StartPoint.Row, edit.StartPoint.Column),
|
|
OldEndPosition: sitter.NewPoint(edit.OldEndPoint.Row, edit.OldEndPoint.Column),
|
|
NewEndPosition: sitter.NewPoint(edit.NewEndPoint.Row, edit.NewEndPoint.Column),
|
|
})
|
|
|
|
newSource := buildBufferSource(buf)
|
|
newTree := bc.parser.Parse(newSource, bc.tree)
|
|
if newTree == nil {
|
|
bc.dirtyAll = true
|
|
return
|
|
}
|
|
|
|
changed := bc.tree.ChangedRanges(newTree)
|
|
|
|
newLineCount := buf.LineCount()
|
|
if newLineCount != bc.count {
|
|
bc.dirtyAll = true
|
|
bc.dirty = nil
|
|
} else {
|
|
startRow := int(edit.StartPoint.Row)
|
|
endRow := int(max(edit.OldEndPoint.Row, edit.NewEndPoint.Row))
|
|
addDirtyRange(bc, startRow, endRow)
|
|
for _, r := range changed {
|
|
addDirtyRange(bc, int(r.StartPoint.Row), int(r.EndPoint.Row))
|
|
}
|
|
}
|
|
|
|
bc.source = newSource
|
|
bc.tree.Close()
|
|
bc.tree = newTree
|
|
bc.built = false
|
|
}
|
|
|
|
// InvalidateBuffer marks all cached highlighting data for buf as stale.
|
|
//
|
|
// The next PrepareBuffer call will rebuild styles from scratch for the buffer.
|
|
func (e *TreeSitterEngine) InvalidateBuffer(buf *core.Buffer) {
|
|
if buf == nil {
|
|
return
|
|
}
|
|
bc := e.getCache(buf)
|
|
bc.built = false
|
|
bc.dirtyAll = true
|
|
bc.dirty = nil
|
|
}
|
|
|
|
// InvalidateLines marks a line interval in buf as dirty.
|
|
//
|
|
// The range is inclusive and normalized by addDirtyRange. On the next
|
|
// preparation pass, those lines (plus capture-context neighbors) are
|
|
// recalculated while unchanged lines are preserved.
|
|
func (e *TreeSitterEngine) InvalidateLines(buf *core.Buffer, startLine, endLine int) {
|
|
if buf == nil {
|
|
return
|
|
}
|
|
bc := e.getCache(buf)
|
|
addDirtyRange(bc, startLine, endLine)
|
|
bc.built = false
|
|
}
|
|
|
|
// resolveBufferLanguage resolves and applies language/query config for buf.
|
|
//
|
|
// It asks the registry to resolve filetype/filename to a concrete language id,
|
|
// language object, and highlight query. When the resolved language id changes,
|
|
// parser/query bindings are updated and the cache is marked dirty for rebuild.
|
|
//
|
|
// Returns (resolved, true, nil) on success. When unsupported it returns
|
|
// (nil, false, nil). Resolution errors are returned as the third value.
|
|
func (e *TreeSitterEngine) resolveBufferLanguage(buf *core.Buffer, bc *bufferCache) (*resolvedLanguage, bool, error) {
|
|
if e.registry == nil {
|
|
e.registry = newLanguageRegistry()
|
|
}
|
|
|
|
resolved, ok, err := e.registry.resolve(buf.Filetype, buf.Filename)
|
|
if err != nil || !ok {
|
|
return nil, ok, err
|
|
}
|
|
|
|
if bc.langID != resolved.id {
|
|
bc.langID = resolved.id
|
|
bc.language = resolved.language
|
|
bc.query = resolved.query
|
|
if bc.parser != nil {
|
|
bc.parser.SetLanguage(bc.language)
|
|
}
|
|
bc.dirtyAll = true
|
|
bc.built = false
|
|
}
|
|
|
|
return resolved, true, nil
|
|
}
|
|
|
|
// getCache returns the cache object associated with buf, creating it if needed.
|
|
//
|
|
// New caches start with an initialized lines map and default zero-values for
|
|
// parse/highlight state.
|
|
func (e *TreeSitterEngine) getCache(buf *core.Buffer) *bufferCache {
|
|
if bc, ok := e.cache[buf]; ok {
|
|
return bc
|
|
}
|
|
bc := &bufferCache{lines: map[int][]lipgloss.Style{}}
|
|
e.cache[buf] = bc
|
|
return bc
|
|
}
|
|
|
|
// buildFullBuffer rebuilds highlight styles for buf using current cache state.
|
|
//
|
|
// Despite the name, this method handles both full and partial updates:
|
|
// - full rebuild: reset every line to base style, query entire file
|
|
// - partial rebuild: reset only dirty lines, query around dirty ranges
|
|
//
|
|
// It (re)parses source when needed, collects query captures, sorts captures by
|
|
// precedence order, then writes styles onto per-rune line slices. After a
|
|
// successful pass it clears dirty flags and marks the cache as built.
|
|
func (e *TreeSitterEngine) buildFullBuffer(buf *core.Buffer, bc *bufferCache) {
|
|
lineCount := buf.LineCount()
|
|
|
|
// Load the lines into memory. There is no method for this due to the buffers
|
|
// internal implementation using a gap buffer. So the "Lines" property is of
|
|
// type []*GapBuffer.
|
|
lines := make([]string, lineCount)
|
|
for i := range lineCount {
|
|
lines[i] = buf.Line(i)
|
|
}
|
|
|
|
fullRebuild := bc.dirtyAll || len(bc.lines) == 0 || len(bc.dirty) == 0
|
|
if fullRebuild {
|
|
bc.lines = map[int][]lipgloss.Style{}
|
|
for i := range lineCount {
|
|
bc.lines[i] = defaultLineStyles(lines[i], e.editorTheme.Line)
|
|
}
|
|
} else {
|
|
dirty := normalizedDirtyRanges(bc.dirty, lineCount)
|
|
for _, r := range dirty {
|
|
for i := r.start; i <= r.end; i++ {
|
|
bc.lines[i] = defaultLineStyles(lines[i], e.editorTheme.Line)
|
|
}
|
|
}
|
|
}
|
|
|
|
source := buildBufferSource(buf)
|
|
useCurrentTree := bc.tree != nil && bytes.Equal(bc.source, source)
|
|
|
|
if bc.parser == nil {
|
|
bc.parser = sitter.NewParser()
|
|
bc.parser.SetLanguage(bc.language)
|
|
}
|
|
|
|
if !useCurrentTree {
|
|
var baseTree *sitter.Tree
|
|
if bc.tree != nil {
|
|
baseTree = bc.tree
|
|
}
|
|
|
|
tree := bc.parser.Parse(source, baseTree)
|
|
if tree == nil {
|
|
bc.built = true
|
|
return
|
|
}
|
|
|
|
if bc.tree != nil {
|
|
bc.tree.Close()
|
|
}
|
|
bc.tree = tree
|
|
bc.source = source
|
|
}
|
|
|
|
root := bc.tree.RootNode()
|
|
cursor := sitter.NewQueryCursor()
|
|
defer cursor.Close()
|
|
|
|
var captures []captureRange
|
|
|
|
if fullRebuild {
|
|
iter := cursor.Captures(bc.query, root, source)
|
|
captures = append(captures, collectCaptures(iter, bc.query)...)
|
|
} else {
|
|
dirty := normalizedDirtyRanges(bc.dirty, lineCount)
|
|
for _, r := range dirty {
|
|
queryStart := max(0, r.start-1)
|
|
queryEnd := min(lineCount-1, r.end+1)
|
|
|
|
rangeCursor := sitter.NewQueryCursor()
|
|
rangeCursor.SetPointRange(
|
|
sitter.NewPoint(uint(queryStart), 0),
|
|
sitter.NewPoint(uint(queryEnd+1), 0),
|
|
)
|
|
iter := rangeCursor.Captures(bc.query, root, source)
|
|
captures = append(captures, collectCaptures(iter, bc.query)...)
|
|
rangeCursor.Close()
|
|
}
|
|
}
|
|
|
|
// Sort the captures in order of their character occurrence in the file
|
|
sort.Slice(captures, func(i, j int) bool {
|
|
if captures[i].startRow == captures[j].startRow {
|
|
if captures[i].startCol == captures[j].startCol {
|
|
if captures[i].endRow == captures[j].endRow {
|
|
return captures[i].endCol > captures[j].endCol
|
|
}
|
|
return captures[i].endRow > captures[j].endRow
|
|
}
|
|
return captures[i].startCol < captures[j].startCol
|
|
}
|
|
return captures[i].startRow < captures[j].startRow
|
|
})
|
|
|
|
// Basically, this code works by rewriting the same range and the last capture wins.
|
|
// This is a great spot for optimization: No need to draw many times, just pick the best one.
|
|
// Or maybe when we sort, if we find ones that are the same, remove the first one, and then
|
|
// we just keep the last one. Then this code can stay the same but will not suffer so many
|
|
// rewrites.
|
|
targetDirty := normalizedDirtyRanges(bc.dirty, lineCount)
|
|
for _, c := range captures {
|
|
sty := e.editorTheme.CaptureStyle(c.name)
|
|
for row := c.startRow; row <= c.endRow; row++ {
|
|
if int(row) >= len(lines) {
|
|
break
|
|
}
|
|
if !fullRebuild && !rowInRanges(int(row), targetDirty) {
|
|
continue
|
|
}
|
|
|
|
lineBytes := []byte(lines[row])
|
|
startByteCol := uint(0)
|
|
if row == c.startRow {
|
|
startByteCol = c.startCol
|
|
}
|
|
endByteCol := uint(len(lineBytes))
|
|
if row == c.endRow {
|
|
endByteCol = min(c.endCol, uint(len(lineBytes)))
|
|
}
|
|
|
|
startRune := byteColToRuneIndex(lineBytes, int(startByteCol))
|
|
endRune := byteColToRuneIndex(lineBytes, int(endByteCol))
|
|
|
|
rowStyles := bc.lines[int(row)]
|
|
if startRune < 0 {
|
|
startRune = 0
|
|
}
|
|
if endRune > len(rowStyles) {
|
|
endRune = len(rowStyles)
|
|
}
|
|
if startRune >= endRune {
|
|
continue
|
|
}
|
|
|
|
for i := startRune; i < endRune; i++ {
|
|
rowStyles[i] = sty
|
|
}
|
|
bc.lines[int(row)] = rowStyles
|
|
}
|
|
}
|
|
|
|
bc.dirtyAll = false
|
|
bc.dirty = nil
|
|
bc.count = lineCount
|
|
bc.built = true
|
|
}
|