Gim/internal/syntax/treesitter.go
Hayden Hargreaves 1c2585b8d9 feat: Implemented syntax styles
Treesitter integration implemented! But tests are failing, need to
resolve that.
2026-04-07 22:34:42 -07:00

445 lines
13 KiB
Go

package syntax
import (
"bytes"
"sort"
"git.gophernest.net/azpect/TextEditor/internal/core"
"git.gophernest.net/azpect/TextEditor/internal/theme"
"github.com/charmbracelet/lipgloss"
sitter "github.com/tree-sitter/go-tree-sitter"
)
// TreeSitterEngine provides syntax highlighting using Tree-sitter queries.
//
// The engine stores per-buffer parser state and a cached style map so redraws
// can reuse prior work. It supports both full rebuilds and incremental edits:
//   - full rebuilds when a buffer is first seen, language changes, or state is invalid
//   - incremental updates when ApplyEdit provides enough information to reparse
//     only changed regions
//
// Cached styles are represented as one style per rune for each line.
type TreeSitterEngine struct {
	// editorTheme supplies the base line style and the style for each capture name.
	editorTheme theme.EditorTheme
	// registry resolves a buffer's filetype/filename to a language and query.
	registry *languageRegistry
	// cache holds the highlighting state of every buffer seen so far, keyed by
	// buffer pointer.
	cache map[*core.Buffer]*bufferCache
}
// bufferCache stores all derived highlighting state for a single buffer.
//
// It contains both style output (`lines`) and parse/query state (`parser`,
// `tree`, `source`, language/query bindings) so the engine can incrementally
// update only dirty lines instead of recomputing the whole file each frame.
type bufferCache struct {
	// built reports whether lines is considered up to date and ready to render.
	built bool
	// lines maps a line index to its per-rune styles.
	lines map[int][]lipgloss.Style
	// count is the buffer line count recorded by the last completed build pass.
	count int
	// parser is created lazily and bound to language.
	parser *sitter.Parser
	// tree is the most recent parse result; it corresponds to source.
	tree *sitter.Tree
	// source holds the bytes that tree was parsed from.
	source []byte
	// dirtyAll requests that every line be restyled on the next build.
	dirtyAll bool
	// dirty lists the line ranges to restyle when dirtyAll is false.
	dirty []lineRange
	// langID identifies the language currently bound to this cache.
	langID string
	// language is the grammar the parser is configured with.
	language *sitter.Language
	// query is the highlight query run against tree.
	query *sitter.Query
}
// lineRange describes a span of buffer rows. Both ends are inclusive, so
// {start: 3, end: 3} covers exactly one line.
//
// It is the unit used by dirty tracking and partial restyling to say which
// rows still need work.
type lineRange struct {
	start, end int
}
// captureRange records the extent and name of a single Tree-sitter capture.
//
// Positions use Tree-sitter's own coordinate space: a row plus a byte column
// within that row. They are translated to rune indexes only when styles are
// written onto a line.
type captureRange struct {
	startRow, startCol uint
	endRow, endCol     uint
	name               string
}
// NewTreeSitterEngine returns an engine that styles captures with theme t.
//
// The engine starts with a fresh language registry and an empty per-buffer
// cache; languages are looked up through the registry, so any language/query
// pair registered there can be highlighted.
func NewTreeSitterEngine(t theme.EditorTheme) *TreeSitterEngine {
	engine := new(TreeSitterEngine)
	engine.editorTheme = t
	engine.registry = newLanguageRegistry()
	engine.cache = make(map[*core.Buffer]*bufferCache)
	return engine
}
// PrepareBuffer ensures highlighting data for buf is ready to read.
//
// The cache is considered stale when it has never been built, when it was
// invalidated, or when the buffer's line count differs from the count recorded
// by the last build. A stale cache is refreshed through buildFullBuffer, which
// performs either a full or a dirty-range pass. When the cache is already
// valid the call returns immediately, so it is cheap to call before every read.
//
// If the buffer language is unsupported or resolution fails, the cache is
// marked built with an empty style map so callers can safely continue. The
// line count is recorded and the dirty flags are cleared in that case too;
// otherwise the line-count check would invalidate the cache again on the very
// next call and throw away styles that LineStyleMap filled in lazily. As with
// supported buffers, later edits or filetype changes must be reported through
// ApplyEdit, InvalidateLines or InvalidateBuffer to be picked up.
//
// A nil buf is ignored.
func (e *TreeSitterEngine) PrepareBuffer(buf *core.Buffer) {
	// Cannot prepare a nil buffer.
	if buf == nil {
		return
	}

	// Fetch the buffer's cache and return early if it is already "built"
	// (ready to render).
	bc := e.getCache(buf)
	lineCount := buf.LineCount()
	if bc.count != lineCount {
		bc.dirtyAll = true
	}
	if bc.dirtyAll {
		bc.built = false
	}
	if bc.built {
		return
	}

	// If we do not support the buffer, load empty styles into the cache and
	// record a consistent "built" state for the current line count.
	if _, ok, err := e.resolveBufferLanguage(buf, bc); err != nil || !ok {
		bc.lines = map[int][]lipgloss.Style{}
		bc.dirtyAll = false
		bc.dirty = nil
		bc.count = lineCount
		bc.built = true
		return
	}

	e.buildFullBuffer(buf, bc)
}
// LineStyleMap returns the per-rune styles for one line of buf.
//
// The buffer is prepared first, so the cache is current before it is read. A
// line with a cached entry is returned as-is. A line without one gets a slice
// holding the theme's base line style for every rune; that slice is stored in
// the cache before being returned, so later calls hit the cached entry.
//
// A nil buf yields nil.
func (e *TreeSitterEngine) LineStyleMap(buf *core.Buffer, line int) []lipgloss.Style {
	if buf == nil {
		return nil
	}
	e.PrepareBuffer(buf)

	cache := e.getCache(buf)
	styles, cached := cache.lines[line]
	if !cached {
		// One style per rune, not per byte.
		styles = make([]lipgloss.Style, len([]rune(buf.Line(line))))
		for i := range styles {
			styles[i] = e.editorTheme.Line
		}
		cache.lines[line] = styles
	}
	return styles
}
// ApplyEdit applies an incremental buffer edit to parser and style cache state.
//
// Workflow:
//   - validate buffer and language support
//   - apply the edit to the current parse tree (InputEdit)
//   - reparse using the previous tree as incremental context
//   - collect changed line ranges from both the user edit and parser changes
//   - mark cache as unbuilt so the next PrepareBuffer restyles only dirty areas
//
// If incremental parsing cannot proceed (unsupported language, parser setup
// failure, missing tree/source, or parse failure), the cached tree and source
// are discarded and the whole buffer is marked dirty. Discarding matters: a
// tree that no longer matches the buffer text must not be handed to the parser
// as incremental context, so the next preparation parses from scratch.
//
// When the edit changes the buffer's line count, every line is marked dirty
// because cached styles are keyed by line index.
//
// Nil buf or edit is ignored.
func (e *TreeSitterEngine) ApplyEdit(buf *core.Buffer, edit *core.BufferEdit) {
	if buf == nil || edit == nil {
		return
	}
	bc := e.getCache(buf)

	// requestFullRebuild drops parse state that can no longer be trusted and
	// schedules a from-scratch rebuild on the next PrepareBuffer.
	requestFullRebuild := func() {
		if bc.tree != nil {
			bc.tree.Close()
			bc.tree = nil
		}
		bc.source = nil
		bc.dirty = nil
		bc.dirtyAll = true
		bc.built = false
	}

	if _, ok, err := e.resolveBufferLanguage(buf, bc); err != nil || !ok {
		requestFullRebuild()
		return
	}

	if bc.parser == nil {
		parser := sitter.NewParser()
		if err := parser.SetLanguage(bc.language); err != nil {
			// A parser without a language cannot parse; do not keep it.
			parser.Close()
			requestFullRebuild()
			return
		}
		bc.parser = parser
	}

	// Without a previous tree and its source there is nothing to edit
	// incrementally.
	if bc.tree == nil || len(bc.source) == 0 {
		requestFullRebuild()
		return
	}

	// Tell the old tree where the text changed so it is valid incremental
	// context for the reparse below.
	bc.tree.Edit(&sitter.InputEdit{
		StartByte:      edit.StartByte,
		OldEndByte:     edit.OldEndByte,
		NewEndByte:     edit.NewEndByte,
		StartPosition:  sitter.NewPoint(edit.StartPoint.Row, edit.StartPoint.Column),
		OldEndPosition: sitter.NewPoint(edit.OldEndPoint.Row, edit.OldEndPoint.Column),
		NewEndPosition: sitter.NewPoint(edit.NewEndPoint.Row, edit.NewEndPoint.Column),
	})

	newSource := buildBufferSource(buf)
	newTree := bc.parser.Parse(newSource, bc.tree)
	if newTree == nil {
		requestFullRebuild()
		return
	}

	if buf.LineCount() != bc.count {
		// Lines were inserted or removed, so cached rows no longer line up.
		bc.dirtyAll = true
		bc.dirty = nil
	} else {
		// Rows touched directly by the edit.
		startRow := int(edit.StartPoint.Row)
		endRow := int(max(edit.OldEndPoint.Row, edit.NewEndPoint.Row))
		addDirtyRange(bc, startRow, endRow)

		// Rows whose syntax structure changed as a consequence of the edit.
		for _, r := range bc.tree.ChangedRanges(newTree) {
			addDirtyRange(bc, int(r.StartPoint.Row), int(r.EndPoint.Row))
		}
	}

	bc.source = newSource
	bc.tree.Close()
	bc.tree = newTree
	bc.built = false
}
// InvalidateBuffer discards the validity of everything cached for buf.
//
// The cache is flagged as unbuilt and fully dirty, and any pending partial
// dirty ranges are dropped, so the next PrepareBuffer call restyles the whole
// buffer. A nil buf is ignored.
func (e *TreeSitterEngine) InvalidateBuffer(buf *core.Buffer) {
	if buf != nil {
		cache := e.getCache(buf)
		cache.built = false
		cache.dirtyAll = true
		cache.dirty = nil
	}
}
// InvalidateLines flags the lines from startLine to endLine of buf as needing
// a restyle.
//
// The interval is handed to addDirtyRange and the cache is marked unbuilt, so
// the next preparation pass recomputes those lines. A nil buf is ignored.
func (e *TreeSitterEngine) InvalidateLines(buf *core.Buffer, startLine, endLine int) {
	if buf != nil {
		cache := e.getCache(buf)
		addDirtyRange(cache, startLine, endLine)
		cache.built = false
	}
}
// resolveBufferLanguage resolves and applies language/query config for buf.
//
// It asks the registry to resolve filetype/filename to a concrete language id,
// language object, and highlight query. When the resolved language id differs
// from the one bound to bc:
//   - an existing parser is switched to the new language
//   - the cached tree and source are discarded, because a tree produced by the
//     previous grammar must not be queried with, or used as incremental
//     context for, the new one
//   - the cache is marked fully dirty and unbuilt
//
// Returns (resolved, true, nil) on success. When the registry reports the
// buffer as unsupported or fails, its ok and error values are passed through
// with a nil language. If an existing parser rejects the new language, that
// error is returned and bc keeps its previous language binding, so the switch
// is retried on the next call.
func (e *TreeSitterEngine) resolveBufferLanguage(buf *core.Buffer, bc *bufferCache) (*resolvedLanguage, bool, error) {
	if e.registry == nil {
		e.registry = newLanguageRegistry()
	}

	resolved, ok, err := e.registry.resolve(buf.Filetype, buf.Filename)
	if err != nil || !ok {
		return nil, ok, err
	}
	if bc.langID == resolved.id {
		return resolved, true, nil
	}

	// Switch the parser first so the cache is only rebound when the parser
	// actually accepted the new language.
	if bc.parser != nil {
		if err := bc.parser.SetLanguage(resolved.language); err != nil {
			return nil, false, err
		}
	}

	// Parse state from the previous language is meaningless now.
	if bc.tree != nil {
		bc.tree.Close()
		bc.tree = nil
	}
	bc.source = nil

	bc.langID = resolved.id
	bc.language = resolved.language
	bc.query = resolved.query
	bc.dirtyAll = true
	bc.built = false
	return resolved, true, nil
}
// getCache returns the cache object associated with buf, creating it if needed.
//
// New caches start with an initialized lines map and default zero-values for
// parse/highlight state. The engine's cache map itself is created on first use,
// mirroring the lazy registry initialisation in resolveBufferLanguage, so an
// engine that was not built through NewTreeSitterEngine does not panic on the
// first insert.
func (e *TreeSitterEngine) getCache(buf *core.Buffer) *bufferCache {
	if bc, ok := e.cache[buf]; ok {
		return bc
	}
	if e.cache == nil {
		// Assigning into a nil map panics; allocate it lazily.
		e.cache = map[*core.Buffer]*bufferCache{}
	}
	bc := &bufferCache{lines: map[int][]lipgloss.Style{}}
	e.cache[buf] = bc
	return bc
}
// buildFullBuffer rebuilds highlight styles for buf using current cache state.
//
// Despite the name, this method handles both full and partial updates:
//   - full rebuild: reset every line to base style, query entire file
//   - partial rebuild: reset only dirty lines, query around dirty ranges
//
// A full rebuild is chosen when the cache is flagged fully dirty, holds no
// styles yet, or has no dirty ranges recorded.
//
// Parsing: the cached tree is reused only when the cached source is
// byte-identical to the buffer's current text. Otherwise the text is parsed
// from scratch. The stale tree is deliberately NOT passed to the parser as
// incremental context, because it was never edited to match the new text and
// Tree-sitter requires that for correct incremental parsing (ApplyEdit is the
// incremental path).
//
// Captures are sorted by start position, with longer captures first on ties,
// and painted in that order so a later, more specific capture overwrites an
// earlier one on the runes they share.
//
// After a successful pass the dirty flags are cleared, the line count is
// recorded and the cache is marked built. If the parser cannot be configured or
// parsing fails, the cache is marked built with whatever styles were reset so
// rendering can continue; the dirty flags are left untouched, so the next
// preparation tries again.
func (e *TreeSitterEngine) buildFullBuffer(buf *core.Buffer, bc *bufferCache) {
	lineCount := buf.LineCount()

	// Load the lines into memory. There is no method for this due to the buffers
	// internal implementation using a gap buffer. So the "Lines" property is of
	// type []*GapBuffer.
	lines := make([]string, lineCount)
	for i := range lineCount {
		lines[i] = buf.Line(i)
	}

	fullRebuild := bc.dirtyAll || len(bc.lines) == 0 || len(bc.dirty) == 0

	// Normalize once; the result drives the reset, the range queries and the
	// paint filter below. It is only consulted on the partial path.
	dirty := normalizedDirtyRanges(bc.dirty, lineCount)

	// Reset the rows that are about to be repainted to the base style.
	if fullRebuild {
		bc.lines = make(map[int][]lipgloss.Style, lineCount)
		for i, text := range lines {
			bc.lines[i] = defaultLineStyles(text, e.editorTheme.Line)
		}
	} else {
		for _, r := range dirty {
			for i := r.start; i <= r.end; i++ {
				bc.lines[i] = defaultLineStyles(lines[i], e.editorTheme.Line)
			}
		}
	}

	if bc.parser == nil {
		parser := sitter.NewParser()
		if err := parser.SetLanguage(bc.language); err != nil {
			// Nothing can be parsed without a language; keep the base styles.
			parser.Close()
			bc.built = true
			return
		}
		bc.parser = parser
	}

	source := buildBufferSource(buf)
	if bc.tree == nil || !bytes.Equal(bc.source, source) {
		// The cached tree (if any) does not describe this text and was not
		// edited to match it, so parse without an old tree.
		tree := bc.parser.Parse(source, nil)
		if tree == nil {
			bc.built = true
			return
		}
		if bc.tree != nil {
			bc.tree.Close()
		}
		bc.tree = tree
		bc.source = source
	}

	root := bc.tree.RootNode()

	var captures []captureRange
	if fullRebuild {
		cursor := sitter.NewQueryCursor()
		defer cursor.Close()
		iter := cursor.Captures(bc.query, root, source)
		captures = append(captures, collectCaptures(iter, bc.query)...)
	} else {
		for _, r := range dirty {
			// Widen each range by one row on both sides so captures that
			// straddle the dirty boundary are still found.
			queryStart := max(0, r.start-1)
			queryEnd := min(lineCount-1, r.end+1)

			rangeCursor := sitter.NewQueryCursor()
			rangeCursor.SetPointRange(
				sitter.NewPoint(uint(queryStart), 0),
				sitter.NewPoint(uint(queryEnd+1), 0),
			)
			iter := rangeCursor.Captures(bc.query, root, source)
			captures = append(captures, collectCaptures(iter, bc.query)...)
			rangeCursor.Close()
		}
	}

	// Sort the captures in order of their character occurrence in the file.
	// Captures that start at the same position are ordered longest first, so
	// the shorter (more specific) one is painted last.
	sort.Slice(captures, func(i, j int) bool {
		a, b := captures[i], captures[j]
		switch {
		case a.startRow != b.startRow:
			return a.startRow < b.startRow
		case a.startCol != b.startCol:
			return a.startCol < b.startCol
		case a.endRow != b.endRow:
			return a.endRow > b.endRow
		default:
			return a.endCol > b.endCol
		}
	})

	// Basically, this code works by rewriting the same range and the last capture wins.
	// This is a great spot for optimization: No need to draw many times, just pick the best one.
	// Or maybe when we sort, if we find ones that are the same, remove the first one, and then
	// we just keep the last one. Then this code can stay the same but will not suffer so many
	// rewrites.
	for _, c := range captures {
		sty := e.editorTheme.CaptureStyle(c.name)
		for row := c.startRow; row <= c.endRow; row++ {
			if int(row) >= len(lines) {
				break
			}
			// On a partial pass only the reset rows may be repainted; other
			// rows still hold valid styles from an earlier pass.
			if !fullRebuild && !rowInRanges(int(row), dirty) {
				continue
			}

			lineBytes := []byte(lines[row])

			// A multi-row capture covers the whole of every interior row.
			startByteCol := uint(0)
			if row == c.startRow {
				startByteCol = c.startCol
			}
			endByteCol := uint(len(lineBytes))
			if row == c.endRow {
				endByteCol = min(c.endCol, uint(len(lineBytes)))
			}

			// Tree-sitter columns are byte offsets; styles are indexed by rune.
			startRune := byteColToRuneIndex(lineBytes, int(startByteCol))
			endRune := byteColToRuneIndex(lineBytes, int(endByteCol))

			rowStyles := bc.lines[int(row)]
			if startRune < 0 {
				startRune = 0
			}
			if endRune > len(rowStyles) {
				endRune = len(rowStyles)
			}
			if startRune >= endRune {
				continue
			}
			for i := startRune; i < endRune; i++ {
				rowStyles[i] = sty
			}
			bc.lines[int(row)] = rowStyles
		}
	}

	bc.dirtyAll = false
	bc.dirty = nil
	bc.count = lineCount
	bc.built = true
}