Gim/internal/syntax/treesitter.go

package syntax

import (
	"bytes"
	"sort"

	"git.gophernest.net/azpect/TextEditor/internal/core"
	"git.gophernest.net/azpect/TextEditor/internal/theme"
	"github.com/charmbracelet/lipgloss"
	sitter "github.com/tree-sitter/go-tree-sitter"
)

// TreeSitterEngine provides syntax highlighting using Tree-sitter queries.
//
// The engine stores per-buffer parser state and a cached style map so redraws
// can reuse prior work. It supports both full rebuilds and incremental edits:
//   - full rebuilds when a buffer is first seen, language changes, or state is invalid
//   - incremental updates when ApplyEdit provides enough information to reparse
//     only changed regions
//
// Cached styles are represented as one style per rune for each line.
type TreeSitterEngine struct {
	// registry resolves a buffer's filetype/filename to a language and its
	// highlight query. It is created lazily by resolveBufferLanguage when nil.
	registry *languageRegistry

	// cache holds the derived highlighting state for each buffer, keyed by
	// buffer pointer identity. Entries are created on demand by getCache.
	cache map[*core.Buffer]*bufferCache
}

// bufferCache stores all derived highlighting state for a single buffer.
//
// It contains both style output (`lines`) and parse/query state (`parser`,
// `tree`, `source`, language/query bindings) so the engine can incrementally
// update only dirty lines instead of recomputing the whole file each frame.
type bufferCache struct {
	// built reports whether `lines` is ready to be read. It is cleared by the
	// invalidation/edit methods and set again at the end of a build pass.
	built bool
	// lines maps a line index to its style row (one style per rune).
	lines map[int][]lipgloss.Style
	// count is the buffer line count recorded by the last completed build;
	// PrepareBuffer compares it against the live buffer to detect drift.
	count int

	// parser is created lazily the first time a parse is needed.
	parser *sitter.Parser
	// tree is the most recent parse tree, or nil if nothing has been parsed.
	tree *sitter.Tree
	// source is the byte content that `tree` was parsed from.
	source []byte
	// dirtyAll requests a full restyle on the next build pass.
	dirtyAll bool
	// dirty lists the line ranges to restyle on the next build pass; it is
	// only consulted when dirtyAll is false.
	dirty []lineRange

	// langID is the id of the language currently bound to this cache; it is
	// compared against the registry result to detect language changes.
	langID string
	// language is the grammar handed to the parser.
	language *sitter.Language
	// query is the highlight query executed against the parse tree.
	query *sitter.Query
}

// lineRange is an inclusive line interval [start, end].
//
// Dirty tracking and partial restyling use this type to represent which rows
// need work.
type lineRange struct {
	start int // first line index in the interval
	end   int // last line index in the interval (inclusive)
}

// captureRange describes one Tree-sitter capture span.
//
// Coordinates are in row/byte-column space, matching Tree-sitter node
// positions. The range is later converted to rune indexes for style writes.
type captureRange struct {
	startRow uint // row on which the capture begins
	startCol uint // byte column of the first captured byte on startRow
	endRow   uint // row on which the capture ends
	endCol   uint // byte column used as the exclusive end on endRow
	// name is the capture name; it is passed to the theme to look up the
	// style for this span.
	name string
}

// NewTreeSitterEngine returns an engine with a fresh language registry and an
// empty per-buffer cache.
//
// The theme argument is accepted for API compatibility but is not used here;
// themes are supplied per call to PrepareBuffer and LineStyleMap instead.
// Language support comes from the registry, so any language/query pair
// registered there can be highlighted.
func NewTreeSitterEngine(t theme.EditorTheme) *TreeSitterEngine {
	engine := new(TreeSitterEngine)
	engine.registry = newLanguageRegistry()
	engine.cache = make(map[*core.Buffer]*bufferCache)
	return engine
}

// PrepareBuffer ensures highlighting data for buf is ready to read.
//
// This method is idempotent: if cached styles are already valid (`built`) and
// the buffer's line count still matches the count recorded by the last build,
// it returns immediately. Otherwise it resolves language support and performs
// a rebuild pass (full or dirty-range-based) to refresh `bc.lines`.
//
// If the buffer language is unsupported or resolution fails, the cache is
// marked as built with an empty style map so callers can safely continue. The
// current line count is recorded and all dirty state is cleared in that case,
// so later calls are cheap no-ops and rows lazily added by LineStyleMap
// survive until the buffer is edited or invalidated.
//
// A nil buf is ignored.
func (e *TreeSitterEngine) PrepareBuffer(buf *core.Buffer, t theme.EditorTheme) {
	// Cannot prepare a nil buffer.
	if buf == nil {
		return
	}

	// Fetch the buffer's cache and return if it is already built (ready to
	// render). A line-count mismatch means the cache no longer lines up with
	// the buffer, so it forces a full rebuild.
	bc := e.getCache(buf)
	lineCount := buf.LineCount()
	if bc.count != lineCount {
		bc.dirtyAll = true
	}
	if bc.dirtyAll {
		bc.built = false
	}
	if bc.built {
		return
	}

	// If the buffer is not supported, load empty styles into the cache.
	if _, ok, err := e.resolveBufferLanguage(buf, bc); err != nil || !ok {
		bc.lines = map[int][]lipgloss.Style{}
		// Record the state this empty map corresponds to. Without this the
		// count check above fails on every call and the map is thrown away
		// and the language re-resolved for each rendered line.
		bc.count = lineCount
		bc.dirtyAll = false
		bc.dirty = nil
		bc.built = true
		return
	}

	e.buildFullBuffer(buf, bc, t)
}

// LineStyleMap returns the per-rune style row for one line of buf.
//
// The buffer is prepared first. If the cache already holds a row for the
// requested line, that row is returned as is. Otherwise a row with one base
// style (t.Line) per rune of the line is created, stored in the cache, and
// returned. A nil buf yields nil.
func (e *TreeSitterEngine) LineStyleMap(buf *core.Buffer, line int, t theme.EditorTheme) []lipgloss.Style {
	if buf == nil {
		return nil
	}

	e.PrepareBuffer(buf, t)
	cache := e.getCache(buf)

	styles, cached := cache.lines[line]
	if !cached {
		// One style slot per rune, not per byte.
		width := len([]rune(buf.Line(line)))
		styles = make([]lipgloss.Style, width)
		for idx := range styles {
			styles[idx] = t.Line
		}
		cache.lines[line] = styles
	}
	return styles
}

// ApplyEdit applies an incremental buffer edit to parser and style cache state.
//
// Workflow:
//   - validate buffer and language support
//   - apply the edit to the current parse tree (InputEdit)
//   - reparse using the previous tree as incremental context
//   - collect changed line ranges from both the user edit and parser changes
//   - mark cache as unbuilt so the next PrepareBuffer restyles only dirty areas
//
// If incremental parsing cannot proceed (unsupported language, language
// switch, missing parser/tree/source or parse failure), the cache is marked
// unbuilt and fully dirty so the next preparation rebuilds from scratch.
// A change in the buffer's line count also forces a full restyle.
//
// A nil buf or nil edit is ignored.
func (e *TreeSitterEngine) ApplyEdit(buf *core.Buffer, edit *core.BufferEdit) {
	if buf == nil || edit == nil {
		return
	}

	bc := e.getCache(buf)
	prevLangID := bc.langID
	if _, ok, err := e.resolveBufferLanguage(buf, bc); err != nil || !ok {
		bc.built = false
		bc.dirtyAll = true
		return
	}

	if bc.parser == nil {
		bc.parser = sitter.NewParser()
		bc.parser.SetLanguage(bc.language)
	}

	// A tree built by a different grammar must not be edited and reused as
	// the incremental base for the newly bound language; drop it so the
	// fallback below schedules a from-scratch rebuild.
	if bc.langID != prevLangID && bc.tree != nil {
		bc.tree.Close()
		bc.tree = nil
		bc.source = nil
	}

	if bc.tree == nil || len(bc.source) == 0 {
		bc.built = false
		bc.dirtyAll = true
		return
	}

	// Tell the old tree how the text moved so the parser can reuse the
	// unaffected subtrees.
	bc.tree.Edit(&sitter.InputEdit{
		StartByte:      edit.StartByte,
		OldEndByte:     edit.OldEndByte,
		NewEndByte:     edit.NewEndByte,
		StartPosition:  sitter.NewPoint(edit.StartPoint.Row, edit.StartPoint.Column),
		OldEndPosition: sitter.NewPoint(edit.OldEndPoint.Row, edit.OldEndPoint.Column),
		NewEndPosition: sitter.NewPoint(edit.NewEndPoint.Row, edit.NewEndPoint.Column),
	})

	newSource := buildBufferSource(buf)
	newTree := bc.parser.Parse(newSource, bc.tree)
	if newTree == nil {
		bc.built = false
		bc.dirtyAll = true
		return
	}

	if buf.LineCount() != bc.count {
		// Line indexes shifted, so every cached row is suspect.
		bc.dirtyAll = true
		bc.dirty = nil
	} else {
		// Dirty the rows touched by the edit itself plus every row the
		// parser reports as structurally different between the two trees.
		startRow := int(edit.StartPoint.Row)
		endRow := int(max(edit.OldEndPoint.Row, edit.NewEndPoint.Row))
		addDirtyRange(bc, startRow, endRow)
		for _, r := range bc.tree.ChangedRanges(newTree) {
			addDirtyRange(bc, int(r.StartPoint.Row), int(r.EndPoint.Row))
		}
	}

	bc.source = newSource
	bc.tree.Close()
	bc.tree = newTree
	bc.built = false
}

// InvalidateBuffer discards the validity of every cached style for buf.
//
// The cache is flagged unbuilt and fully dirty, and any pending partial dirty
// ranges are dropped, so the next PrepareBuffer call restyles the whole
// buffer. A nil buf is ignored.
func (e *TreeSitterEngine) InvalidateBuffer(buf *core.Buffer) {
	if buf == nil {
		return
	}

	cache := e.getCache(buf)
	cache.dirty = nil
	cache.dirtyAll = true
	cache.built = false
}

// InvalidateLines flags the lines from startLine to endLine of buf as needing
// a restyle.
//
// The interval is handed to addDirtyRange and the cache is marked unbuilt, so
// the next preparation pass recomputes those lines while leaving other cached
// rows in place. A nil buf is ignored.
func (e *TreeSitterEngine) InvalidateLines(buf *core.Buffer, startLine, endLine int) {
	if buf == nil {
		return
	}

	cache := e.getCache(buf)
	addDirtyRange(cache, startLine, endLine)
	cache.built = false
}

// resolveBufferLanguage resolves and applies language/query config for buf.
//
// It asks the registry to resolve filetype/filename to a concrete language id,
// language object, and highlight query. When the resolved language id differs
// from the one bound to bc, the language/query bindings are updated, an
// existing parser is switched to the new language, any parse tree and source
// snapshot produced under the previous language are discarded, and the cache
// is marked fully dirty and unbuilt so the next preparation reparses from
// scratch.
//
// The registry is created lazily if the engine does not have one yet.
//
// Returns (resolved, true, nil) on success. When unsupported it returns
// (nil, false, nil). Resolution errors are returned as the third value.
func (e *TreeSitterEngine) resolveBufferLanguage(buf *core.Buffer, bc *bufferCache) (*resolvedLanguage, bool, error) {
	if e.registry == nil {
		e.registry = newLanguageRegistry()
	}

	resolved, ok, err := e.registry.resolve(buf.Filetype, buf.Filename)
	if err != nil || !ok {
		return nil, ok, err
	}

	if bc.langID != resolved.id {
		bc.langID = resolved.id
		bc.language = resolved.language
		bc.query = resolved.query
		if bc.parser != nil {
			bc.parser.SetLanguage(bc.language)
		}

		// The old tree belongs to the previous grammar. Keeping it would let
		// later passes run the new query against it or feed it to the new
		// parser as an incremental base, so release it and forget the source
		// snapshot it was parsed from.
		if bc.tree != nil {
			bc.tree.Close()
			bc.tree = nil
		}
		bc.source = nil

		bc.dirtyAll = true
		bc.built = false
	}

	return resolved, true, nil
}

// getCache returns the cache object associated with buf, creating it if needed.
//
// New caches start with an initialized lines map and default zero-values for
// parse/highlight state. The engine's cache map itself is created on first
// use, mirroring the lazy registry setup in resolveBufferLanguage, so an
// engine that was not built through NewTreeSitterEngine does not panic on the
// first insert.
func (e *TreeSitterEngine) getCache(buf *core.Buffer) *bufferCache {
	if bc, ok := e.cache[buf]; ok {
		return bc
	}
	if e.cache == nil {
		// Writing to a nil map panics; reading (above) does not.
		e.cache = map[*core.Buffer]*bufferCache{}
	}
	bc := &bufferCache{lines: map[int][]lipgloss.Style{}}
	e.cache[buf] = bc
	return bc
}

// buildFullBuffer rebuilds highlight styles for buf using current cache state.
//
// Despite the name, this method handles both full and partial updates:
//   - full rebuild: reset every line to base style, query entire file
//   - partial rebuild: reset only dirty lines, query around dirty ranges
//
// A full rebuild is chosen when the cache is flagged dirtyAll, has no style
// rows yet, or has no dirty ranges recorded.
//
// The cached tree is reused only when the cached source bytes equal the
// buffer's current bytes. Otherwise the buffer is parsed from scratch: the
// cached tree has not been edited to match the new text here, so it cannot
// safely serve as an incremental base.
//
// Captures are ordered by start position, with longer spans first on ties, and
// the sort is stable so captures covering the same span keep the order in
// which they were collected. Styles are then written so that later captures
// overwrite earlier ones. After a successful pass the dirty flags are cleared,
// the line count is recorded, and the cache is marked built.
//
// If parsing fails the cache is marked built with the base styles written so
// far; dirty state and the recorded line count are left untouched.
func (e *TreeSitterEngine) buildFullBuffer(buf *core.Buffer, bc *bufferCache, t theme.EditorTheme) {
	lineCount := buf.LineCount()

	// Load the lines into memory. There is no method for this due to the buffers
	// internal implementation using a gap buffer. So the "Lines" property is of
	// type []*GapBuffer.
	lines := make([]string, lineCount)
	for i := range lineCount {
		lines[i] = buf.Line(i)
	}

	fullRebuild := bc.dirtyAll || len(bc.lines) == 0 || len(bc.dirty) == 0

	// dirty is only populated (and only consulted) for partial rebuilds.
	var dirty []lineRange
	if fullRebuild {
		bc.lines = make(map[int][]lipgloss.Style, lineCount)
		for i, line := range lines {
			bc.lines[i] = defaultLineStyles(line, t.Line)
		}
	} else {
		dirty = normalizedDirtyRanges(bc.dirty, lineCount)
		for _, r := range dirty {
			for i := r.start; i <= r.end; i++ {
				bc.lines[i] = defaultLineStyles(lines[i], t.Line)
			}
		}
	}

	source := buildBufferSource(buf)

	if bc.parser == nil {
		bc.parser = sitter.NewParser()
		bc.parser.SetLanguage(bc.language)
	}

	if bc.tree == nil || !bytes.Equal(bc.source, source) {
		// Parse without an old tree: incremental reuse is only valid for a
		// tree that was edited to mirror the text change, which is not
		// guaranteed for the tree cached here.
		tree := bc.parser.Parse(source, nil)
		if tree == nil {
			bc.built = true
			return
		}

		if bc.tree != nil {
			bc.tree.Close()
		}
		bc.tree = tree
		bc.source = source
	}

	root := bc.tree.RootNode()

	var captures []captureRange
	if fullRebuild {
		cursor := sitter.NewQueryCursor()
		iter := cursor.Captures(bc.query, root, source)
		captures = append(captures, collectCaptures(iter, bc.query)...)
		cursor.Close()
	} else {
		for _, r := range dirty {
			// Widen the query window by one line on each side so captures
			// that start or end just outside the dirty rows are still seen.
			queryStart := max(0, r.start-1)
			queryEnd := min(lineCount-1, r.end+1)

			rangeCursor := sitter.NewQueryCursor()
			rangeCursor.SetPointRange(
				sitter.NewPoint(uint(queryStart), 0),
				sitter.NewPoint(uint(queryEnd+1), 0),
			)
			iter := rangeCursor.Captures(bc.query, root, source)
			captures = append(captures, collectCaptures(iter, bc.query)...)
			rangeCursor.Close()
		}
	}

	// Sort the captures in order of their character occurrence in the file;
	// for equal starts the longer span comes first so nested captures are
	// painted on top of it. The sort must be stable: captures with identical
	// spans rely on collection order to decide which one is painted last.
	sort.SliceStable(captures, func(i, j int) bool {
		if captures[i].startRow == captures[j].startRow {
			if captures[i].startCol == captures[j].startCol {
				if captures[i].endRow == captures[j].endRow {
					return captures[i].endCol > captures[j].endCol
				}
				return captures[i].endRow > captures[j].endRow
			}
			return captures[i].startCol < captures[j].startCol
		}
		return captures[i].startRow < captures[j].startRow
	})

	// Basically, this code works by rewriting the same range and the last capture wins.
	// This is a great spot for optimization: No need to draw many times, just pick the best one.
	// Or maybe when we sort, if we find ones that are the same, remove the first one, and then
	// we just keep the last one. Then this code can stay the same but will not suffer so many
	// rewrites.
	//
	// rowBytes memoizes the byte form of each line so a row hit by many
	// captures is converted once instead of once per capture.
	rowBytes := make([][]byte, lineCount)
	for _, c := range captures {
		sty := t.CaptureStyle(c.name)
		for row := c.startRow; row <= c.endRow; row++ {
			if int(row) >= lineCount {
				break
			}
			if !fullRebuild && !rowInRanges(int(row), dirty) {
				continue
			}

			lineBytes := rowBytes[row]
			if lineBytes == nil {
				lineBytes = []byte(lines[row])
				rowBytes[row] = lineBytes
			}

			// Interior rows of a multi-line capture are covered entirely;
			// only the first and last rows are bounded by the capture columns.
			startByteCol := uint(0)
			if row == c.startRow {
				startByteCol = c.startCol
			}
			endByteCol := uint(len(lineBytes))
			if row == c.endRow {
				endByteCol = min(c.endCol, uint(len(lineBytes)))
			}

			startRune := byteColToRuneIndex(lineBytes, int(startByteCol))
			endRune := byteColToRuneIndex(lineBytes, int(endByteCol))

			rowStyles := bc.lines[int(row)]
			if startRune < 0 {
				startRune = 0
			}
			if endRune > len(rowStyles) {
				endRune = len(rowStyles)
			}
			if startRune >= endRune {
				continue
			}

			// rowStyles shares its backing array with the cached row, so
			// writing through it updates the cache in place.
			for i := startRune; i < endRune; i++ {
				rowStyles[i] = sty
			}
		}
	}

	bc.dirtyAll = false
	bc.dirty = nil
	bc.count = lineCount
	bc.built = true
}