package syntax import ( "bytes" "sort" "git.gophernest.net/azpect/TextEditor/internal/core" "git.gophernest.net/azpect/TextEditor/internal/theme" "github.com/charmbracelet/lipgloss" sitter "github.com/tree-sitter/go-tree-sitter" ) // TreeSitterEngine provides syntax highlighting using Tree-sitter queries. // // The engine stores per-buffer parser state and a cached style map so redraws // can reuse prior work. It supports both full rebuilds and incremental edits: // - full rebuilds when a buffer is first seen, language changes, or state is invalid // - incremental updates when ApplyEdit provides enough information to reparse // only changed regions // // Cached styles are represented as one style per rune for each line. type TreeSitterEngine struct { registry *languageRegistry cache map[*core.Buffer]*bufferCache } // bufferCache stores all derived highlighting state for a single buffer. // // It contains both style output (`lines`) and parse/query state (`parser`, // `tree`, `source`, language/query bindings) so the engine can incrementally // update only dirty lines instead of recomputing the whole file each frame. type bufferCache struct { built bool lines map[int][]lipgloss.Style count int parser *sitter.Parser tree *sitter.Tree source []byte dirtyAll bool dirty []lineRange langID string language *sitter.Language query *sitter.Query } // lineRange is an inclusive line interval [start, end]. // // Dirty tracking and partial restyling use this type to represent which rows // need work. type lineRange struct { start int end int } // captureRange describes one Tree-sitter capture span. // // Coordinates are in row/byte-column space, matching Tree-sitter node // positions. The range is later converted to rune indexes for style writes. type captureRange struct { startRow uint startCol uint endRow uint endCol uint name string } // NewTreeSitterEngine constructs a TreeSitterEngine with the provided style set. // // Language support is resolved through the language registry, so the engine can // work with any language/query pair registered there. func NewTreeSitterEngine(t theme.EditorTheme) *TreeSitterEngine { return &TreeSitterEngine{ registry: newLanguageRegistry(), cache: map[*core.Buffer]*bufferCache{}, } } // PrepareBuffer ensures highlighting data for buf is ready to read. // // This method is idempotent: if cached styles are already valid (`built`), it // returns immediately. Otherwise it resolves language support and performs a // rebuild pass (full or dirty-range-based) to refresh `bc.lines`. // // If the buffer language is unsupported or resolution fails, it still marks the // cache as built with an empty style map so callers can safely continue. func (e *TreeSitterEngine) PrepareBuffer(buf *core.Buffer, t theme.EditorTheme) { // Cannot prepare a nil buffer if buf == nil { return } // Get the buffers cache and return if we are already "built" (ready to render). bc := e.getCache(buf) if bc.count != buf.LineCount() { bc.dirtyAll = true } if bc.dirtyAll { bc.built = false } if bc.built { return } // If we do no support the buffer, load empty styles into the cache lang, ok, err := e.resolveBufferLanguage(buf, bc) if err != nil || !ok { bc.lines = map[int][]lipgloss.Style{} bc.built = true return } _ = lang e.buildFullBuffer(buf, bc, t) } // LineStyleMap returns the style row for a specific line in buf. // // It first guarantees buffer preparation, then returns cached styles when // available. Missing lines are lazily initialized to the base line style and // stored in cache to keep downstream rendering logic simple. func (e *TreeSitterEngine) LineStyleMap(buf *core.Buffer, line int, t theme.EditorTheme) []lipgloss.Style { if buf == nil { return nil } e.PrepareBuffer(buf, t) bc := e.getCache(buf) if s, ok := bc.lines[line]; ok { return s } runes := []rune(buf.Line(line)) out := make([]lipgloss.Style, len(runes)) for i := range out { out[i] = t.Line } bc.lines[line] = out return out } // ApplyEdit applies an incremental buffer edit to parser and style cache state. // // Workflow: // - validate buffer and language support // - apply the edit to the current parse tree (InputEdit) // - reparse using the previous tree as incremental context // - collect changed line ranges from both the user edit and parser changes // - mark cache as unbuilt so the next PrepareBuffer restyles only dirty areas // // If incremental parsing cannot proceed (missing parser/tree/source or parse // failure), it falls back to a full-dirty rebuild on the next preparation. func (e *TreeSitterEngine) ApplyEdit(buf *core.Buffer, edit *core.BufferEdit) { if buf == nil || edit == nil { return } bc := e.getCache(buf) lang, ok, err := e.resolveBufferLanguage(buf, bc) if err != nil || !ok { bc.built = false bc.dirtyAll = true return } _ = lang if bc.parser == nil { bc.parser = sitter.NewParser() bc.parser.SetLanguage(bc.language) } if bc.tree == nil || len(bc.source) == 0 { bc.dirtyAll = true return } bc.tree.Edit(&sitter.InputEdit{ StartByte: edit.StartByte, OldEndByte: edit.OldEndByte, NewEndByte: edit.NewEndByte, StartPosition: sitter.NewPoint(edit.StartPoint.Row, edit.StartPoint.Column), OldEndPosition: sitter.NewPoint(edit.OldEndPoint.Row, edit.OldEndPoint.Column), NewEndPosition: sitter.NewPoint(edit.NewEndPoint.Row, edit.NewEndPoint.Column), }) newSource := buildBufferSource(buf) newTree := bc.parser.Parse(newSource, bc.tree) if newTree == nil { bc.dirtyAll = true return } changed := bc.tree.ChangedRanges(newTree) newLineCount := buf.LineCount() if newLineCount != bc.count { bc.dirtyAll = true bc.dirty = nil } else { startRow := int(edit.StartPoint.Row) endRow := int(max(edit.OldEndPoint.Row, edit.NewEndPoint.Row)) addDirtyRange(bc, startRow, endRow) for _, r := range changed { addDirtyRange(bc, int(r.StartPoint.Row), int(r.EndPoint.Row)) } } bc.source = newSource bc.tree.Close() bc.tree = newTree bc.built = false } // InvalidateBuffer marks all cached highlighting data for buf as stale. // // The next PrepareBuffer call will rebuild styles from scratch for the buffer. func (e *TreeSitterEngine) InvalidateBuffer(buf *core.Buffer) { if buf == nil { return } bc := e.getCache(buf) bc.built = false bc.dirtyAll = true bc.dirty = nil } // InvalidateLines marks a line interval in buf as dirty. // // The range is inclusive and normalized by addDirtyRange. On the next // preparation pass, those lines (plus capture-context neighbors) are // recalculated while unchanged lines are preserved. func (e *TreeSitterEngine) InvalidateLines(buf *core.Buffer, startLine, endLine int) { if buf == nil { return } bc := e.getCache(buf) addDirtyRange(bc, startLine, endLine) bc.built = false } // resolveBufferLanguage resolves and applies language/query config for buf. // // It asks the registry to resolve filetype/filename to a concrete language id, // language object, and highlight query. When the resolved language id changes, // parser/query bindings are updated and the cache is marked dirty for rebuild. // // Returns (resolved, true, nil) on success. When unsupported it returns // (nil, false, nil). Resolution errors are returned as the third value. func (e *TreeSitterEngine) resolveBufferLanguage(buf *core.Buffer, bc *bufferCache) (*resolvedLanguage, bool, error) { if e.registry == nil { e.registry = newLanguageRegistry() } resolved, ok, err := e.registry.resolve(buf.Filetype, buf.Filename) if err != nil || !ok { return nil, ok, err } if bc.langID != resolved.id { bc.langID = resolved.id bc.language = resolved.language bc.query = resolved.query if bc.parser != nil { bc.parser.SetLanguage(bc.language) } bc.dirtyAll = true bc.built = false } return resolved, true, nil } // getCache returns the cache object associated with buf, creating it if needed. // // New caches start with an initialized lines map and default zero-values for // parse/highlight state. func (e *TreeSitterEngine) getCache(buf *core.Buffer) *bufferCache { if bc, ok := e.cache[buf]; ok { return bc } bc := &bufferCache{lines: map[int][]lipgloss.Style{}} e.cache[buf] = bc return bc } // buildFullBuffer rebuilds highlight styles for buf using current cache state. // // Despite the name, this method handles both full and partial updates: // - full rebuild: reset every line to base style, query entire file // - partial rebuild: reset only dirty lines, query around dirty ranges // // It (re)parses source when needed, collects query captures, sorts captures by // precedence order, then writes styles onto per-rune line slices. After a // successful pass it clears dirty flags and marks the cache as built. func (e *TreeSitterEngine) buildFullBuffer(buf *core.Buffer, bc *bufferCache, t theme.EditorTheme) { lineCount := buf.LineCount() // Load the lines into memory. There is no method for this due to the buffers // internal implementation using a gap buffer. So the "Lines" property is of // type []*GapBuffer. lines := make([]string, lineCount) for i := range lineCount { lines[i] = buf.Line(i) } fullRebuild := bc.dirtyAll || len(bc.lines) == 0 || len(bc.dirty) == 0 if fullRebuild { bc.lines = map[int][]lipgloss.Style{} for i := range lineCount { bc.lines[i] = defaultLineStyles(lines[i], t.Line) } } else { dirty := normalizedDirtyRanges(bc.dirty, lineCount) for _, r := range dirty { for i := r.start; i <= r.end; i++ { bc.lines[i] = defaultLineStyles(lines[i], t.Line) } } } source := buildBufferSource(buf) useCurrentTree := bc.tree != nil && bytes.Equal(bc.source, source) if bc.parser == nil { bc.parser = sitter.NewParser() bc.parser.SetLanguage(bc.language) } if !useCurrentTree { var baseTree *sitter.Tree if bc.tree != nil { baseTree = bc.tree } tree := bc.parser.Parse(source, baseTree) if tree == nil { bc.built = true return } if bc.tree != nil { bc.tree.Close() } bc.tree = tree bc.source = source } root := bc.tree.RootNode() cursor := sitter.NewQueryCursor() defer cursor.Close() var captures []captureRange if fullRebuild { iter := cursor.Captures(bc.query, root, source) captures = append(captures, collectCaptures(iter, bc.query)...) } else { dirty := normalizedDirtyRanges(bc.dirty, lineCount) for _, r := range dirty { queryStart := max(0, r.start-1) queryEnd := min(lineCount-1, r.end+1) rangeCursor := sitter.NewQueryCursor() rangeCursor.SetPointRange( sitter.NewPoint(uint(queryStart), 0), sitter.NewPoint(uint(queryEnd+1), 0), ) iter := rangeCursor.Captures(bc.query, root, source) captures = append(captures, collectCaptures(iter, bc.query)...) rangeCursor.Close() } } // Sort the captures in order of their character occurrence in the file sort.Slice(captures, func(i, j int) bool { if captures[i].startRow == captures[j].startRow { if captures[i].startCol == captures[j].startCol { if captures[i].endRow == captures[j].endRow { return captures[i].endCol > captures[j].endCol } return captures[i].endRow > captures[j].endRow } return captures[i].startCol < captures[j].startCol } return captures[i].startRow < captures[j].startRow }) // Basically, this code works by rewriting the same range and the last capture wins. // This is a great spot for optimization: No need to draw many times, just pick the best one. // Or maybe when we sort, if we find ones that are the same, remove the first one, and then // we just keep the last one. Then this code can stay the same but will not suffer so many // rewrites. targetDirty := normalizedDirtyRanges(bc.dirty, lineCount) for _, c := range captures { sty := t.CaptureStyle(c.name) for row := c.startRow; row <= c.endRow; row++ { if int(row) >= len(lines) { break } if !fullRebuild && !rowInRanges(int(row), targetDirty) { continue } lineBytes := []byte(lines[row]) startByteCol := uint(0) if row == c.startRow { startByteCol = c.startCol } endByteCol := uint(len(lineBytes)) if row == c.endRow { endByteCol = min(c.endCol, uint(len(lineBytes))) } startRune := byteColToRuneIndex(lineBytes, int(startByteCol)) endRune := byteColToRuneIndex(lineBytes, int(endByteCol)) rowStyles := bc.lines[int(row)] if startRune < 0 { startRune = 0 } if endRune > len(rowStyles) { endRune = len(rowStyles) } if startRune >= endRune { continue } for i := startRune; i < endRune; i++ { rowStyles[i] = sty } bc.lines[int(row)] = rowStyles } } bc.dirtyAll = false bc.dirty = nil bc.count = lineCount bc.built = true }