// Gim/internal/syntax/treesitter_utils.go

package syntax

import (
	"sort"
	"strings"

	"git.gophernest.net/azpect/TextEditor/internal/core"
	"github.com/charmbracelet/lipgloss"
	sitter "github.com/tree-sitter/go-tree-sitter"
)

// addDirtyRange appends one line span to bc.dirty and re-merges the list.
//
// Input handling, in order:
//   - a nil bc makes the call a no-op
//   - start and end may arrive in either order; the smaller becomes the start
//   - each bound is raised to 0 if it is negative
//
// The resulting span is appended to bc.dirty and the whole list is passed
// through mergeRanges, whose result replaces bc.dirty.
func addDirtyRange(bc *bufferCache, start, end int) {
	if bc == nil {
		return
	}

	// Order the bounds first, then clamp each one at zero.
	lo, hi := min(start, end), max(start, end)
	span := lineRange{start: max(0, lo), end: max(0, hi)}

	bc.dirty = mergeRanges(append(bc.dirty, span))
}

// normalizedDirtyRanges returns the given ranges restricted to the rows that
// exist in a buffer of lineCount lines, merged into canonical form.
//
// Behavior:
//   - nil is returned when ranges is empty or lineCount is not positive
//   - each start is raised to at least 0 and each end lowered to at most
//     lineCount-1
//   - a range whose start ends up greater than its end is discarded (this
//     covers ranges entirely outside the buffer as well as reversed input)
//   - the surviving ranges are handed to mergeRanges and its result returned
//
// The input slice itself is not modified; clamped copies are collected into a
// fresh slice.
func normalizedDirtyRanges(ranges []lineRange, lineCount int) []lineRange {
	if len(ranges) == 0 || lineCount <= 0 {
		return nil
	}

	lastRow := lineCount - 1
	kept := make([]lineRange, 0, len(ranges))
	for i := range ranges {
		lo := max(ranges[i].start, 0)
		hi := min(ranges[i].end, lastRow)
		if lo <= hi {
			kept = append(kept, lineRange{start: lo, end: hi})
		}
	}

	return mergeRanges(kept)
}

// mergeRanges collapses a list of line ranges into a sorted list in which no
// two entries overlap or touch.
//
// Ranges are ordered by start (ties broken by end) and then swept once: a
// range whose start is at most one past the end of the previous output entry
// is folded into that entry, so [1,3] followed by [4,6] yields [1,6]. Anything
// starting further away opens a new entry.
//
// An empty input returns nil. The input slice is sorted in place as a side
// effect; the returned slice is separately allocated.
func mergeRanges(ranges []lineRange) []lineRange {
	if len(ranges) == 0 {
		return nil
	}

	sort.Slice(ranges, func(a, b int) bool {
		lhs, rhs := ranges[a], ranges[b]
		if lhs.start != rhs.start {
			return lhs.start < rhs.start
		}
		return lhs.end < rhs.end
	})

	out := make([]lineRange, 0, len(ranges))
	out = append(out, ranges[0])
	for _, next := range ranges[1:] {
		tail := &out[len(out)-1]
		if next.start > tail.end+1 {
			// Gap of at least one row: start a new output entry.
			out = append(out, next)
			continue
		}
		// Overlapping or adjacent: grow the current entry if needed.
		tail.end = max(tail.end, next.end)
	}
	return out
}

// rowInRanges reports whether row lies inside at least one of the ranges.
//
// Both bounds of a range are inclusive. The ranges are scanned linearly in the
// order given and need not be sorted or merged; an empty or nil list yields
// false.
func rowInRanges(row int, ranges []lineRange) bool {
	for i := range ranges {
		if row < ranges[i].start || row > ranges[i].end {
			continue
		}
		return true
	}
	return false
}

// defaultLineStyles returns a slice holding one copy of base for every rune in
// line.
//
// The length is the rune count of line, not its byte length, so a multibyte
// UTF-8 character contributes a single entry. An empty line produces an empty,
// non-nil slice. Callers can then overwrite individual entries for the spans
// that need a different style.
func defaultLineStyles(line string, base lipgloss.Style) []lipgloss.Style {
	styles := make([]lipgloss.Style, 0, len([]rune(line)))
	// Ranging over a string steps rune by rune, giving one entry per rune.
	for range line {
		styles = append(styles, base)
	}
	return styles
}

// collectCaptures drains a Tree-sitter capture iterator and returns the
// position of every capture it keeps, tagged with the capture's name.
//
// For each (match, capture index) pair produced by iter.Next, the selected
// capture's name is looked up in query.CaptureNames() and the node's start and
// end positions (row and column) are stored in a captureRange.
//
// Captures are dropped when:
//   - their Index is not a valid position in query.CaptureNames()
//   - their name is "spell"; those are not recorded as highlight ranges here
//
// A nil query returns nil. Otherwise the result is always a non-nil slice,
// which is empty when nothing was kept.
func collectCaptures(iter sitter.QueryCaptures, query *sitter.Query) []captureRange {
	if query == nil {
		return nil
	}

	captureNames := query.CaptureNames()
	found := []captureRange{}
	for {
		match, idx := iter.Next()
		if match == nil {
			// Iterator exhausted.
			break
		}

		hit := match.Captures[idx]
		if int(hit.Index) >= len(captureNames) {
			continue
		}
		label := captureNames[hit.Index]
		if label == "spell" {
			continue
		}

		node := hit.Node
		from, to := node.StartPosition(), node.EndPosition()
		found = append(found, captureRange{
			startRow: from.Row,
			startCol: from.Column,
			endRow:   to.Row,
			endCol:   to.Column,
			name:     label,
		})
	}

	return found
}

// buildBufferSource returns the whole buffer as one byte slice, with the lines
// joined by a single '\n' between each pair.
//
// Lines are read with buf.Line for indexes 0 through buf.LineCount()-1. No
// newline is added after the last line, so row N of the result corresponds to
// buffer line N. A buffer reporting zero lines yields an empty, non-nil slice.
func buildBufferSource(buf *core.Buffer) []byte {
	total := buf.LineCount()
	if total == 0 {
		return []byte{}
	}

	rows := make([]string, 0, total)
	for i := 0; i < total; i++ {
		rows = append(rows, buf.Line(i))
	}

	joined := strings.Join(rows, "\n")
	return []byte(joined)
}

// byteColToRuneIndex converts a byte offset within line into a rune index.
//
// Tree-sitter reports columns in bytes, while per-rune data (such as a style
// slice with one entry per rune) is indexed by rune. The result is the number
// of runes in line that begin before byteCol.
//
// Boundary behavior:
//   - byteCol <= 0 returns 0
//   - byteCol >= len(line) returns the rune count of the entire line
//   - byteCol inside a multibyte rune returns the index just past that rune
//
// The result never exceeds the rune count of line. Counting runes in the full
// line, rather than in the truncated prefix line[:byteCol], is what guarantees
// this: a prefix cut in the middle of a rune decodes as one replacement rune
// per leftover byte and would overcount.
//
// Invalid UTF-8 bytes count as one rune each, matching a []rune conversion.
func byteColToRuneIndex(line []byte, byteCol int) int {
	if byteCol <= 0 {
		return 0
	}
	if byteCol >= len(line) {
		return len([]rune(string(line)))
	}

	idx := 0
	// Ranging over a string yields the byte offset at which each rune starts.
	for off := range string(line) {
		if off >= byteCol {
			break
		}
		idx++
	}
	return idx
}