feat: implemented abstraction for the queries and regs

This commit is contained in:
Hayden Hargreaves 2026-04-07 11:01:07 -07:00
parent 7c15f41ab1
commit 624439a0cf
10 changed files with 505 additions and 349 deletions

2
go.mod
View File

@ -8,6 +8,7 @@ require (
github.com/charmbracelet/x/exp/teatest v0.0.0-20260209132835-6b065b8ba62c
github.com/tree-sitter/go-tree-sitter v0.25.0
github.com/tree-sitter/tree-sitter-go v0.25.0
github.com/tree-sitter/tree-sitter-javascript v0.25.0
)
require (
@ -31,7 +32,6 @@ require (
github.com/muesli/cancelreader v0.2.2 // indirect
github.com/muesli/termenv v0.16.0 // indirect
github.com/rivo/uniseg v0.4.7 // indirect
github.com/tree-sitter/tree-sitter-javascript v0.25.0 // indirect
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect
golang.org/x/sys v0.38.0 // indirect
golang.org/x/text v0.28.0 // indirect

10
internal/syntax/README.md Normal file
View File

@ -0,0 +1,10 @@
# How to add a new language (quick workflow)
1. Add binding dependency in go.mod
2. Add query file under internal/syntax/queries/<lang>/highlights.scm
3. Embed it in internal/syntax/query_assets.go
4. Add one register(...) block in internal/syntax/registry.go
5. Add a test to internal/syntax/query_assets_test.go that checks the new query compiles
## Where to get .scm files
[nvim-treesitter](https://github.com/nvim-treesitter/nvim-treesitter/tree/master/queries)

View File

@ -1,40 +0,0 @@
package syntax
import (
"git.gophernest.net/azpect/TextEditor/internal/core"
"git.gophernest.net/azpect/TextEditor/internal/style"
"github.com/charmbracelet/lipgloss"
)
// PlainEngine is a no-op syntax engine.
// It exists to establish the architecture boundary before Tree-sitter is wired in.
//
// Every method except LineStyleMap has an empty body. LineStyleMap gives each
// rune of a line the same configured line style.
type PlainEngine struct {
// styles is the style set supplied at construction; this type only reads
// styles.LineStyle from it.
styles style.Styles
}
// NewPlainEngine returns a PlainEngine that styles text with the given styles.
func NewPlainEngine(styles style.Styles) *PlainEngine {
	engine := new(PlainEngine)
	engine.styles = styles
	return engine
}
// PrepareBuffer does nothing: the plain engine holds no per-buffer state.
func (e *PlainEngine) PrepareBuffer(buf *core.Buffer) {}
// ApplyEdit does nothing: the plain engine holds no per-buffer state to update.
func (e *PlainEngine) ApplyEdit(buf *core.Buffer, edit *core.BufferEdit) {}
// LineStyleMap returns one style per rune of the requested line, each set to
// the engine's LineStyle. A nil buffer yields a nil slice.
func (e *PlainEngine) LineStyleMap(buf *core.Buffer, line int) []lipgloss.Style {
	if buf == nil {
		return nil
	}

	// Count runes rather than bytes so multi-byte characters get one style each.
	runeCount := len([]rune(buf.Line(line)))
	out := make([]lipgloss.Style, 0, runeCount)
	for n := 0; n < runeCount; n++ {
		out = append(out, e.styles.LineStyle)
	}
	return out
}
// InvalidateBuffer does nothing: there is no cached state to discard.
func (e *PlainEngine) InvalidateBuffer(buf *core.Buffer) {}
// InvalidateLines does nothing: there is no cached state to discard.
func (e *PlainEngine) InvalidateLines(buf *core.Buffer, startLine, endLine int) {}

View File

@ -0,0 +1,204 @@
; Variables
;----------
; Fallback capture: every identifier node is tagged as a variable. The more
; specific patterns below capture some of the same nodes under other names.
(identifier) @variable
; Properties
;-----------
(property_identifier) @property
; Function and method definitions
;--------------------------------
; Name of a named function expression.
(function_expression
name: (identifier) @function)
; Name of a function declaration.
(function_declaration
name: (identifier) @function)
; Name of a method definition.
(method_definition
name: (property_identifier) @function.method)
; Key of a pair whose value is a function expression or an arrow function.
(pair
key: (property_identifier) @function.method
value: [(function_expression) (arrow_function)])
; Property on the left of an assignment whose right side is a function
; expression or an arrow function.
(assignment_expression
left: (member_expression
property: (property_identifier) @function.method)
right: [(function_expression) (arrow_function)])
; Declared name whose value is a function expression or an arrow function.
(variable_declarator
name: (identifier) @function
value: [(function_expression) (arrow_function)])
; Plain identifier assigned a function expression or an arrow function.
(assignment_expression
left: (identifier) @function
right: [(function_expression) (arrow_function)])
; Function and method calls
;--------------------------
; Callee that is a bare identifier.
(call_expression
function: (identifier) @function)
; Callee that is a member access: the accessed property is the method name.
(call_expression
function: (member_expression
property: (property_identifier) @function.method))
; Special identifiers
;--------------------
; Identifiers that start with an uppercase ASCII letter.
((identifier) @constructor
(#match? @constructor "^[A-Z]"))
; Names of two or more characters made only of uppercase ASCII letters, digits
; and underscores, starting with an uppercase letter or an underscore.
([
(identifier)
(shorthand_property_identifier)
(shorthand_property_identifier_pattern)
] @constant
(#match? @constant "^[A-Z_][A-Z\\d_]+$"))
; A fixed set of well-known names.
; NOTE(review): `#is-not? local` is a property predicate; whether the engine
; consuming this query evaluates it is not visible from this file - confirm.
((identifier) @variable.builtin
(#match? @variable.builtin "^(arguments|module|console|window|document)$")
(#is-not? local))
; The identifier `require`.
((identifier) @function.builtin
(#eq? @function.builtin "require")
(#is-not? local))
; Literals
;---------
(this) @variable.builtin
(super) @variable.builtin
[
(true)
(false)
(null)
(undefined)
] @constant.builtin
(comment) @comment
; Plain strings and template strings share one capture name.
[
(string)
(template_string)
] @string
(regex) @string.special
(number) @number
; Tokens
;-------
; Delimiters: statement terminator, member access (plain and optional chain)
; and the comma.
[
";"
(optional_chain)
"."
","
] @punctuation.delimiter
; Operator tokens, including the compound-assignment forms and the arrow "=>".
[
"-"
"--"
"-="
"+"
"++"
"+="
"*"
"*="
"**"
"**="
"/"
"/="
"%"
"%="
"<"
"<="
"<<"
"<<="
"="
"=="
"==="
"!"
"!="
"!=="
"=>"
">"
">="
">>"
">>="
">>>"
">>>="
"~"
"^"
"&"
"|"
"^="
"&="
"|="
"&&"
"||"
"??"
"&&="
"||="
"??="
] @operator
; Bracket pairs.
[
"("
")"
"["
"]"
"{"
"}"
] @punctuation.bracket
; Interpolation inside a template string: the "${" and "}" delimiters are
; captured as special punctuation and the whole substitution node as embedded.
(template_substitution
"${" @punctuation.special
"}" @punctuation.special) @embedded
; Keywords
;---------
; Every keyword token listed here shares the single @keyword capture; no
; finer-grained keyword categories are defined in this query.
[
"as"
"async"
"await"
"break"
"case"
"catch"
"class"
"const"
"continue"
"debugger"
"default"
"delete"
"do"
"else"
"export"
"extends"
"finally"
"for"
"from"
"function"
"get"
"if"
"import"
"in"
"instanceof"
"let"
"new"
"of"
"return"
"set"
"static"
"switch"
"target"
"throw"
"try"
"typeof"
"var"
"void"
"while"
"with"
"yield"
] @keyword

View File

@ -5,6 +5,13 @@ import _ "embed"
// goHighlightsQuery holds the contents of queries/go/highlights.scm, embedded
// at build time.
//go:embed queries/go/highlights.scm
var goHighlightsQuery string
// javascriptHighlightsQuery holds the contents of
// queries/javascript/highlights.scm, embedded at build time.
//go:embed queries/javascript/highlights.scm
var javascriptHighlightsQuery string
// loadGoHighlightsQuery returns the embedded Go highlights query as a fresh
// byte slice. The error result is always nil.
func loadGoHighlightsQuery() ([]byte, error) {
	src := []byte(goHighlightsQuery)
	return src, nil
}
// loadJavaScriptHighlightsQuery returns the embedded JavaScript highlights
// query as a fresh byte slice. The error result is always nil.
func loadJavaScriptHighlightsQuery() ([]byte, error) {
	src := []byte(javascriptHighlightsQuery)
	return src, nil
}

View File

@ -5,6 +5,7 @@ import (
sitter "github.com/tree-sitter/go-tree-sitter"
ts_go "github.com/tree-sitter/tree-sitter-go/bindings/go"
ts_js "github.com/tree-sitter/tree-sitter-javascript/bindings/go"
)
func TestEmbeddedGoQueryCompiles(t *testing.T) {
@ -23,3 +24,20 @@ func TestEmbeddedGoQueryCompiles(t *testing.T) {
}
q.Close()
}
// TestEmbeddedJavaScriptQueryCompiles checks that the embedded JavaScript
// highlights query is non-empty and compiles against the JavaScript grammar.
func TestEmbeddedJavaScriptQueryCompiles(t *testing.T) {
	src, loadErr := loadJavaScriptHighlightsQuery()
	switch {
	case loadErr != nil:
		t.Fatalf("failed loading embedded query: %v", loadErr)
	case len(src) == 0:
		t.Fatalf("embedded query is empty")
	}

	grammar := sitter.NewLanguage(ts_js.Language())
	query, compileErr := sitter.NewQuery(grammar, string(src))
	if compileErr != nil {
		t.Fatalf("embedded javascript query failed to compile: %v", compileErr)
	}
	// The compiled query is only needed to prove compilation succeeded.
	query.Close()
}

183
internal/syntax/registry.go Normal file
View File

@ -0,0 +1,183 @@
package syntax
import (
"fmt"
"strings"
sitter "github.com/tree-sitter/go-tree-sitter"
ts_go "github.com/tree-sitter/tree-sitter-go/bindings/go"
ts_js "github.com/tree-sitter/tree-sitter-javascript/bindings/go"
)
// languagePack describes one supported language: the keys a buffer is matched
// on and the two constructors used to obtain its tree-sitter grammar and
// highlights query. It holds no compiled state.
type languagePack struct {
// languagePack.id is the stable registry identifier (for example, "go").
// It is also the key of the registry's compiled-language cache.
id string
// languagePack.filetypes are normalized aliases resolved from buffer filetype.
filetypes []string
// languagePack.extensions are normalized filename extensions (for example, ".go").
extensions []string
// languagePack.newLanguage constructs the tree-sitter language handle.
// A nil result is reported as an error by the registry's resolve method.
newLanguage func() *sitter.Language
// languagePack.loadQuery returns highlights query source for this language.
loadQuery func() ([]byte, error)
}
// resolvedLanguage stores compiled runtime assets for one language.
//
// Instances are cached in languageRegistry.compiledByLang and reused by all
// buffers that resolve to the same language id.
type resolvedLanguage struct {
// id is copied from the languagePack the assets were built from.
id string
// language is the handle returned by the pack's newLanguage function.
language *sitter.Language
// query is the pack's highlights query compiled against language.
query *sitter.Query
}
// languageRegistry maps buffer metadata to language packs and lazily compiles
// tree-sitter language/query assets.
type languageRegistry struct {
// packs lists every registered pack in registration order.
packs []languagePack
// byFiletype maps a normalized filetype alias to its pack.
byFiletype map[string]languagePack
// byExtension maps a normalized extension (with leading dot) to its pack.
byExtension map[string]languagePack
// compiledByLang caches compiled assets keyed by pack id; entries are added
// on the first successful resolve for that id.
compiledByLang map[string]*resolvedLanguage
}
// newLanguageRegistry constructs the default in-process language registry.
//
// It registers the built-in packs (Go and JavaScript) and fills the filetype
// and extension lookup maps. Nothing is compiled here; grammars and queries
// are built lazily by resolve.
//
// Only languages whose own grammar binding and highlights query are available
// in this package are registered. A pack must not borrow another language's
// grammar and query: the buffer would be parsed with the wrong grammar and
// highlighted incorrectly, whereas an unregistered filetype simply does not
// resolve. For that reason no "gomod" pack is registered until a go.mod
// grammar and query are added.
func newLanguageRegistry() *languageRegistry {
	r := &languageRegistry{
		packs:          []languagePack{},
		byFiletype:     map[string]languagePack{},
		byExtension:    map[string]languagePack{},
		compiledByLang: map[string]*resolvedLanguage{},
	}

	r.register(languagePack{
		id:          "go",
		filetypes:   []string{"go", "golang"},
		extensions:  []string{".go"},
		newLanguage: func() *sitter.Language { return sitter.NewLanguage(ts_go.Language()) },
		loadQuery:   loadGoHighlightsQuery,
	})

	r.register(languagePack{
		id:          "javascript",
		filetypes:   []string{"javascript", "js", "jsx"},
		extensions:  []string{".js", ".mjs", ".cjs", ".jsx"},
		newLanguage: func() *sitter.Language { return sitter.NewLanguage(ts_js.Language()) },
		loadQuery:   loadJavaScriptHighlightsQuery,
	})

	return r
}
// register records pack and makes it reachable through each of its filetype
// aliases and extensions. Keys are normalized before indexing; a key that
// normalizes to the empty string is not indexed. A key that is already present
// is overwritten by the newer pack.
func (r *languageRegistry) register(pack languagePack) {
	r.packs = append(r.packs, pack)

	for i := range pack.filetypes {
		if key := normalizeKey(pack.filetypes[i]); key != "" {
			r.byFiletype[key] = pack
		}
	}

	for i := range pack.extensions {
		if key := normalizeExtension(pack.extensions[i]); key != "" {
			r.byExtension[key] = pack
		}
	}
}
// resolve looks up the language for a buffer and returns its compiled assets.
//
// The pack is chosen by filetype first and by filename extension second. The
// boolean result is false, with a nil error, when no pack matches. The first
// successful resolution of a language id compiles its query and stores the
// result in compiledByLang; later calls return the cached value. Failures are
// not cached, so a later call tries again.
func (r *languageRegistry) resolve(filetype, filename string) (*resolvedLanguage, bool, error) {
	pack, found := r.resolvePack(filetype, filename)
	if !found {
		return nil, false, nil
	}

	if hit, cached := r.compiledByLang[pack.id]; cached {
		return hit, true, nil
	}

	grammar := pack.newLanguage()
	if grammar == nil {
		return nil, false, fmt.Errorf("language %q did not provide a language handle", pack.id)
	}

	source, loadErr := pack.loadQuery()
	if loadErr != nil {
		return nil, false, fmt.Errorf("load query for %q: %w", pack.id, loadErr)
	}

	compiled, compileErr := sitter.NewQuery(grammar, string(source))
	if compileErr != nil {
		return nil, false, fmt.Errorf("compile query for %q: %w", pack.id, compileErr)
	}

	entry := &resolvedLanguage{id: pack.id, language: grammar, query: compiled}
	r.compiledByLang[pack.id] = entry
	return entry, true, nil
}
// resolvePack picks the registered pack for a buffer without compiling
// anything. The normalized filetype is tried first; the filename's extension
// is consulted only when the filetype does not match. The boolean result
// reports whether a pack was found.
func (r *languageRegistry) resolvePack(filetype, filename string) (languagePack, bool) {
	pack, found := r.byFiletype[normalizeKey(filetype)]
	if !found {
		pack, found = r.byExtension[extensionOf(filename)]
	}
	if !found {
		return languagePack{}, false
	}
	return pack, true
}
// normalizeKey produces the lookup form of a filetype-like key: lower-cased,
// with surrounding whitespace removed and a single leading "." dropped.
func normalizeKey(s string) string {
	lowered := strings.ToLower(s)
	return strings.TrimPrefix(strings.TrimSpace(lowered), ".")
}
// normalizeExtension produces the lookup form of an extension: lower-cased,
// trimmed of surrounding whitespace, and prefixed with "." when the dot is
// missing. Blank input yields the empty string.
func normalizeExtension(ext string) string {
	cleaned := strings.TrimSpace(strings.ToLower(ext))
	switch {
	case cleaned == "":
		return ""
	case strings.HasPrefix(cleaned, "."):
		return cleaned
	default:
		return "." + cleaned
	}
}
// extensionOf extracts a normalized (lower-cased, dot-prefixed) extension from
// a filename or path.
//
// Only the final path element is inspected, so a dot inside a directory name
// never contributes to the result. The empty string is returned when the final
// element has no dot, starts with its only dot (a dotfile such as ".bashrc"),
// or ends with a dot.
func extensionOf(filename string) string {
	name := strings.TrimSpace(strings.ToLower(filename))

	// Drop any directory part first. Both separators are accepted so that
	// paths written in either style resolve the same way.
	if sep := strings.LastIndexAny(name, `/\`); sep >= 0 {
		name = name[sep+1:]
	}

	dot := strings.LastIndexByte(name, '.')
	// dot <= 0 covers "no dot" and "leading dot only"; the second test rejects
	// a trailing dot, which would otherwise yield the bare extension ".".
	if dot <= 0 || dot == len(name)-1 {
		return ""
	}
	return name[dot:]
}

View File

@ -0,0 +1,45 @@
package syntax
import "testing"
// TestLanguageRegistryResolveByFiletype checks that the "go" filetype alone,
// with no filename, resolves to the pack whose id is "go".
func TestLanguageRegistryResolveByFiletype(t *testing.T) {
	got, found, err := newLanguageRegistry().resolve("go", "")
	switch {
	case err != nil:
		t.Fatalf("resolve error: %v", err)
	case !found || got == nil:
		t.Fatalf("expected go to resolve")
	case got.id != "go":
		t.Fatalf("expected go id, got %q", got.id)
	}
}
// TestLanguageRegistryResolveByExtension checks that an empty filetype falls
// back to the filename extension, so "main.js" resolves to "javascript".
func TestLanguageRegistryResolveByExtension(t *testing.T) {
	got, found, err := newLanguageRegistry().resolve("", "main.js")
	switch {
	case err != nil:
		t.Fatalf("resolve error: %v", err)
	case !found || got == nil:
		t.Fatalf("expected javascript to resolve")
	case got.id != "javascript":
		t.Fatalf("expected javascript id, got %q", got.id)
	}
}
// TestLanguageRegistryUnknown checks that an unregistered filetype and
// extension yield no result and no error.
func TestLanguageRegistryUnknown(t *testing.T) {
	got, found, err := newLanguageRegistry().resolve("txt", "notes.txt")
	switch {
	case err != nil:
		t.Fatalf("expected no error for unknown language, got: %v", err)
	case found || got != nil:
		t.Fatalf("expected unknown language to not resolve")
	}
}

View File

@ -9,15 +9,11 @@ import (
"git.gophernest.net/azpect/TextEditor/internal/style"
"github.com/charmbracelet/lipgloss"
sitter "github.com/tree-sitter/go-tree-sitter"
ts_go "github.com/tree-sitter/tree-sitter-go/bindings/go"
)
type TreeSitterEngine struct {
styles style.Styles
goLanguage *sitter.Language
goQuery *sitter.Query
queryLoaded bool
registry *languageRegistry
cache map[*core.Buffer]*bufferCache
}
@ -32,6 +28,10 @@ type bufferCache struct {
source []byte
dirtyAll bool
dirty []lineRange
langID string
language *sitter.Language
query *sitter.Query
}
type lineRange struct {
@ -55,6 +55,7 @@ type captureRange struct {
func NewTreeSitterEngine(styles style.Styles) *TreeSitterEngine {
return &TreeSitterEngine{
styles: styles,
registry: newLanguageRegistry(),
cache: map[*core.Buffer]*bufferCache{},
}
}
@ -78,18 +79,13 @@ func (e *TreeSitterEngine) PrepareBuffer(buf *core.Buffer) {
}
// If we do no support the buffer, load empty styles into the cache
if !e.supportsBuffer(buf) {
bc.lines = map[int][]lipgloss.Style{}
bc.built = true
return
}
// Load the query. If we cannot, load empty styles into the cache
if err := e.ensureGoQuery(); err != nil {
lang, ok, err := e.resolveBufferLanguage(buf, bc)
if err != nil || !ok {
bc.lines = map[int][]lipgloss.Style{}
bc.built = true
return
}
_ = lang
e.buildFullBuffer(buf, bc)
}
@ -121,20 +117,17 @@ func (e *TreeSitterEngine) ApplyEdit(buf *core.Buffer, edit *core.BufferEdit) {
}
bc := e.getCache(buf)
if !e.supportsBuffer(buf) {
lang, ok, err := e.resolveBufferLanguage(buf, bc)
if err != nil || !ok {
bc.built = false
bc.dirtyAll = true
return
}
if err := e.ensureGoQuery(); err != nil {
bc.dirtyAll = true
return
}
_ = lang
if bc.parser == nil {
bc.parser = sitter.NewParser()
bc.parser.SetLanguage(e.goLanguage)
bc.parser.SetLanguage(bc.language)
}
if bc.tree == nil || len(bc.source) == 0 {
@ -205,38 +198,28 @@ func (e *TreeSitterEngine) InvalidateLines(buf *core.Buffer, startLine, endLine
// TreeSitterEngine.supportsBuffer: Returns whether the buffer can be parsed and highlighted
// by the engine. When false, there should be a fallback.
func (e *TreeSitterEngine) supportsBuffer(buf *core.Buffer) bool {
ft := strings.TrimPrefix(strings.ToLower(strings.TrimSpace(buf.Filetype)), ".")
if ft == "go" {
return true
}
if strings.HasSuffix(strings.ToLower(buf.Filename), ".go") {
return true
}
return false
}
// TreeSitterEngine.ensureGoQuery: Loads the highlight (.scm) file from the query dir and
// attaches it to the engine. If the query is already loaded, this function does nothing.
func (e *TreeSitterEngine) ensureGoQuery() error {
if e.queryLoaded {
return nil
func (e *TreeSitterEngine) resolveBufferLanguage(buf *core.Buffer, bc *bufferCache) (*resolvedLanguage, bool, error) {
if e.registry == nil {
e.registry = newLanguageRegistry()
}
e.goLanguage = sitter.NewLanguage(ts_go.Language())
qBytes, err := loadGoHighlightsQuery()
if err != nil {
return err
resolved, ok, err := e.registry.resolve(buf.Filetype, buf.Filename)
if err != nil || !ok {
return nil, ok, err
}
q, qErr := sitter.NewQuery(e.goLanguage, string(qBytes))
if qErr != nil {
return qErr
if bc.langID != resolved.id {
bc.langID = resolved.id
bc.language = resolved.language
bc.query = resolved.query
if bc.parser != nil {
bc.parser.SetLanguage(bc.language)
}
bc.dirtyAll = true
bc.built = false
}
e.goQuery = q
e.queryLoaded = true
return nil
return resolved, true, nil
}
// TreeSitterEngine.getCache: Returns the buffers cache. If the cache does not exist, a new one
@ -281,7 +264,7 @@ func (e *TreeSitterEngine) buildFullBuffer(buf *core.Buffer, bc *bufferCache) {
if bc.parser == nil {
bc.parser = sitter.NewParser()
bc.parser.SetLanguage(e.goLanguage)
bc.parser.SetLanguage(bc.language)
}
if !useCurrentTree {
@ -310,8 +293,8 @@ func (e *TreeSitterEngine) buildFullBuffer(buf *core.Buffer, bc *bufferCache) {
var captures []captureRange
if fullRebuild {
iter := cursor.Captures(e.goQuery, root, source)
captures = append(captures, collectCaptures(iter, e.goQuery)...)
iter := cursor.Captures(bc.query, root, source)
captures = append(captures, collectCaptures(iter, bc.query)...)
} else {
dirty := normalizedDirtyRanges(bc.dirty, lineCount)
for _, r := range dirty {
@ -323,8 +306,8 @@ func (e *TreeSitterEngine) buildFullBuffer(buf *core.Buffer, bc *bufferCache) {
sitter.NewPoint(uint(queryStart), 0),
sitter.NewPoint(uint(queryEnd+1), 0),
)
iter := rangeCursor.Captures(e.goQuery, root, source)
captures = append(captures, collectCaptures(iter, e.goQuery)...)
iter := rangeCursor.Captures(bc.query, root, source)
captures = append(captures, collectCaptures(iter, bc.query)...)
rangeCursor.Close()
}
}

View File

@ -1,254 +0,0 @@
; Forked from tree-sitter-go
; Copyright (c) 2014 Max Brunsfeld (The MIT License)
;
; Identifiers
(type_identifier) @type
(type_spec
name: (type_identifier) @type.definition)
(field_identifier) @property
(identifier) @variable
(package_identifier) @module
(parameter_declaration
(identifier) @variable.parameter)
(variadic_parameter_declaration
(identifier) @variable.parameter)
(label_name) @label
(const_spec
name: (identifier) @constant)
; Function calls
(call_expression
function: (identifier) @function.call)
(call_expression
function: (selector_expression
field: (field_identifier) @function.method.call))
; Function definitions
(function_declaration
name: (identifier) @function)
(method_declaration
name: (field_identifier) @function.method)
(method_elem
name: (field_identifier) @function.method)
; Constructors
; Calls whose callee identifier looks like New*/new* or Make*/make*.
; The standard `#match?` predicate is used instead of `#lua-match?`, which is
; specific to Neovim: an engine that does not know `#lua-match?` leaves it
; unevaluated, and every matching call would then be captured as @constructor.
((call_expression
  (identifier) @constructor)
  (#match? @constructor "^[nN]ew.+$"))
((call_expression
  (identifier) @constructor)
  (#match? @constructor "^[mM]ake.+$"))
; Operators
[
"--"
"-"
"-="
":="
"!"
"!="
"..."
"*"
"*"
"*="
"/"
"/="
"&"
"&&"
"&="
"&^"
"&^="
"%"
"%="
"^"
"^="
"+"
"++"
"+="
"<-"
"<"
"<<"
"<<="
"<="
"="
"=="
">"
">="
">>"
">>="
"|"
"|="
"||"
"~"
] @operator
; Keywords
[
"break"
"const"
"continue"
"default"
"defer"
"goto"
"range"
"select"
"var"
"fallthrough"
] @keyword
[
"type"
"struct"
"interface"
] @keyword.type
"func" @keyword.function
"return" @keyword.return
"go" @keyword.coroutine
"for" @keyword.repeat
[
"import"
"package"
] @keyword.import
[
"else"
"case"
"switch"
"if"
] @keyword.conditional
; Builtin types
[
"chan"
"map"
] @type.builtin
((type_identifier) @type.builtin
(#any-of? @type.builtin
"any" "bool" "byte" "comparable" "complex128" "complex64" "error" "float32" "float64" "int"
"int16" "int32" "int64" "int8" "rune" "string" "uint" "uint16" "uint32" "uint64" "uint8"
"uintptr"))
; Builtin functions
((identifier) @function.builtin
(#any-of? @function.builtin
"append" "cap" "clear" "close" "complex" "copy" "delete" "imag" "len" "make" "max" "min" "new"
"panic" "print" "println" "real" "recover"))
; Delimiters
"." @punctuation.delimiter
"," @punctuation.delimiter
":" @punctuation.delimiter
";" @punctuation.delimiter
"(" @punctuation.bracket
")" @punctuation.bracket
"{" @punctuation.bracket
"}" @punctuation.bracket
"[" @punctuation.bracket
"]" @punctuation.bracket
; Literals
(interpreted_string_literal) @string
(raw_string_literal) @string
(rune_literal) @string
(escape_sequence) @string.escape
(int_literal) @number
(float_literal) @number.float
(imaginary_literal) @number
[
(true)
(false)
] @boolean
[
(nil)
(iota)
] @constant.builtin
(keyed_element
.
(literal_element
(identifier) @variable.member))
(field_declaration
name: (field_identifier) @variable.member)
; Comments
(comment) @comment @spell
; Doc Comments
(source_file
.
(comment)+ @comment.documentation)
(source_file
(comment)+ @comment.documentation
.
(const_declaration))
(source_file
(comment)+ @comment.documentation
.
(function_declaration))
(source_file
(comment)+ @comment.documentation
.
(type_declaration))
(source_file
(comment)+ @comment.documentation
.
(var_declaration))
; Spell
((interpreted_string_literal) @spell
(#not-has-parent? @spell import_spec))
; Regex
(call_expression
(selector_expression) @_function
(#any-of? @_function
"regexp.Match" "regexp.MatchReader" "regexp.MatchString" "regexp.Compile" "regexp.CompilePOSIX"
"regexp.MustCompile" "regexp.MustCompilePOSIX")
(argument_list
.
[
(raw_string_literal
(raw_string_literal_content) @string.regexp)
(interpreted_string_literal
(interpreted_string_literal_content) @string.regexp)
]))