Gim/internal/syntax/registry.go
Hayden Hargreaves 6034e44364
All checks were successful
Run Test Suite / test (push) Successful in 35s
feat: implemented more languages
HTML isn't great, but I guess there isn't much to color, or maybe it's just
the styles. The next step is making the color schemes.
2026-04-07 11:40:34 -07:00

292 lines
8.9 KiB
Go

package syntax
import (
"fmt"
"strings"
sitter "github.com/tree-sitter/go-tree-sitter"
ts_bash "github.com/tree-sitter/tree-sitter-bash/bindings/go"
ts_csharp "github.com/tree-sitter/tree-sitter-c-sharp/bindings/go"
ts_c "github.com/tree-sitter/tree-sitter-c/bindings/go"
ts_cpp "github.com/tree-sitter/tree-sitter-cpp/bindings/go"
ts_css "github.com/tree-sitter/tree-sitter-css/bindings/go"
ts_go "github.com/tree-sitter/tree-sitter-go/bindings/go"
ts_html "github.com/tree-sitter/tree-sitter-html/bindings/go"
ts_java "github.com/tree-sitter/tree-sitter-java/bindings/go"
ts_js "github.com/tree-sitter/tree-sitter-javascript/bindings/go"
ts_json "github.com/tree-sitter/tree-sitter-json/bindings/go"
ts_python "github.com/tree-sitter/tree-sitter-python/bindings/go"
ts_ruby "github.com/tree-sitter/tree-sitter-ruby/bindings/go"
ts_rust "github.com/tree-sitter/tree-sitter-rust/bindings/go"
ts_ts "github.com/tree-sitter/tree-sitter-typescript/bindings/go"
)
// languagePack describes one language known to the registry: its identifier,
// the filetype aliases and filename extensions that select it, and the two
// callbacks used to build its tree-sitter language handle and highlights query.
//
// Packs are handed to languageRegistry.register, which indexes them by every
// alias and extension listed here.
type languagePack struct {
// languagePack.id is the stable registry identifier (for example, "go").
// It is also the key under which compiled assets are cached.
id string
// languagePack.filetypes are aliases matched against the buffer filetype.
// They are normalized with normalizeKey when the pack is registered.
filetypes []string
// languagePack.extensions are filename extensions (for example, ".go").
// They are normalized with normalizeExtension when the pack is registered.
extensions []string
// languagePack.newLanguage constructs the tree-sitter language handle.
// languageRegistry.resolve treats a nil result as an error.
newLanguage func() *sitter.Language
// languagePack.loadQuery returns highlights query source for this language.
// A non-nil error is wrapped and returned by languageRegistry.resolve.
loadQuery func() ([]byte, error)
}
// resolvedLanguage stores compiled runtime assets for one language.
//
// Instances are cached in languageRegistry.compiledByLang and reused by all
// buffers that resolve to the same language id.
type resolvedLanguage struct {
// id is the languagePack.id this entry was compiled from.
id string
// language is the handle returned by the pack's newLanguage callback.
language *sitter.Language
// query is the highlights query compiled against language from the source
// returned by the pack's loadQuery callback.
query *sitter.Query
}
// languageRegistry maps buffer metadata to language packs and lazily compiles
// tree-sitter language/query assets.
//
// NOTE(review): no locking is visible in this file; callers presumably access
// a registry from a single goroutine — confirm before sharing one.
type languageRegistry struct {
// packs holds every registered pack in registration order.
packs []languagePack
// byFiletype indexes packs by normalizeKey'd filetype alias. A later
// registration using the same alias replaces the earlier entry.
byFiletype map[string]languagePack
// byExtension indexes packs by normalizeExtension'd extension (leading dot
// included). A later registration using the same extension replaces the
// earlier entry.
byExtension map[string]languagePack
// compiledByLang caches successfully compiled assets keyed by pack id.
// Failed compilations are not stored.
compiledByLang map[string]*resolvedLanguage
}
// newLanguageRegistry builds the default in-process language registry.
//
// The built-in packs are declared as a table and registered in declaration
// order, which populates the filetype and extension lookup maps. Nothing is
// compiled here: language handles and queries are created on demand by
// resolve.
func newLanguageRegistry() *languageRegistry {
	builtins := []languagePack{
		{
			id:          "go",
			filetypes:   []string{"go", "golang"},
			extensions:  []string{".go"},
			newLanguage: func() *sitter.Language { return sitter.NewLanguage(ts_go.Language()) },
			loadQuery:   loadGoHighlightsQuery,
		},
		{
			id:          "javascript",
			filetypes:   []string{"javascript", "js", "jsx"},
			extensions:  []string{".js", ".mjs", ".cjs", ".jsx"},
			newLanguage: func() *sitter.Language { return sitter.NewLanguage(ts_js.Language()) },
			loadQuery:   loadJavaScriptHighlightsQuery,
		},
		{
			id:          "typescript",
			filetypes:   []string{"typescript", "ts"},
			extensions:  []string{".ts"},
			newLanguage: func() *sitter.Language { return sitter.NewLanguage(ts_ts.LanguageTypescript()) },
			loadQuery:   loadTypeScriptHighlightsQuery,
		},
		{
			id:          "tsx",
			filetypes:   []string{"tsx"},
			extensions:  []string{".tsx"},
			newLanguage: func() *sitter.Language { return sitter.NewLanguage(ts_ts.LanguageTSX()) },
			loadQuery:   loadTSXHighlightsQuery,
		},
		{
			id:          "python",
			filetypes:   []string{"python", "py"},
			extensions:  []string{".py"},
			newLanguage: func() *sitter.Language { return sitter.NewLanguage(ts_python.Language()) },
			loadQuery:   loadPythonHighlightsQuery,
		},
		{
			id:          "rust",
			filetypes:   []string{"rust", "rs"},
			extensions:  []string{".rs"},
			newLanguage: func() *sitter.Language { return sitter.NewLanguage(ts_rust.Language()) },
			loadQuery:   loadRustHighlightsQuery,
		},
		{
			id:          "bash",
			filetypes:   []string{"bash", "sh", "shell"},
			extensions:  []string{".sh", ".bash"},
			newLanguage: func() *sitter.Language { return sitter.NewLanguage(ts_bash.Language()) },
			loadQuery:   loadBashHighlightsQuery,
		},
		{
			id:          "json",
			filetypes:   []string{"json"},
			extensions:  []string{".json"},
			newLanguage: func() *sitter.Language { return sitter.NewLanguage(ts_json.Language()) },
			loadQuery:   loadJSONHighlightsQuery,
		},
		{
			id:          "css",
			filetypes:   []string{"css"},
			extensions:  []string{".css"},
			newLanguage: func() *sitter.Language { return sitter.NewLanguage(ts_css.Language()) },
			loadQuery:   loadCSSHighlightsQuery,
		},
		{
			id:          "html",
			filetypes:   []string{"html"},
			extensions:  []string{".html", ".htm"},
			newLanguage: func() *sitter.Language { return sitter.NewLanguage(ts_html.Language()) },
			loadQuery:   loadHTMLHighlightsQuery,
		},
		{
			id:          "c",
			filetypes:   []string{"c"},
			extensions:  []string{".c", ".h"},
			newLanguage: func() *sitter.Language { return sitter.NewLanguage(ts_c.Language()) },
			loadQuery:   loadCHighlightsQuery,
		},
		{
			id:          "cpp",
			filetypes:   []string{"cpp", "c++", "hpp"},
			extensions:  []string{".cc", ".cpp", ".cxx", ".hpp", ".hh", ".hxx"},
			newLanguage: func() *sitter.Language { return sitter.NewLanguage(ts_cpp.Language()) },
			loadQuery:   loadCppHighlightsQuery,
		},
		{
			id:          "java",
			filetypes:   []string{"java"},
			extensions:  []string{".java"},
			newLanguage: func() *sitter.Language { return sitter.NewLanguage(ts_java.Language()) },
			loadQuery:   loadJavaHighlightsQuery,
		},
		{
			id:          "csharp",
			filetypes:   []string{"csharp", "cs"},
			extensions:  []string{".cs"},
			newLanguage: func() *sitter.Language { return sitter.NewLanguage(ts_csharp.Language()) },
			loadQuery:   loadCSharpHighlightsQuery,
		},
		{
			id:          "ruby",
			filetypes:   []string{"ruby", "rb"},
			extensions:  []string{".rb"},
			newLanguage: func() *sitter.Language { return sitter.NewLanguage(ts_ruby.Language()) },
			loadQuery:   loadRubyHighlightsQuery,
		},
	}

	registry := &languageRegistry{
		packs:          make([]languagePack, 0, len(builtins)),
		byFiletype:     make(map[string]languagePack),
		byExtension:    make(map[string]languagePack),
		compiledByLang: make(map[string]*resolvedLanguage),
	}
	for _, pack := range builtins {
		registry.register(pack)
	}
	return registry
}
// register records pack in the registry and makes it discoverable.
//
// The pack is appended to r.packs, then indexed under each of its filetype
// aliases (via normalizeKey) and each of its extensions (via
// normalizeExtension). Keys that normalize to the empty string are skipped.
// If a key is already present, the new pack replaces the previous entry.
func (r *languageRegistry) register(pack languagePack) {
	r.packs = append(r.packs, pack)

	for _, alias := range pack.filetypes {
		key := normalizeKey(alias)
		if key == "" {
			continue
		}
		r.byFiletype[key] = pack
	}

	for _, suffix := range pack.extensions {
		key := normalizeExtension(suffix)
		if key == "" {
			continue
		}
		r.byExtension[key] = pack
	}
}
// resolve looks up the language for a buffer and returns its compiled
// language handle and highlights query.
//
// The pack is selected by resolvePack (filetype first, then filename
// extension). The boolean result is true only when compiled assets are
// returned; it is false both when no pack matches (nil error) and when
// building the assets fails (non-nil error). Successful results are stored in
// compiledByLang under the pack id and reused by later calls; failures are
// not cached.
func (r *languageRegistry) resolve(filetype, filename string) (*resolvedLanguage, bool, error) {
	pack, found := r.resolvePack(filetype, filename)
	if !found {
		return nil, false, nil
	}

	if entry, hit := r.compiledByLang[pack.id]; hit {
		return entry, true, nil
	}

	handle := pack.newLanguage()
	if handle == nil {
		return nil, false, fmt.Errorf("language %q did not provide a language handle", pack.id)
	}

	source, err := pack.loadQuery()
	if err != nil {
		return nil, false, fmt.Errorf("load query for %q: %w", pack.id, err)
	}

	// Keep the query error in its own variable rather than reusing err: the
	// nil check must be made on the value NewQuery returns, not on an error
	// interface that could wrap a typed nil.
	query, queryErr := sitter.NewQuery(handle, string(source))
	if queryErr != nil {
		return nil, false, fmt.Errorf("compile query for %q: %w", pack.id, queryErr)
	}

	entry := &resolvedLanguage{
		id:       pack.id,
		language: handle,
		query:    query,
	}
	r.compiledByLang[pack.id] = entry
	return entry, true, nil
}
// resolvePack selects the registered language pack for a buffer without
// compiling anything.
//
// The normalized filetype is tried first; only when it has no entry is the
// extension extracted from filename consulted. The boolean result reports
// whether a pack was found; on a miss the returned pack is the zero value.
func (r *languageRegistry) resolvePack(filetype, filename string) (languagePack, bool) {
	pack, found := r.byFiletype[normalizeKey(filetype)]
	if !found {
		// A map miss yields the zero languagePack, so pack needs no reset.
		pack, found = r.byExtension[extensionOf(filename)]
	}
	return pack, found
}
// normalizeKey canonicalizes a filetype-like key for registry lookups: the
// input is lower-cased, surrounding whitespace is trimmed, and a single
// leading "." is removed if present.
func normalizeKey(s string) string {
	lowered := strings.ToLower(s)
	trimmed := strings.TrimSpace(lowered)
	return strings.TrimPrefix(trimmed, ".")
}
// normalizeExtension canonicalizes an extension key: it is lower-cased and
// trimmed of surrounding whitespace, and a leading "." is added when missing.
// Input that is empty after trimming yields the empty string.
func normalizeExtension(ext string) string {
	switch cleaned := strings.TrimSpace(strings.ToLower(ext)); {
	case cleaned == "":
		return ""
	case cleaned[0] == '.':
		return cleaned
	default:
		return "." + cleaned
	}
}
// extensionOf extracts a normalized extension from a filename or path.
//
// The input is lower-cased and trimmed of surrounding whitespace, and only
// its final path element is inspected (both "/" and "\" are treated as
// separators). The result includes the leading dot, for example ".go".
//
// The empty string is returned when no usable extension is present: the name
// is empty, contains no dot, ends with a dot, or is a dotfile whose only dot
// is the first character (for example ".bashrc").
func extensionOf(filename string) string {
	name := strings.TrimSpace(strings.ToLower(filename))

	// Drop directory components first. Otherwise a dot in a parent directory
	// ("pkg.v2/README") is mistaken for an extension, and the dotfile guard
	// below is defeated for any dotfile given with a path ("/home/u/.json").
	if sep := strings.LastIndexAny(name, `/\`); sep >= 0 {
		name = name[sep+1:]
	}

	dot := strings.LastIndex(name, ".")
	// dot < 0: no dot at all; dot == 0: dotfile; dot at the end: trailing dot.
	if dot <= 0 || dot == len(name)-1 {
		return ""
	}
	return name[dot:]
}