From 573b60e20ca4f2556cab54fcd5700f09da9c0d02 Mon Sep 17 00:00:00 2001 From: Jiri Tyr Date: Sat, 14 Mar 2026 17:35:16 +0000 Subject: [PATCH 01/16] Tree-sitter integration for better syntax highlighting Signed-off-by: Jiri Tyr --- .gitignore | 1 + configure.ac | 161 ++ doc/Makefile.am | 2 +- doc/TREE-SITTER | 745 ++++++++ lib/fileloc.h | 1 + misc/Makefile.am | 2 +- misc/syntax-ts/Makefile.am | 76 + misc/syntax-ts/grammars | 108 ++ misc/syntax-ts/queries/ada-highlights.scm | 126 ++ misc/syntax-ts/queries/asm-highlights.scm | 37 + misc/syntax-ts/queries/awk-highlights.scm | 89 + misc/syntax-ts/queries/bash-highlights.scm | 63 + misc/syntax-ts/queries/bison-highlights.scm | 36 + misc/syntax-ts/queries/c-highlights.scm | 123 ++ misc/syntax-ts/queries/c_sharp-highlights.scm | 145 ++ misc/syntax-ts/queries/caddy-highlights.scm | 37 + misc/syntax-ts/queries/cmake-highlights.scm | 45 + misc/syntax-ts/queries/cobol-highlights.scm | 40 + .../queries/commonlisp-highlights.scm | 70 + misc/syntax-ts/queries/cpp-highlights.scm | 148 ++ misc/syntax-ts/queries/css-highlights.scm | 80 + misc/syntax-ts/queries/cuda-highlights.scm | 128 ++ misc/syntax-ts/queries/d-highlights.scm | 173 ++ misc/syntax-ts/queries/diff-highlights.scm | 35 + .../queries/dockerfile-highlights.scm | 59 + misc/syntax-ts/queries/dot-highlights.scm | 58 + misc/syntax-ts/queries/erlang-highlights.scm | 112 ++ misc/syntax-ts/queries/fortran-highlights.scm | 135 ++ misc/syntax-ts/queries/glsl-highlights.scm | 113 ++ misc/syntax-ts/queries/go-highlights.scm | 110 ++ misc/syntax-ts/queries/haskell-highlights.scm | 79 + misc/syntax-ts/queries/hcl-highlights.scm | 85 + misc/syntax-ts/queries/html-highlights.scm | 35 + misc/syntax-ts/queries/idl-highlights.scm | 133 ++ misc/syntax-ts/queries/ini-highlights.scm | 25 + misc/syntax-ts/queries/java-highlights.scm | 141 ++ .../queries/javascript-highlights.scm | 133 ++ misc/syntax-ts/queries/json-highlights.scm | 39 + misc/syntax-ts/queries/kotlin-highlights.scm | 134 ++ 
misc/syntax-ts/queries/lua-highlights.scm | 99 ++ misc/syntax-ts/queries/make-highlights.scm | 61 + .../syntax-ts/queries/markdown-highlights.scm | 67 + .../queries/markdown_inline-highlights.scm | 48 + misc/syntax-ts/queries/matlab-highlights.scm | 65 + misc/syntax-ts/queries/meson-highlights.scm | 40 + misc/syntax-ts/queries/muttrc-highlights.scm | 48 + misc/syntax-ts/queries/ocaml-highlights.scm | 109 ++ misc/syntax-ts/queries/pascal-highlights.scm | 105 ++ misc/syntax-ts/queries/perl-highlights.scm | 113 ++ misc/syntax-ts/queries/php-highlights.scm | 165 ++ misc/syntax-ts/queries/po-highlights.scm | 30 + .../queries/properties-highlights.scm | 25 + misc/syntax-ts/queries/proto-highlights.scm | 94 ++ misc/syntax-ts/queries/python-highlights.scm | 122 ++ misc/syntax-ts/queries/r-highlights.scm | 90 + misc/syntax-ts/queries/ruby-highlights.scm | 132 ++ misc/syntax-ts/queries/rust-highlights.scm | 83 + misc/syntax-ts/queries/scala-highlights.scm | 80 + .../queries/smalltalk-highlights.scm | 63 + misc/syntax-ts/queries/sql-highlights.scm | 44 + misc/syntax-ts/queries/strace-highlights.scm | 44 + misc/syntax-ts/queries/swift-highlights.scm | 1 + misc/syntax-ts/queries/tcl-highlights.scm | 51 + misc/syntax-ts/queries/toml-highlights.scm | 43 + misc/syntax-ts/queries/turtle-highlights.scm | 64 + .../queries/typescript-highlights.scm | 137 ++ misc/syntax-ts/queries/verilog-highlights.scm | 124 ++ misc/syntax-ts/queries/vhdl-highlights.scm | 152 ++ misc/syntax-ts/queries/xml-highlights.scm | 43 + misc/syntax-ts/queries/yaml-highlights.scm | 63 + po/be.po | 6 +- po/br.po | 8 +- po/ca.po | 4 +- po/hr.po | 4 +- src/Makefile.am | 3 +- src/editor/Makefile.am | 20 +- src/editor/edit-impl.h | 5 + src/editor/edit.c | 35 +- src/editor/editcmd.c | 13 + src/editor/editwidget.h | 15 +- src/editor/syntax.c | 1495 ++++++++++++++++- src/editor/ts-grammar-loader.h | 150 ++ src/editor/ts-grammars/.gitignore | 4 + src/editor/ts-grammars/download-grammars.sh | 298 ++++ 
src/editor/ts-grammars/tree_sitter/alloc.h | 41 + src/editor/ts-grammars/tree_sitter/array.h | 291 ++++ src/editor/ts-grammars/tree_sitter/parser.h | 310 ++++ .../ts-grammars/tree_sitter/ts_assert.h | 11 + src/editor/ts-grammars/ts-grammar-registry.h | 427 +++++ src/subshell/common.c | 2 +- tests/.gitignore | 23 + tests/src/editor/Makefile.am | 41 + tests/src/editor/edit_syntax_ts.c | 632 +++++++ 93 files changed, 10044 insertions(+), 61 deletions(-) create mode 100644 doc/TREE-SITTER create mode 100644 misc/syntax-ts/Makefile.am create mode 100644 misc/syntax-ts/grammars create mode 100644 misc/syntax-ts/queries/ada-highlights.scm create mode 100644 misc/syntax-ts/queries/asm-highlights.scm create mode 100644 misc/syntax-ts/queries/awk-highlights.scm create mode 100644 misc/syntax-ts/queries/bash-highlights.scm create mode 100644 misc/syntax-ts/queries/bison-highlights.scm create mode 100644 misc/syntax-ts/queries/c-highlights.scm create mode 100644 misc/syntax-ts/queries/c_sharp-highlights.scm create mode 100644 misc/syntax-ts/queries/caddy-highlights.scm create mode 100644 misc/syntax-ts/queries/cmake-highlights.scm create mode 100644 misc/syntax-ts/queries/cobol-highlights.scm create mode 100644 misc/syntax-ts/queries/commonlisp-highlights.scm create mode 100644 misc/syntax-ts/queries/cpp-highlights.scm create mode 100644 misc/syntax-ts/queries/css-highlights.scm create mode 100644 misc/syntax-ts/queries/cuda-highlights.scm create mode 100644 misc/syntax-ts/queries/d-highlights.scm create mode 100644 misc/syntax-ts/queries/diff-highlights.scm create mode 100644 misc/syntax-ts/queries/dockerfile-highlights.scm create mode 100644 misc/syntax-ts/queries/dot-highlights.scm create mode 100644 misc/syntax-ts/queries/erlang-highlights.scm create mode 100644 misc/syntax-ts/queries/fortran-highlights.scm create mode 100644 misc/syntax-ts/queries/glsl-highlights.scm create mode 100644 misc/syntax-ts/queries/go-highlights.scm create mode 100644 
misc/syntax-ts/queries/haskell-highlights.scm create mode 100644 misc/syntax-ts/queries/hcl-highlights.scm create mode 100644 misc/syntax-ts/queries/html-highlights.scm create mode 100644 misc/syntax-ts/queries/idl-highlights.scm create mode 100644 misc/syntax-ts/queries/ini-highlights.scm create mode 100644 misc/syntax-ts/queries/java-highlights.scm create mode 100644 misc/syntax-ts/queries/javascript-highlights.scm create mode 100644 misc/syntax-ts/queries/json-highlights.scm create mode 100644 misc/syntax-ts/queries/kotlin-highlights.scm create mode 100644 misc/syntax-ts/queries/lua-highlights.scm create mode 100644 misc/syntax-ts/queries/make-highlights.scm create mode 100644 misc/syntax-ts/queries/markdown-highlights.scm create mode 100644 misc/syntax-ts/queries/markdown_inline-highlights.scm create mode 100644 misc/syntax-ts/queries/matlab-highlights.scm create mode 100644 misc/syntax-ts/queries/meson-highlights.scm create mode 100644 misc/syntax-ts/queries/muttrc-highlights.scm create mode 100644 misc/syntax-ts/queries/ocaml-highlights.scm create mode 100644 misc/syntax-ts/queries/pascal-highlights.scm create mode 100644 misc/syntax-ts/queries/perl-highlights.scm create mode 100644 misc/syntax-ts/queries/php-highlights.scm create mode 100644 misc/syntax-ts/queries/po-highlights.scm create mode 100644 misc/syntax-ts/queries/properties-highlights.scm create mode 100644 misc/syntax-ts/queries/proto-highlights.scm create mode 100644 misc/syntax-ts/queries/python-highlights.scm create mode 100644 misc/syntax-ts/queries/r-highlights.scm create mode 100644 misc/syntax-ts/queries/ruby-highlights.scm create mode 100644 misc/syntax-ts/queries/rust-highlights.scm create mode 100644 misc/syntax-ts/queries/scala-highlights.scm create mode 100644 misc/syntax-ts/queries/smalltalk-highlights.scm create mode 100644 misc/syntax-ts/queries/sql-highlights.scm create mode 100644 misc/syntax-ts/queries/strace-highlights.scm create mode 100644 
misc/syntax-ts/queries/swift-highlights.scm create mode 100644 misc/syntax-ts/queries/tcl-highlights.scm create mode 100644 misc/syntax-ts/queries/toml-highlights.scm create mode 100644 misc/syntax-ts/queries/turtle-highlights.scm create mode 100644 misc/syntax-ts/queries/typescript-highlights.scm create mode 100644 misc/syntax-ts/queries/verilog-highlights.scm create mode 100644 misc/syntax-ts/queries/vhdl-highlights.scm create mode 100644 misc/syntax-ts/queries/xml-highlights.scm create mode 100644 misc/syntax-ts/queries/yaml-highlights.scm create mode 100644 src/editor/ts-grammar-loader.h create mode 100644 src/editor/ts-grammars/.gitignore create mode 100755 src/editor/ts-grammars/download-grammars.sh create mode 100644 src/editor/ts-grammars/tree_sitter/alloc.h create mode 100644 src/editor/ts-grammars/tree_sitter/array.h create mode 100644 src/editor/ts-grammars/tree_sitter/parser.h create mode 100644 src/editor/ts-grammars/tree_sitter/ts_assert.h create mode 100644 src/editor/ts-grammars/ts-grammar-registry.h create mode 100644 tests/src/editor/edit_syntax_ts.c diff --git a/.gitignore b/.gitignore index 6d8cb8b622..9508f058a2 100644 --- a/.gitignore +++ b/.gitignore @@ -44,6 +44,7 @@ TAGS doc/devel/ doc/html/ .deps +.dirstamp libtool make.log make.clang diff --git a/configure.ac b/configure.ac index 5a5155f9e0..6fd089cf85 100644 --- a/configure.ac +++ b/configure.ac @@ -41,6 +41,7 @@ dnl that ar cannot be found and linking via libtool will fail at a later stage AC_CHECK_TOOLS([AR], [ar gar]) AC_PROG_CC +AC_PROG_CXX # AC_PROG_CC doesn't try enabling C99 in autoconf 2.69 and below, but # AC_PROG_CC_C99 is deprecated in newer ones. 
In autoconf 2.70+ both @@ -536,11 +537,169 @@ AC_SUBST(CPPFLAGS) AC_SUBST(LDFLAGS) AC_SUBST(LIBS) +dnl ############################################################################ +dnl Syntax highlighting system selection +dnl ############################################################################ + +AC_ARG_WITH([tree-sitter], + AS_HELP_STRING([--with-tree-sitter], + [Enable tree-sitter syntax highlighting (in addition to legacy MC highlighting)]), + [with_tree_sitter=$withval], + [with_tree_sitter=no]) + +AC_ARG_WITH([tree-sitter-grammars], + AS_HELP_STRING([--with-tree-sitter-grammars=LIST], + [Comma-separated list of tree-sitter grammars to build (default: all)]), + [with_tree_sitter_grammars=$withval], + [with_tree_sitter_grammars=all]) + +AC_ARG_WITH([tree-sitter-static], + AS_HELP_STRING([--with-tree-sitter-static], + [Link tree-sitter grammars statically into the binary (default: build as shared modules)]), + [with_tree_sitter_static=$withval], + [with_tree_sitter_static=no]) + +if test x"$with_tree_sitter" = xyes; then + AC_CHECK_HEADER([tree_sitter/api.h], [], + [AC_MSG_ERROR([tree-sitter headers not found (required for --with-tree-sitter)])]) + AC_CHECK_LIB([tree-sitter], [ts_parser_new], [], + [AC_MSG_ERROR([tree-sitter library not found (required for --with-tree-sitter)])]) + AC_DEFINE([HAVE_TREE_SITTER], [1], [Define if tree-sitter syntax highlighting is enabled]) + + if test x"$with_tree_sitter_static" = xyes; then + AC_DEFINE([TREE_SITTER_STATIC], [1], [Define if tree-sitter grammars are linked statically]) + TREE_SITTER_BUILD_TARGET="libtsgrammars.la" + TREE_SITTER_BUILD_MODE="static" + else + AC_DEFINE([TREE_SITTER_SHARED], [1], [Define if tree-sitter grammars are loaded as shared modules]) + PKG_CHECK_MODULES([GMODULE], [gmodule-2.0], [], + [AC_MSG_ERROR([gmodule-2.0 required for shared tree-sitter grammars (use --with-tree-sitter-static to avoid)])]) + TREE_SITTER_BUILD_TARGET="shared-modules" + TREE_SITTER_BUILD_MODE="shared" + 
TREE_SITTER_GRAMMAR_LIBDIR="${libdir}/mc/ts-grammars" + fi + AC_SUBST([TREE_SITTER_BUILD_TARGET]) + AC_SUBST([TREE_SITTER_BUILD_MODE]) + AC_SUBST([TREE_SITTER_GRAMMAR_LIBDIR]) + + dnl Known grammar directory names (must match download-grammars.sh) + all_ts_grammars="ada asm awk bash bison c caddy cmake cobol cpp c_sharp css cuda d diff dockerfile dot erlang fortran glsl go haskell hcl html idl ini java javascript json kotlin lisp lua make markdown markdown_inline matlab meson muttrc ocaml pascal perl php po properties proto python r ruby rust scala smalltalk sql strace swift tcl toml turtle typescript verilog vhdl xml yaml" + + dnl Grammars that have scanner.c + ts_scanner_c="awk bash bison caddy cmake cobol cpp c_sharp css cuda d dockerfile fortran haskell hcl html javascript kotlin lua markdown markdown_inline matlab ocaml perl php properties python r ruby rust scala tcl toml typescript xml yaml" + + dnl Grammars that have scanner.cc (C++ scanner) + ts_scanner_cc="sql" + + dnl Parse and validate selected grammars + if test x"$with_tree_sitter_grammars" = xall; then + tree_sitter_grammars="$all_ts_grammars" + else + tree_sitter_grammars=`echo "$with_tree_sitter_grammars" | tr ',' ' '` + for g in $tree_sitter_grammars; do + case " $all_ts_grammars " in + *" $g "*) ;; + *) AC_MSG_ERROR([unknown tree-sitter grammar: $g +Valid grammars: $all_ts_grammars]) ;; + esac + done + fi + + dnl Build source/object file lists, -D flags, shared lib list, and grammar dir list + TREE_SITTER_GRAMMAR_SOURCES="" + TREE_SITTER_GRAMMAR_OBJECTS="" + TREE_SITTER_GRAMMAR_DEFS="" + TREE_SITTER_SHARED_LIBS="" + TREE_SITTER_GRAMMARS="" + ts_need_cxx=no + for g in $tree_sitter_grammars; do + TREE_SITTER_GRAMMAR_SOURCES="$TREE_SITTER_GRAMMAR_SOURCES $g/parser.c" + TREE_SITTER_GRAMMAR_OBJECTS="$TREE_SITTER_GRAMMAR_OBJECTS $g/parser.lo" + case " $ts_scanner_c " in + *" $g "*) + TREE_SITTER_GRAMMAR_SOURCES="$TREE_SITTER_GRAMMAR_SOURCES $g/scanner.c" + 
TREE_SITTER_GRAMMAR_OBJECTS="$TREE_SITTER_GRAMMAR_OBJECTS $g/scanner.lo" ;; + esac + case " $ts_scanner_cc " in + *" $g "*) + TREE_SITTER_GRAMMAR_SOURCES="$TREE_SITTER_GRAMMAR_SOURCES $g/scanner.cc" + TREE_SITTER_GRAMMAR_OBJECTS="$TREE_SITTER_GRAMMAR_OBJECTS $g/scanner.lo" + ts_need_cxx=yes ;; + esac + if test x"$with_tree_sitter_static" = xyes; then + upper=`echo "$g" | tr 'abcdefghijklmnopqrstuvwxyz' 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'` + TREE_SITTER_GRAMMAR_DEFS="$TREE_SITTER_GRAMMAR_DEFS -DHAVE_GRAMMAR_${upper}=1" + fi + TREE_SITTER_SHARED_LIBS="$TREE_SITTER_SHARED_LIBS mc-ts-$g.la" + TREE_SITTER_GRAMMARS="$TREE_SITTER_GRAMMARS $g" + done + dnl Remove leading spaces + TREE_SITTER_GRAMMAR_SOURCES=`echo $TREE_SITTER_GRAMMAR_SOURCES` + TREE_SITTER_GRAMMAR_OBJECTS=`echo $TREE_SITTER_GRAMMAR_OBJECTS` + TREE_SITTER_GRAMMAR_DEFS=`echo $TREE_SITTER_GRAMMAR_DEFS` + TREE_SITTER_SHARED_LIBS=`echo $TREE_SITTER_SHARED_LIBS` + TREE_SITTER_GRAMMARS=`echo $TREE_SITTER_GRAMMARS` + + AC_SUBST([TREE_SITTER_GRAMMAR_SOURCES]) + AC_SUBST([TREE_SITTER_GRAMMAR_OBJECTS]) + AC_SUBST([TREE_SITTER_GRAMMAR_DEFS]) + AC_SUBST([TREE_SITTER_SHARED_LIBS]) + AC_SUBST([TREE_SITTER_GRAMMARS]) + + if test x"$ts_need_cxx" = xyes; then + TREE_SITTER_LIBS="-ltree-sitter -lstdc++" + else + TREE_SITTER_LIBS="-ltree-sitter" + fi + TREE_SITTER_CFLAGS="" + AC_SUBST([TREE_SITTER_LIBS]) + AC_SUBST([TREE_SITTER_CFLAGS]) + + ts_grammar_count=`echo $tree_sitter_grammars | wc -w | tr -d ' '` + if test x"$with_tree_sitter_grammars" = xall; then + AC_MSG_NOTICE([tree-sitter: enabled ($TREE_SITTER_BUILD_MODE) with all $ts_grammar_count grammars]) + else + AC_MSG_NOTICE([tree-sitter: enabled ($TREE_SITTER_BUILD_MODE) with $ts_grammar_count grammars: $TREE_SITTER_GRAMMARS]) + fi + + dnl Auto-download missing grammar sources + ts_grammars_missing="" + for g in $tree_sitter_grammars; do + if test ! 
-f "$srcdir/src/editor/ts-grammars/$g/parser.c"; then + ts_grammars_missing="$ts_grammars_missing $g" + fi + done + + if test -n "$ts_grammars_missing"; then + ts_grammars_dir="$srcdir/src/editor/ts-grammars" + if test -n "$TREE_SITTER_GRAMMARS_DIR"; then + AC_MSG_NOTICE([copying missing grammar sources from $TREE_SITTER_GRAMMARS_DIR...]) + for g in $ts_grammars_missing; do + if test -d "$TREE_SITTER_GRAMMARS_DIR/$g"; then + mkdir -p "$ts_grammars_dir/$g" + cp -a "$TREE_SITTER_GRAMMARS_DIR/$g"/* "$ts_grammars_dir/$g/" + else + AC_MSG_ERROR([grammar $g not found in $TREE_SITTER_GRAMMARS_DIR]) + fi + done + else + AC_MSG_NOTICE([downloading missing grammar sources...]) + (cd "$ts_grammars_dir" && $SHELL download-grammars.sh $ts_grammars_missing) || \ + AC_MSG_ERROR([failed to download grammar sources. +For offline builds, set TREE_SITTER_GRAMMARS_DIR=/path/to/grammars]) + fi + fi +else + AC_MSG_NOTICE([tree-sitter syntax highlighting disabled]) +fi + AM_CONDITIONAL(USE_NLS, [test x"$USE_NLS" = xyes]) AM_CONDITIONAL(USE_MAINTAINER_MODE, [test x"$USE_MAINTAINER_MODE" = xyes]) AM_CONDITIONAL(USE_SCREEN_SLANG, [test x"$with_screen" = xslang]) AM_CONDITIONAL(USE_INTERNAL_EDIT, [test x"$use_internal_edit" = xyes ]) AM_CONDITIONAL(USE_ASPELL, [test x"$enable_aspell" = xyes ]) +AM_CONDITIONAL(USE_TREE_SITTER, [test x"$with_tree_sitter" = xyes]) +AM_CONDITIONAL(TREE_SITTER_STATIC, [test x"$with_tree_sitter" = xyes -a x"$with_tree_sitter_static" = xyes]) AM_CONDITIONAL(USE_DIFF, [test -n "$use_diff"]) AM_CONDITIONAL(CONS_SAVER, [test -n "$cons_saver"]) dnl Clarify do we really need GModule @@ -582,6 +741,7 @@ misc/mc.ext.ini src/Makefile src/consaver/Makefile src/editor/Makefile +src/editor/ts-grammars/Makefile src/man2hlp/Makefile src/subshell/Makefile src/viewer/Makefile @@ -654,6 +814,7 @@ lib/vfs/Makefile lib/widget/Makefile misc/syntax/Makefile +misc/syntax-ts/Makefile doc/Makefile diff --git a/doc/Makefile.am b/doc/Makefile.am index dcf6f84740..de69ac6b54 100644 --- 
a/doc/Makefile.am +++ b/doc/Makefile.am @@ -1,4 +1,4 @@ DIST_SUBDIRS = hints man hlp SUBDIRS = hints man hlp -EXTRA_DIST = FAQ HACKING INSTALL +EXTRA_DIST = FAQ HACKING INSTALL TREE-SITTER diff --git a/doc/TREE-SITTER b/doc/TREE-SITTER new file mode 100644 index 0000000000..475edcda07 --- /dev/null +++ b/doc/TREE-SITTER @@ -0,0 +1,745 @@ +Tree-sitter syntax highlighting for the internal editor +------------------------------------------------------- + +Contents +-------- + +* Introduction +* Building with tree-sitter support +* Downloading grammar sources +* How it works +* Language injection +* File layout +* Grammar configuration file (grammars) +* Highlight query files (queries/*.scm) +* Color mapping +* Adding a new language +* Removing or disabling a language +* Updating a grammar +* Validating query files +* Troubleshooting +* Limitations + + +Introduction +------------ + +The internal editor (mcedit) supports an alternative syntax highlighting +backend based on tree-sitter (https://tree-sitter.github.io/). When +enabled at build time, tree-sitter provides AST-based highlighting that +is more accurate than the legacy regex-based system. + +The tree-sitter backend is enabled with the --with-tree-sitter configure +flag. When a file is opened in the editor, the system first attempts to +match it against a tree-sitter grammar. If no grammar is found (or the +query file fails to compile), the editor falls back to the legacy +regex-based highlighting from *.syntax files. + +The two highlighting systems share the same rendering path: both produce +a color pair integer per byte, which is consumed by editdraw.c. + +61 languages are supported via tree-sitter grammars, with language +injection for HTML (JavaScript + CSS) and markdown (inline elements + +fenced code blocks in any supported language). 
+ + +Building with tree-sitter support +--------------------------------- + +Requirements: + +- libtree-sitter >= 0.22 (development headers and shared library) + - https://github.com/tree-sitter/tree-sitter +- gmodule-2.0 (for shared mode, part of GLib) +- A C++ compiler (needed for the sql grammar which has a C++ scanner) +- curl (needed to download grammar sources, unless using a local copy) + +There are two build modes for grammars: shared (default) and static. + +Shared mode (default) -- each grammar is built as a separate .so module +and loaded at runtime via g_module_open(). The mc binary stays small +(~5 MB) regardless of how many grammars are available. Distros can +package each grammar separately: + + ./configure --with-tree-sitter + make -j$(nproc) + +Static mode -- all selected grammars are linked directly into the mc +binary. No runtime module loading, but the binary can grow large: + + ./configure --with-tree-sitter --with-tree-sitter-static + make -j$(nproc) + +To build with only specific grammars (works in both modes): + + ./configure --with-tree-sitter --with-tree-sitter-grammars=c,python,bash + make -j$(nproc) + +Grammar sources are automatically downloaded during configure if they +are not already present. For offline builds, set TREE_SITTER_GRAMMARS_DIR +to a directory containing pre-downloaded grammar sources: + + TREE_SITTER_GRAMMARS_DIR=/path/to/grammars ./configure --with-tree-sitter + +The --with-tree-sitter-grammars option accepts a comma-separated list +of grammar names. The default is 'all' (62 grammars). Invalid names +cause configure to abort with an error listing all valid grammars. + +Binary size comparison (approximate): +- Without tree-sitter: ~5 MB +- Shared mode (any grammar count): ~5 MB (grammars in separate .so) +- Static mode, 3 grammars: ~8 MB +- Static mode, all 62 grammars: ~107 MB + +To build with the legacy highlighting only (default): + + ./configure + make -j$(nproc) + +Both build modes are always supported. 
The --with-tree-sitter flag +controls whether the tree-sitter backend is compiled in. When it is not +used, no tree-sitter headers or libraries are required and the +tree-sitter code is excluded via #ifdef HAVE_TREE_SITTER guards. + + +Downloading grammar sources +--------------------------- + +Grammar source files (parser.c, scanner.c, etc.) are not stored in the +git repository. Configure automatically downloads missing grammar +sources when --with-tree-sitter is used. Only grammars selected at +configure time are downloaded (all by default, or the subset specified +via --with-tree-sitter-grammars). + +For offline builds, set TREE_SITTER_GRAMMARS_DIR before running +configure: + + TREE_SITTER_GRAMMARS_DIR=/path/to/grammars ./configure --with-tree-sitter + +To manually re-download grammar sources (e.g. after updating a version +in download-grammars.sh): + + make -C src/editor/ts-grammars download-grammars + +TREE_SITTER_GRAMMARS_DIR must contain subdirectories named after each +grammar (e.g. c/, python/, bash/) with at least parser.c in each. + +The download script (download-grammars.sh) pins all grammar versions to +specific commit SHAs for reproducible builds. It also handles: +- Grammars with non-standard repository layouts (monorepos) +- GitLab-hosted grammars (bison) +- Extra header files needed by some scanners +- Post-download steps (perl/bsearch.h stub, include path fixes) + +Grammar source files MUST NOT be modified after download. All +compatibility is handled via the headers in tree_sitter/ and build +flags in Makefile.in. + + +How it works +------------ + +The integration point is edit_get_syntax_color() in src/editor/syntax.c. +The renderer in editdraw.c calls this function for each byte position to +get a color pair. When tree-sitter is active the following happens: + +1. Initialization (edit_load_syntax / ts_init_for_file): + - The grammars config file is read to find a grammar matching the + current filename (by regex) or first line (shebang). 
+ - The grammar is looked up via ts_grammar_registry_lookup(): + * Shared mode: g_module_open() loads the grammar's .so module on + demand from the grammar module directory. + * Static mode: a compile-time registry maps names to functions. + - A TSParser is created with a 3-second parse timeout as a safety net. + - The corresponding highlight query file (<name>-highlights.scm) is + loaded and compiled with ts_query_new(). + - If injection is configured for this grammar, injection parsers + and queries are initialized (multiple per grammar supported). + - If any step fails, ts_active is set to FALSE and the legacy system + takes over transparently. + +2. Parsing: + - The TSParser uses a TSInput callback (ts_input_read) that reads + directly from MC's edit buffer (edit_buffer_t). No copy of the + file contents is made. + - The initial parse produces a TSTree. + +3. Incremental re-parsing: + - When the buffer is modified (insert, delete, backspace), the + function edit_syntax_ts_notify_edit() is called from edit.c. + - It calls ts_tree_edit() with the byte range of the change. The + actual re-parse is deferred to the next highlight cache rebuild, + avoiding redundant parses during bulk operations. + +4. Highlight cache (ts_rebuild_highlight_cache): + - A TSQueryCursor is restricted to a byte range around the viewport + (viewport +/- 8 KB). + - Query matches produce (start_byte, end_byte, color) entries stored + in a GArray cache (ts_highlights). + - If injection is active, injection ranges are collected from the + primary tree and injection queries are run on those ranges. + - For dynamic injection, fenced code blocks are detected, the + language name is read from the buffer, and the content is parsed + with the matching grammar (with per-language caching). + - The cache is rebuilt when the viewport scrolls outside the cached + range or the tree needs re-parsing. + +5. 
Color lookup (ts_get_color_at): + - For each byte, a linear scan in the highlight cache returns the + matching color (last match wins). + - When multiple captures overlap (e.g. a keyword inside a function + call), the last/innermost match wins because tree-sitter returns + parent nodes before child nodes. + + +Language injection +------------------ + +Language injection allows one grammar to delegate parsing of specific +AST nodes to another grammar. This is configured via "inject" lines +in the grammars config file: + + inject <parent> <child> <node_type>,<node_type>,... + +For example, markdown inline elements are parsed by a separate grammar: + + inject markdown markdown_inline inline,pipe_table_cell + +Multiple injections per parent grammar are supported. HTML uses two +injections for JavaScript and CSS: + + inject html javascript script_element/raw_text + inject html css style_element/raw_text + +Node types support a "parent/child" syntax for targeting child nodes +inside specific parent nodes. For example, "script_element/raw_text" +matches raw_text nodes that are children of script_element nodes. +This is needed when different parent elements contain content for +different languages (e.g. 
\n" + "\n"; + const TSLanguage *html_lang; + TSParser *html_parser; + TSTree *html_tree; + TSNode root; + uint32_t child_count, i; + + html_lang = ts_grammar_registry_lookup ("html"); + ck_assert_msg (html_lang != NULL, "html grammar must exist"); + + html_parser = ts_parser_new (); + ts_parser_set_language (html_parser, html_lang); + html_tree = + ts_parser_parse_string (html_parser, NULL, test_html, (uint32_t) strlen (test_html)); + ck_assert_msg (html_tree != NULL, "HTML parse must succeed"); + + // Verify that script_element and style_element nodes exist in the tree + { + gboolean found_script = FALSE; + gboolean found_style = FALSE; + GArray *stack = g_array_new (FALSE, FALSE, sizeof (TSNode)); + + root = ts_tree_root_node (html_tree); + g_array_append_val (stack, root); + + while (stack->len > 0) + { + TSNode node = g_array_index (stack, TSNode, stack->len - 1); + const char *type; + + g_array_set_size (stack, stack->len - 1); + type = ts_node_type (node); + + if (strcmp (type, "script_element") == 0) + found_script = TRUE; + if (strcmp (type, "style_element") == 0) + found_style = TRUE; + + child_count = ts_node_child_count (node); + for (i = 0; i < child_count; i++) + { + TSNode child = ts_node_child (node, i); + g_array_append_val (stack, child); + } + } + + g_array_free (stack, TRUE); + + ck_assert_msg (found_script, "HTML tree must contain script_element"); + ck_assert_msg (found_style, "HTML tree must contain style_element"); + } + + // Parse the raw_text inside script_element with the JavaScript grammar + { + const TSLanguage *js_lang; + TSParser *js_parser; + char query_path[1024]; + char *query_src = NULL; + gsize query_len = 0; + uint32_t error_offset = 0; + TSQueryError error_type = TSQueryErrorNone; + TSQuery *query; + + js_lang = ts_grammar_registry_lookup ("javascript"); + ck_assert_msg (js_lang != NULL, "javascript grammar must exist"); + + // Find raw_text inside script_element + { + GArray *stack = g_array_new (FALSE, FALSE, sizeof (TSNode)); 
+ + root = ts_tree_root_node (html_tree); + g_array_append_val (stack, root); + + while (stack->len > 0) + { + TSNode node = g_array_index (stack, TSNode, stack->len - 1); + const char *type; + + g_array_set_size (stack, stack->len - 1); + type = ts_node_type (node); + + if (strcmp (type, "script_element") == 0) + { + // Find raw_text child and parse it with JS + uint32_t cc = ts_node_child_count (node); + uint32_t ci; + + for (ci = 0; ci < cc; ci++) + { + TSNode child = ts_node_child (node, ci); + + if (strcmp (ts_node_type (child), "raw_text") == 0) + { + TSRange r; + TSTree *js_tree; + TSQueryCursor *cursor; + TSQueryMatch match; + int js_captures = 0; + + r.start_point = ts_node_start_point (child); + r.end_point = ts_node_end_point (child); + r.start_byte = ts_node_start_byte (child); + r.end_byte = ts_node_end_byte (child); + + js_parser = ts_parser_new (); + ts_parser_set_language (js_parser, js_lang); + ts_parser_set_included_ranges (js_parser, &r, 1); + + js_tree = ts_parser_parse_string (js_parser, NULL, test_html, + (uint32_t) strlen (test_html)); + ck_assert_msg (js_tree != NULL, "JS injection parse must succeed"); + + snprintf (query_path, sizeof (query_path), + "%s/javascript-highlights.scm", TEST_TS_QUERIES_DIR); + ck_assert_msg ( + g_file_get_contents (query_path, &query_src, &query_len, NULL), + "javascript-highlights.scm must be readable"); + + query = ts_query_new (js_lang, query_src, (uint32_t) query_len, + &error_offset, &error_type); + ck_assert_msg (query != NULL, "JS query must compile"); + + cursor = ts_query_cursor_new (); + ts_query_cursor_exec (cursor, query, ts_tree_root_node (js_tree)); + + while (ts_query_cursor_next_match (cursor, &match)) + { + uint16_t mi; + + for (mi = 0; mi < match.capture_count; mi++) + js_captures++; + } + + ck_assert_msg (js_captures >= 1, + "Expected JS captures from script content, got %d", + js_captures); + + ts_query_cursor_delete (cursor); + ts_query_delete (query); + g_free (query_src); + ts_tree_delete 
(js_tree); + ts_parser_delete (js_parser); + } + } + break; + } + + child_count = ts_node_child_count (node); + for (i = 0; i < child_count; i++) + { + TSNode child = ts_node_child (node, i); + g_array_append_val (stack, child); + } + } + + g_array_free (stack, TRUE); + } + } + + ts_tree_delete (html_tree); + ts_parser_delete (html_parser); +} +END_TEST +#endif + +/* --------------------------------------------------------------------------------------------- */ + +int +main (void) +{ + TCase *tc_core; + + tc_core = tcase_create ("Core"); + + tcase_add_checked_fixture (tc_core, setup, teardown); + + // Add new tests here: *************** + if (sizeof (test_registry_lookup_found_ds) > 0) + mctest_add_parameterized_test (tc_core, test_registry_lookup_found, + test_registry_lookup_found_ds); + tcase_add_test (tc_core, test_registry_lookup_not_found); + tcase_add_test (tc_core, test_all_query_files_compile); +#ifdef HAVE_GRAMMAR_C + tcase_add_test (tc_core, test_parser_basic_parse); + tcase_add_test (tc_core, test_query_captures_c); +#endif +#if defined(HAVE_GRAMMAR_MARKDOWN) && defined(HAVE_GRAMMAR_MARKDOWN_INLINE) + tcase_add_test (tc_core, test_markdown_inline_injection); +#endif +#if defined(HAVE_GRAMMAR_HTML) && defined(HAVE_GRAMMAR_JAVASCRIPT) && defined(HAVE_GRAMMAR_CSS) + tcase_add_test (tc_core, test_html_multi_injection); +#endif + // *********************************** + + return mctest_run_all (tc_core); +} + +/* --------------------------------------------------------------------------------------------- */ From 7bec7d39d2a06b7835cdb562dce3977c9db5e4a9 Mon Sep 17 00:00:00 2001 From: Jiri Tyr Date: Sat, 14 Mar 2026 22:02:23 +0000 Subject: [PATCH 02/16] Adding missing Makefile.in and fixing installation Signed-off-by: Jiri Tyr --- configure.ac | 4 +- src/editor/Makefile.am | 2 +- src/editor/ts-grammars/Makefile.in | 169 +++++++++++++++++++++++++++++ 3 files changed, 173 insertions(+), 2 deletions(-) create mode 100644 src/editor/ts-grammars/Makefile.in diff 
--git a/configure.ac b/configure.ac index 6fd089cf85..2c6f9b1dfa 100644 --- a/configure.ac +++ b/configure.ac @@ -576,7 +576,9 @@ if test x"$with_tree_sitter" = xyes; then [AC_MSG_ERROR([gmodule-2.0 required for shared tree-sitter grammars (use --with-tree-sitter-static to avoid)])]) TREE_SITTER_BUILD_TARGET="shared-modules" TREE_SITTER_BUILD_MODE="shared" - TREE_SITTER_GRAMMAR_LIBDIR="${libdir}/mc/ts-grammars" + dnl Resolve libdir for use in Makefile.in (which doesn't have automake's variable chain) + eval "ts_libdir=\"$libdir\"" + TREE_SITTER_GRAMMAR_LIBDIR="${ts_libdir}/mc/ts-grammars" fi AC_SUBST([TREE_SITTER_BUILD_TARGET]) AC_SUBST([TREE_SITTER_BUILD_MODE]) diff --git a/src/editor/Makefile.am b/src/editor/Makefile.am index e98e97f180..2df77a6c17 100644 --- a/src/editor/Makefile.am +++ b/src/editor/Makefile.am @@ -1,4 +1,4 @@ -EXTRA_DIST = +EXTRA_DIST = ts-grammar-loader.h if USE_TREE_SITTER SUBDIRS = ts-grammars diff --git a/src/editor/ts-grammars/Makefile.in b/src/editor/ts-grammars/Makefile.in new file mode 100644 index 0000000000..d11457605d --- /dev/null +++ b/src/editor/ts-grammars/Makefile.in @@ -0,0 +1,169 @@ +# Makefile.in for tree-sitter grammar sources. +# This is a hand-written Makefile.in (not generated by automake) because +# automake does not support configure substitutions in _SOURCES variables, +# and creates .deps directories for all EXTRA_SOURCES entries regardless +# of which grammars are selected. 
+# +# Supports two build modes (set by configure): +# static: all grammars linked into libtsgrammars.la (linked into mc binary) +# shared: each grammar built as a separate .so module (loaded at runtime) + +srcdir = @srcdir@ +top_srcdir = @top_srcdir@ +top_builddir = @top_builddir@ +VPATH = @srcdir@ +INSTALL = @INSTALL@ + +CC = @CC@ +CXX = @CXX@ +CFLAGS = @CFLAGS@ +CXXFLAGS = @CXXFLAGS@ +CPPFLAGS = @CPPFLAGS@ +LDFLAGS = @LDFLAGS@ +SHELL = @SHELL@ +LIBTOOL = @LIBTOOL@ + +TREE_SITTER_CFLAGS = @TREE_SITTER_CFLAGS@ +TREE_SITTER_LIBS = @TREE_SITTER_LIBS@ + +AM_CPPFLAGS = -I$(top_srcdir) -I$(srcdir) $(TREE_SITTER_CFLAGS) + +# Suppress warnings in downloaded grammar code +AM_CFLAGS = -w + +# Grammar sources and objects (set by configure) +TREE_SITTER_GRAMMAR_SOURCES = @TREE_SITTER_GRAMMAR_SOURCES@ +TREE_SITTER_GRAMMAR_OBJECTS = @TREE_SITTER_GRAMMAR_OBJECTS@ + +# Selected grammar directories (set by configure) +TS_GRAMMAR_DIRS = @TREE_SITTER_GRAMMARS@ + +# Build mode: "static" or "shared" (set by configure) +BUILD_TARGET = @TREE_SITTER_BUILD_TARGET@ + +# Shared module list (set by configure, e.g. 
"mc-ts-c.la mc-ts-python.la") +SHARED_LIBS = @TREE_SITTER_SHARED_LIBS@ + +# Install directory for shared grammar modules +grammardir = @TREE_SITTER_GRAMMAR_LIBDIR@ + +OBJECTS = $(TREE_SITTER_GRAMMAR_OBJECTS) + +all: $(BUILD_TARGET) + +# ============================================================================ +# Static mode: single convenience library +# ============================================================================ + +libtsgrammars.la: $(OBJECTS) + $(LIBTOOL) --tag=CXX --mode=link $(CXX) $(CXXFLAGS) $(LDFLAGS) -o $@ $(OBJECTS) + +# ============================================================================ +# Shared mode: one loadable module per grammar +# ============================================================================ + +shared-modules: $(OBJECTS) + @for lang in $(TS_GRAMMAR_DIRS); do \ + objs="$$lang/parser.lo"; \ + linker="$(CC)"; tag="CC"; \ + if test -f "$$lang/scanner.lo"; then \ + objs="$$objs $$lang/scanner.lo"; \ + fi; \ + if test -f "$(srcdir)/$$lang/scanner.cc"; then \ + linker="$(CXX)"; tag="CXX"; \ + fi; \ + echo " LINK $$lang.so"; \ + $(LIBTOOL) --tag=$$tag --mode=link $$linker $(LDFLAGS) \ + -module -avoid-version -rpath $(grammardir) \ + $(TREE_SITTER_LIBS) -o mc-ts-$$lang.la $$objs || exit 1; \ + done + +# ============================================================================ +# Pattern rules for compiling grammar sources +# ============================================================================ + +%.lo: %.c + @dir=`dirname $@`; test -d "$$dir" || mkdir -p "$$dir" + $(LIBTOOL) --tag=CC --mode=compile $(CC) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o $@ $< + +%.lo: %.cc + @dir=`dirname $@`; test -d "$$dir" || mkdir -p "$$dir" + $(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CXXFLAGS) -c -o $@ $< + +.PHONY: all clean distclean install install-static install-shared download-grammars shared-modules distdir + +# 
============================================================================ +# Install +# ============================================================================ + +install: install-@TREE_SITTER_BUILD_MODE@ + +install-static: + @true + +install-shared: + $(INSTALL) -d $(DESTDIR)$(grammardir) + @for lang in $(TS_GRAMMAR_DIRS); do \ + $(INSTALL) -m 755 .libs/mc-ts-$$lang.so $(DESTDIR)$(grammardir)/mc-ts-$$lang.so; \ + done + +# ============================================================================ +# Grammar download target +# ============================================================================ + +download-grammars: + @if test -n "$(TREE_SITTER_GRAMMARS_DIR)"; then \ + echo "Copying grammar sources from $(TREE_SITTER_GRAMMARS_DIR)..."; \ + for lang in $(TS_GRAMMAR_DIRS); do \ + if test -d "$(TREE_SITTER_GRAMMARS_DIR)/$$lang"; then \ + mkdir -p "$(srcdir)/$$lang"; \ + cp -a "$(TREE_SITTER_GRAMMARS_DIR)/$$lang"/* "$(srcdir)/$$lang/"; \ + else \ + echo "WARNING: $$lang not found in $(TREE_SITTER_GRAMMARS_DIR)"; \ + fi; \ + done; \ + echo "Done."; \ + else \ + $(SHELL) "$(srcdir)/download-grammars.sh" $(TS_GRAMMAR_DIRS); \ + fi + +# Catch missing grammar sources and print a helpful error message. +%/parser.c: + @echo ""; \ + echo "ERROR: Tree-sitter grammar source not found: $@"; \ + echo ""; \ + echo "Please run: ./configure --with-tree-sitter"; \ + echo "(configure auto-downloads missing grammar sources)"; \ + echo ""; \ + echo "Or manually: make -C src/editor/ts-grammars download-grammars"; \ + echo ""; \ + exit 1 + +# ============================================================================ +# Clean +# ============================================================================ + +clean: + @for lang in $(TS_GRAMMAR_DIRS); do \ + rm -f "$$lang"/*.lo "$$lang"/*.o; \ + rm -rf "$$lang/.libs"; \ + done + rm -f libtsgrammars.la + rm -f mc-ts-*.la + rm -rf .libs + +# Copy distributable files into the dist directory. 
+# Grammar sources are NOT included — they are downloaded at build time. +distdir: + @test -d "$(distdir)" || mkdir -p "$(distdir)" + cp $(srcdir)/Makefile.in $(distdir)/ + cp $(srcdir)/download-grammars.sh $(distdir)/ + cp $(srcdir)/ts-grammar-registry.h $(distdir)/ + cp $(srcdir)/.gitignore $(distdir)/ + cp -r $(srcdir)/tree_sitter $(distdir)/ + +distclean: clean + @for d in "$(srcdir)"/*/; do \ + case "$$d" in */tree_sitter/) ;; *) rm -rf "$$d" ;; esac; \ + done + rm -f Makefile From ecb282793f24d4018b49e1b312124068fe23e070 Mon Sep 17 00:00:00 2001 From: Jiri Tyr Date: Sun, 15 Mar 2026 11:44:52 +0000 Subject: [PATCH 03/16] Auto-generate Makefile Signed-off-by: Jiri Tyr --- po/be.po | 6 +- po/br.po | 8 +- po/ca.po | 4 +- po/hr.po | 4 +- src/editor/ts-grammars/Makefile.am | 143 ++++++++++++++++++++++++ src/editor/ts-grammars/Makefile.in | 169 ----------------------------- 6 files changed, 154 insertions(+), 180 deletions(-) create mode 100644 src/editor/ts-grammars/Makefile.am delete mode 100644 src/editor/ts-grammars/Makefile.in diff --git a/po/be.po b/po/be.po index 6a0117f8c1..61eb9d1524 100644 --- a/po/be.po +++ b/po/be.po @@ -29,9 +29,9 @@ msgstr "" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=UTF-8\n" "Content-Transfer-Encoding: 8bit\n" -"Plural-Forms: nplurals=4; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && " -"n%10<=4 && (n%100<12 || n%100>14) ? 1 : n%10==0 || (n%10>=5 && n%10<=9) || " -"(n%100>=11 && n%100<=14)? 2 : 3);\n" +"Plural-Forms: nplurals=4; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n" +"%10<=4 && (n%100<12 || n%100>14) ? 1 : n%10==0 || (n%10>=5 && n%10<=9) || (n" +"%100>=11 && n%100<=14)? 
2 : 3);\n" # "Plural-Forms: nplurals=INTEGER; plural=EXPRESSION;\n" msgid "No translation" diff --git a/po/br.po b/po/br.po index 6cc73e2751..3498e0bdf0 100644 --- a/po/br.po +++ b/po/br.po @@ -17,10 +17,10 @@ msgstr "" "Content-Type: text/plain; charset=UTF-8\n" "Content-Transfer-Encoding: 8bit\n" "Plural-Forms: nplurals=5; plural=((n%10 == 1) && (n%100 != 11) && (n%100 !" -"=71) && (n%100 !=91) ? 0 :(n%10 == 2) && (n%100 != 12) && (n%100 !=72) && " -"(n%100 !=92) ? 1 :(n%10 ==3 || n%10==4 || n%10==9) && (n%100 < 10 || n% 100 " -"> 19) && (n%100 < 70 || n%100 > 79) && (n%100 < 90 || n%100 > 99) ? 2 :(n != " -"0 && n % 1000000 == 0) ? 3 : 4);\n" +"=71) && (n%100 !=91) ? 0 :(n%10 == 2) && (n%100 != 12) && (n%100 !=72) && (n" +"%100 !=92) ? 1 :(n%10 ==3 || n%10==4 || n%10==9) && (n%100 < 10 || n% 100 > " +"19) && (n%100 < 70 || n%100 > 79) && (n%100 < 90 || n%100 > 99) ? 2 :(n != 0 " +"&& n % 1000000 == 0) ? 3 : 4);\n" # "Plural-Forms: nplurals=INTEGER; plural=EXPRESSION;\n" msgid "No translation" diff --git a/po/ca.po b/po/ca.po index 18fc752875..42f2e8a0d3 100644 --- a/po/ca.po +++ b/po/ca.po @@ -762,8 +762,8 @@ msgid "" "as tickets at %s\n" msgstr "" "\n" -"Si us plau, envieu qualsevol informe d'error (incloent-hi la sortida de «%s " -"-V»)\n" +"Si us plau, envieu qualsevol informe d'error (incloent-hi la sortida de «%s -" +"V»)\n" "com a entrades a %s\n" msgid "Main options" diff --git a/po/hr.po b/po/hr.po index 92c0f17c86..6f56f6c34d 100644 --- a/po/hr.po +++ b/po/hr.po @@ -15,8 +15,8 @@ msgstr "" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=UTF-8\n" "Content-Transfer-Encoding: 8bit\n" -"Plural-Forms: nplurals=3; plural=n%10==1 && n%100!=11 ? 0 : n%10>=2 && " -"n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2;\n" +"Plural-Forms: nplurals=3; plural=n%10==1 && n%100!=11 ? 0 : n%10>=2 && n" +"%10<=4 && (n%100<10 || n%100>=20) ? 
1 : 2;\n" # "Plural-Forms: nplurals=INTEGER; plural=EXPRESSION;\n" msgid "No translation" diff --git a/src/editor/ts-grammars/Makefile.am b/src/editor/ts-grammars/Makefile.am new file mode 100644 index 0000000000..1ab97fd079 --- /dev/null +++ b/src/editor/ts-grammars/Makefile.am @@ -0,0 +1,143 @@ +# Makefile.am for tree-sitter grammar sources. +# Compiles selected grammar sources into a static convenience library (static +# mode) or per-grammar shared modules (shared mode). +# +# We use all-local/clean-local/install hooks instead of automake's _LTLIBRARIES +# because the source list is determined at configure time and automake does not +# support configure substitutions in _SOURCES variables. + +EXTRA_DIST = \ + download-grammars.sh \ + ts-grammar-registry.h \ + tree_sitter/parser.h \ + tree_sitter/alloc.h \ + tree_sitter/array.h \ + tree_sitter/ts_assert.h + +# Variables substituted by configure +TREE_SITTER_GRAMMAR_SOURCES = @TREE_SITTER_GRAMMAR_SOURCES@ +TREE_SITTER_GRAMMAR_OBJECTS = @TREE_SITTER_GRAMMAR_OBJECTS@ +TREE_SITTER_GRAMMARS = @TREE_SITTER_GRAMMARS@ +TREE_SITTER_SHARED_LIBS = @TREE_SITTER_SHARED_LIBS@ +TREE_SITTER_BUILD_TARGET = @TREE_SITTER_BUILD_TARGET@ +TREE_SITTER_BUILD_MODE = @TREE_SITTER_BUILD_MODE@ +TREE_SITTER_GRAMMAR_LIBDIR = @TREE_SITTER_GRAMMAR_LIBDIR@ + +TS_GRAMMAR_DIRS = $(TREE_SITTER_GRAMMARS) +OBJECTS = $(TREE_SITTER_GRAMMAR_OBJECTS) + +AM_CPPFLAGS = -I$(top_srcdir) -I$(srcdir) $(TREE_SITTER_CFLAGS) +AM_CFLAGS = -w + +# ============================================================================ +# Build +# ============================================================================ + +all-local: $(TREE_SITTER_BUILD_TARGET) + +# Static mode: single convenience library +libtsgrammars.la: check-grammar-sources $(OBJECTS) + $(LIBTOOL) --tag=CXX --mode=link $(CXX) $(CXXFLAGS) $(LDFLAGS) -o $@ $(OBJECTS) + +# Shared mode: one loadable module per grammar +shared-modules: check-grammar-sources $(OBJECTS) + @for lang in $(TS_GRAMMAR_DIRS); do \ 
+ objs="$$lang/parser.lo"; \ + linker="$(CC)"; tag="CC"; \ + if test -f "$$lang/scanner.lo"; then \ + objs="$$objs $$lang/scanner.lo"; \ + fi; \ + if test -f "$(srcdir)/$$lang/scanner.cc"; then \ + linker="$(CXX)"; tag="CXX"; \ + fi; \ + echo " LINK $$lang.so"; \ + $(LIBTOOL) --tag=$$tag --mode=link $$linker $(LDFLAGS) \ + -module -avoid-version -rpath $(TREE_SITTER_GRAMMAR_LIBDIR) \ + $(TREE_SITTER_LIBS) -o mc-ts-$$lang.la $$objs || exit 1; \ + done + +# Pattern rules for compiling grammar sources +SUFFIXES = .c .cc .lo + +.c.lo: + @dir=`dirname $@`; test -d "$$dir" || mkdir -p "$$dir" + $(LIBTOOL) --tag=CC --mode=compile $(CC) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o $@ $< + +.cc.lo: + @dir=`dirname $@`; test -d "$$dir" || mkdir -p "$$dir" + $(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CXXFLAGS) -c -o $@ $< + +# ============================================================================ +# Install +# ============================================================================ + +install-exec-local: install-exec-@TREE_SITTER_BUILD_MODE@ + +install-exec-static: + @true + +install-exec-shared: + $(INSTALL) -d $(DESTDIR)$(TREE_SITTER_GRAMMAR_LIBDIR) + @for lang in $(TS_GRAMMAR_DIRS); do \ + $(INSTALL) -m 755 .libs/mc-ts-$$lang.so $(DESTDIR)$(TREE_SITTER_GRAMMAR_LIBDIR)/mc-ts-$$lang.so; \ + done + +# ============================================================================ +# Clean +# ============================================================================ + +clean-local: + @for lang in $(TS_GRAMMAR_DIRS); do \ + rm -f "$$lang"/*.lo "$$lang"/*.o; \ + rm -rf "$$lang/.libs"; \ + done + rm -f libtsgrammars.la + rm -f mc-ts-*.la + rm -rf .libs + +distclean-local: + @for d in "$(srcdir)"/*/; do \ + case "$$d" in */tree_sitter/) ;; *) rm -rf "$$d" ;; esac; \ + done + +# ============================================================================ +# Grammar download +# 
============================================================================ + +.PHONY: download-grammars shared-modules check-grammar-sources + +download-grammars: + @if test -n "$(TREE_SITTER_GRAMMARS_DIR)"; then \ + echo "Copying grammar sources from $(TREE_SITTER_GRAMMARS_DIR)..."; \ + for lang in $(TS_GRAMMAR_DIRS); do \ + if test -d "$(TREE_SITTER_GRAMMARS_DIR)/$$lang"; then \ + mkdir -p "$(srcdir)/$$lang"; \ + cp -a "$(TREE_SITTER_GRAMMARS_DIR)/$$lang"/* "$(srcdir)/$$lang/"; \ + else \ + echo "WARNING: $$lang not found in $(TREE_SITTER_GRAMMARS_DIR)"; \ + fi; \ + done; \ + echo "Done."; \ + else \ + $(SHELL) "$(srcdir)/download-grammars.sh" $(TS_GRAMMAR_DIRS); \ + fi + +# Check that grammar sources exist before compiling. +check-grammar-sources: + @missing=""; \ + for lang in $(TS_GRAMMAR_DIRS); do \ + if test ! -f "$(srcdir)/$$lang/parser.c"; then \ + missing="$$missing $$lang"; \ + fi; \ + done; \ + if test -n "$$missing"; then \ + echo ""; \ + echo "ERROR: Tree-sitter grammar sources not found for:$$missing"; \ + echo ""; \ + echo "Please run: ./configure --with-tree-sitter"; \ + echo "(configure auto-downloads missing grammar sources)"; \ + echo ""; \ + echo "Or manually: make -C src/editor/ts-grammars download-grammars"; \ + echo ""; \ + exit 1; \ + fi diff --git a/src/editor/ts-grammars/Makefile.in b/src/editor/ts-grammars/Makefile.in deleted file mode 100644 index d11457605d..0000000000 --- a/src/editor/ts-grammars/Makefile.in +++ /dev/null @@ -1,169 +0,0 @@ -# Makefile.in for tree-sitter grammar sources. -# This is a hand-written Makefile.in (not generated by automake) because -# automake does not support configure substitutions in _SOURCES variables, -# and creates .deps directories for all EXTRA_SOURCES entries regardless -# of which grammars are selected. 
-# -# Supports two build modes (set by configure): -# static: all grammars linked into libtsgrammars.la (linked into mc binary) -# shared: each grammar built as a separate .so module (loaded at runtime) - -srcdir = @srcdir@ -top_srcdir = @top_srcdir@ -top_builddir = @top_builddir@ -VPATH = @srcdir@ -INSTALL = @INSTALL@ - -CC = @CC@ -CXX = @CXX@ -CFLAGS = @CFLAGS@ -CXXFLAGS = @CXXFLAGS@ -CPPFLAGS = @CPPFLAGS@ -LDFLAGS = @LDFLAGS@ -SHELL = @SHELL@ -LIBTOOL = @LIBTOOL@ - -TREE_SITTER_CFLAGS = @TREE_SITTER_CFLAGS@ -TREE_SITTER_LIBS = @TREE_SITTER_LIBS@ - -AM_CPPFLAGS = -I$(top_srcdir) -I$(srcdir) $(TREE_SITTER_CFLAGS) - -# Suppress warnings in downloaded grammar code -AM_CFLAGS = -w - -# Grammar sources and objects (set by configure) -TREE_SITTER_GRAMMAR_SOURCES = @TREE_SITTER_GRAMMAR_SOURCES@ -TREE_SITTER_GRAMMAR_OBJECTS = @TREE_SITTER_GRAMMAR_OBJECTS@ - -# Selected grammar directories (set by configure) -TS_GRAMMAR_DIRS = @TREE_SITTER_GRAMMARS@ - -# Build mode: "static" or "shared" (set by configure) -BUILD_TARGET = @TREE_SITTER_BUILD_TARGET@ - -# Shared module list (set by configure, e.g. 
"mc-ts-c.la mc-ts-python.la") -SHARED_LIBS = @TREE_SITTER_SHARED_LIBS@ - -# Install directory for shared grammar modules -grammardir = @TREE_SITTER_GRAMMAR_LIBDIR@ - -OBJECTS = $(TREE_SITTER_GRAMMAR_OBJECTS) - -all: $(BUILD_TARGET) - -# ============================================================================ -# Static mode: single convenience library -# ============================================================================ - -libtsgrammars.la: $(OBJECTS) - $(LIBTOOL) --tag=CXX --mode=link $(CXX) $(CXXFLAGS) $(LDFLAGS) -o $@ $(OBJECTS) - -# ============================================================================ -# Shared mode: one loadable module per grammar -# ============================================================================ - -shared-modules: $(OBJECTS) - @for lang in $(TS_GRAMMAR_DIRS); do \ - objs="$$lang/parser.lo"; \ - linker="$(CC)"; tag="CC"; \ - if test -f "$$lang/scanner.lo"; then \ - objs="$$objs $$lang/scanner.lo"; \ - fi; \ - if test -f "$(srcdir)/$$lang/scanner.cc"; then \ - linker="$(CXX)"; tag="CXX"; \ - fi; \ - echo " LINK $$lang.so"; \ - $(LIBTOOL) --tag=$$tag --mode=link $$linker $(LDFLAGS) \ - -module -avoid-version -rpath $(grammardir) \ - $(TREE_SITTER_LIBS) -o mc-ts-$$lang.la $$objs || exit 1; \ - done - -# ============================================================================ -# Pattern rules for compiling grammar sources -# ============================================================================ - -%.lo: %.c - @dir=`dirname $@`; test -d "$$dir" || mkdir -p "$$dir" - $(LIBTOOL) --tag=CC --mode=compile $(CC) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o $@ $< - -%.lo: %.cc - @dir=`dirname $@`; test -d "$$dir" || mkdir -p "$$dir" - $(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CXXFLAGS) -c -o $@ $< - -.PHONY: all clean distclean install install-static install-shared download-grammars shared-modules distdir - -# 
============================================================================ -# Install -# ============================================================================ - -install: install-@TREE_SITTER_BUILD_MODE@ - -install-static: - @true - -install-shared: - $(INSTALL) -d $(DESTDIR)$(grammardir) - @for lang in $(TS_GRAMMAR_DIRS); do \ - $(INSTALL) -m 755 .libs/mc-ts-$$lang.so $(DESTDIR)$(grammardir)/mc-ts-$$lang.so; \ - done - -# ============================================================================ -# Grammar download target -# ============================================================================ - -download-grammars: - @if test -n "$(TREE_SITTER_GRAMMARS_DIR)"; then \ - echo "Copying grammar sources from $(TREE_SITTER_GRAMMARS_DIR)..."; \ - for lang in $(TS_GRAMMAR_DIRS); do \ - if test -d "$(TREE_SITTER_GRAMMARS_DIR)/$$lang"; then \ - mkdir -p "$(srcdir)/$$lang"; \ - cp -a "$(TREE_SITTER_GRAMMARS_DIR)/$$lang"/* "$(srcdir)/$$lang/"; \ - else \ - echo "WARNING: $$lang not found in $(TREE_SITTER_GRAMMARS_DIR)"; \ - fi; \ - done; \ - echo "Done."; \ - else \ - $(SHELL) "$(srcdir)/download-grammars.sh" $(TS_GRAMMAR_DIRS); \ - fi - -# Catch missing grammar sources and print a helpful error message. -%/parser.c: - @echo ""; \ - echo "ERROR: Tree-sitter grammar source not found: $@"; \ - echo ""; \ - echo "Please run: ./configure --with-tree-sitter"; \ - echo "(configure auto-downloads missing grammar sources)"; \ - echo ""; \ - echo "Or manually: make -C src/editor/ts-grammars download-grammars"; \ - echo ""; \ - exit 1 - -# ============================================================================ -# Clean -# ============================================================================ - -clean: - @for lang in $(TS_GRAMMAR_DIRS); do \ - rm -f "$$lang"/*.lo "$$lang"/*.o; \ - rm -rf "$$lang/.libs"; \ - done - rm -f libtsgrammars.la - rm -f mc-ts-*.la - rm -rf .libs - -# Copy distributable files into the dist directory. 
-# Grammar sources are NOT included — they are downloaded at build time. -distdir: - @test -d "$(distdir)" || mkdir -p "$(distdir)" - cp $(srcdir)/Makefile.in $(distdir)/ - cp $(srcdir)/download-grammars.sh $(distdir)/ - cp $(srcdir)/ts-grammar-registry.h $(distdir)/ - cp $(srcdir)/.gitignore $(distdir)/ - cp -r $(srcdir)/tree_sitter $(distdir)/ - -distclean: clean - @for d in "$(srcdir)"/*/; do \ - case "$$d" in */tree_sitter/) ;; *) rm -rf "$$d" ;; esac; \ - done - rm -f Makefile From f7737c5f2ffd499b89c11476530880b440cc87b3 Mon Sep 17 00:00:00 2001 From: Jiri Tyr Date: Sun, 15 Mar 2026 12:13:08 +0000 Subject: [PATCH 04/16] Separating TS syntax handling Signed-off-by: Jiri Tyr --- src/editor/Makefile.am | 3 +- src/editor/syntax.c | 1367 +------------------------------------- src/editor/syntax_ts.c | 1410 ++++++++++++++++++++++++++++++++++++++++ src/editor/syntax_ts.h | 32 + 4 files changed, 1448 insertions(+), 1364 deletions(-) create mode 100644 src/editor/syntax_ts.c create mode 100644 src/editor/syntax_ts.h diff --git a/src/editor/Makefile.am b/src/editor/Makefile.am index 2df77a6c17..5bf5706d45 100644 --- a/src/editor/Makefile.am +++ b/src/editor/Makefile.am @@ -27,7 +27,8 @@ libedit_la_SOURCES = \ editwidget.c editwidget.h \ etags.c etags.h \ format.c \ - syntax.c + syntax.c \ + syntax_ts.c syntax_ts.h if USE_ASPELL if HAVE_GMODULE diff --git a/src/editor/syntax.c b/src/editor/syntax.c index 970f15c1b6..77ffaa3776 100644 --- a/src/editor/syntax.c +++ b/src/editor/syntax.c @@ -66,15 +66,7 @@ #include "edit-impl.h" #include "editwidget.h" - -#ifdef HAVE_TREE_SITTER -#include -#ifdef TREE_SITTER_STATIC -#include "ts-grammars/ts-grammar-registry.h" -#else -#include "ts-grammar-loader.h" -#endif -#endif +#include "syntax_ts.h" /*** global variables ****************************************************************************/ @@ -158,48 +150,6 @@ typedef struct edit_syntax_rule_t rule; } syntax_marker_t; -#ifdef HAVE_TREE_SITTER -// tree-sitter highlight cache 
entry: a range with an associated color -typedef struct -{ - uint32_t start_byte; - uint32_t end_byte; - int color; -} ts_highlight_entry_t; - -// Cached parser+query for a dynamically-injected language -typedef struct -{ - void *parser; // TSParser* - void *query; // TSQuery* -} ts_dynamic_lang_t; - -// A single injection context: parser + query + node types to inject into -typedef struct -{ - gboolean dynamic; // FALSE = static injection, TRUE = dynamic (per-block language) - - // Static injection fields (dynamic == FALSE): - void *parser; // TSParser* for the injected language - void *query; // TSQuery* for the injected language - char **node_types; // NULL-terminated list of parent node types to inject into - - // Dynamic injection fields (dynamic == TRUE): - char *block_type; // parent node type (e.g. "fenced_code_block") - char *lang_type; // child node containing language name (e.g. "info_string") - char *content_type; // child node containing code (e.g. "code_fence_content") - GHashTable *lang_cache; // language name -> ts_dynamic_lang_t* -} ts_injection_t; - -// mapping from tree-sitter capture name to MC color -typedef struct -{ - const char *capture_name; - const char *fg; - const char *attrs; -} ts_color_mapping_t; - -#endif /*** forward declarations (file scope functions) *************************************************/ @@ -207,52 +157,6 @@ typedef struct static char *error_file_name = NULL; -#ifdef HAVE_TREE_SITTER -// Default color theme for tree-sitter highlight capture names. -// Colors are chosen to match MC's default syntax highlighting (blue background skin). 
-static const ts_color_mapping_t ts_default_colors[] = { - { "keyword", "yellow", NULL }, - { "keyword.other", "white", NULL }, // Lua, AWK, Pascal keywords - { "keyword.control", "brightmagenta", NULL }, // PHP keywords - { "keyword.directive", "magenta", NULL }, // Makefile directives - { "function", "brightcyan", NULL }, - { "function.special", "brightred", NULL }, // preprocessor macros - { "function.builtin", "brown", NULL }, // Go builtins - { "function.macro", "brightmagenta", NULL }, // Rust macros - { "type", "yellow", NULL }, - { "type.builtin", "brightgreen", NULL }, // Go builtin types - { "string", "green", NULL }, - { "string.special", "brightgreen", NULL }, // char literals, escape sequences - { "number", "lightgray", NULL }, - { "number.builtin", "brightgreen", NULL }, // JSON/JS/Rust/Go numbers - { "comment", "brown", NULL }, - { "comment.special", "brightgreen", NULL }, // XML comments - { "comment.error", "red", NULL }, // Swift comments - { "constant", "lightgray", NULL }, - { "constant.builtin", "brightmagenta", NULL }, - { "variable", NULL, NULL }, // default color - { "variable.builtin", "brightred", NULL }, // self, $vars - { "variable.special", "brightgreen", NULL }, // shell $() expansions - { "operator", "brightcyan", NULL }, - { "operator.word", "yellow", NULL }, // Python/Ruby word operators - { "delimiter", "brightcyan", NULL }, - { "delimiter.special", "brightmagenta", NULL }, // semicolons - { "property", "brightcyan", NULL }, - { "property.key", "yellow", NULL }, // INI/properties keys - { "label", "cyan", NULL }, - { "tag", "brightcyan", NULL }, // HTML tags - { "tag.special", "white", NULL }, // XML tags - { "markup.bold", "brightmagenta", NULL }, // markdown bold - { "markup.italic", "magenta", NULL }, // markdown italic - { "markup.addition", "brightgreen", NULL }, // diff additions - { "markup.deletion", "brightred", NULL }, // diff deletions - { "markup.heading", "brightmagenta", NULL }, // diff file headers - { NULL, NULL, 
NULL } -}; - -// Config file name for tree-sitter grammar mappings -#define TS_GRAMMARS_FILE "grammars" -#endif /* --------------------------------------------------------------------------------------------- */ /*** file scope functions ************************************************************************/ @@ -784,7 +688,7 @@ translate_rule_to_color (const WEdit *edit, const edit_syntax_rule_t *rule) In case of an error, *line will not be modified. */ -static size_t +size_t read_one_line (char **line, FILE *f) { GString *p; @@ -931,7 +835,7 @@ get_args (char *l, char **args, int args_size) /* --------------------------------------------------------------------------------------------- */ -static int +int this_try_alloc_color_pair (tty_color_pair_t *color) { char f[80], b[80], a[80], *p; @@ -1515,7 +1419,7 @@ edit_read_syntax_file (WEdit *edit, GPtrArray *pnames, const char *syntax_file, /* --------------------------------------------------------------------------------------------- */ -static const char * +const char * get_first_editor_line (WEdit *edit) { static char s[256]; @@ -1576,1269 +1480,6 @@ exec_edit_syntax_dialog (const GPtrArray *names, const char *current_syntax) return listbox_run (syntaxlist); } -/* --------------------------------------------------------------------------------------------- */ -/* Tree-sitter integration */ -/* --------------------------------------------------------------------------------------------- */ - -#ifdef HAVE_TREE_SITTER - -/** - * TSInput read callback: reads chunks of text from the edit buffer. - */ -static const char * -ts_input_read (void *payload, uint32_t byte_index, TSPoint position, uint32_t *bytes_read) -{ - static char buf[4096]; - WEdit *edit = (WEdit *) payload; - uint32_t i; - - (void) position; - - for (i = 0; i < sizeof (buf) && (off_t) (byte_index + i) < edit->buffer.size; i++) - buf[i] = edit_buffer_get_byte (&edit->buffer, (off_t) (byte_index + i)); - - *bytes_read = i; - return (i > 0) ? 
buf : NULL; -} - -/* --------------------------------------------------------------------------------------------- */ - -/** - * Allocate an MC color pair for a tree-sitter capture name. - * Looks up the capture name in the default color mapping table. - */ -static int -ts_capture_name_to_color (const char *capture_name) -{ - const ts_color_mapping_t *m; - tty_color_pair_t color; - - for (m = ts_default_colors; m->capture_name != NULL; m++) - { - if (strcmp (m->capture_name, capture_name) == 0) - { - if (m->fg == NULL) - return EDITOR_NORMAL_COLOR; - - color.fg = m->fg; - color.bg = NULL; - color.attrs = m->attrs; - return this_try_alloc_color_pair (&color); - } - } - - return EDITOR_NORMAL_COLOR; -} - -/* --------------------------------------------------------------------------------------------- */ - -/** - * Open the ts-grammars config file. Tries user config dir first, then system share dir. - * Returns a FILE* on success, or NULL. - */ -static FILE * -ts_open_config (void) -{ - char *config_path; - FILE *f; - - config_path = g_build_filename (mc_config_get_data_path (), EDIT_SYNTAX_TS_DIR, TS_GRAMMARS_FILE, - (char *) NULL); - f = fopen (config_path, "r"); - g_free (config_path); - - if (f == NULL) - { - config_path = g_build_filename (mc_global.share_data_dir, EDIT_SYNTAX_TS_DIR, - TS_GRAMMARS_FILE, (char *) NULL); - f = fopen (config_path, "r"); - g_free (config_path); - } - - return f; -} - -/* --------------------------------------------------------------------------------------------- */ - -/** - * Read the ts-grammars config file and find a matching grammar for the filename. - * On match, fills grammar_name and syntax_name (newly allocated strings). - * Returns TRUE if a match was found. 
- */ -static gboolean -ts_find_grammar (const char *filename, const char *first_line, char **grammar_name, - char **syntax_name) -{ - FILE *f; - char *line = NULL; - - if (filename == NULL) - return FALSE; - - *grammar_name = NULL; - *syntax_name = NULL; - - f = ts_open_config (); - if (f == NULL) - return FALSE; - - while (read_one_line (&line, f) != 0) - { - char *p, *pattern, *name, *display; - - p = line; - - // Skip whitespace - while (*p != '\0' && whiteness (*p)) - p++; - - // Skip comments and empty lines - if (*p == '#' || *p == '\0') - { - g_free (line); - line = NULL; - continue; - } - - // Must start with "file" or "shebang" - gboolean is_shebang = FALSE; - - if (strncmp (p, "shebang", 7) == 0 && whiteness (p[7])) - { - is_shebang = TRUE; - p += 7; - } - else if (strncmp (p, "file", 4) == 0 && whiteness (p[4])) - { - p += 4; - } - else - { - g_free (line); - line = NULL; - continue; - } - - // Skip whitespace - while (*p != '\0' && whiteness (*p)) - p++; - pattern = p; - - // Find end of pattern (next whitespace) - while (*p != '\0' && !whiteness (*p)) - p++; - if (*p == '\0') - { - g_free (line); - line = NULL; - continue; - } - *p++ = '\0'; - - // Skip whitespace - while (*p != '\0' && whiteness (*p)) - p++; - name = p; - - // Find end of grammar name (next whitespace) - while (*p != '\0' && !whiteness (*p)) - p++; - if (*p == '\0') - { - g_free (line); - line = NULL; - continue; - } - *p++ = '\0'; - - // Skip whitespace - rest is display name (may contain backslash-escaped spaces) - while (*p != '\0' && whiteness (*p)) - p++; - display = p; - - // Unescape backslash-space in display name - { - char *src = display, *dst = display; - - while (*src != '\0') - { - if (*src == '\\' && *(src + 1) == ' ') - { - *dst++ = ' '; - src += 2; - } - else - *dst++ = *src++; - } - *dst = '\0'; - } - - // Check if filename or first line matches this pattern - const char *match_against = is_shebang ? 
first_line : filename; - - if (match_against != NULL && match_against[0] != '\0' - && mc_search (pattern, NULL, match_against, MC_SEARCH_T_REGEX)) - { - *grammar_name = g_strdup (name); - *syntax_name = g_strdup (display); - g_free (line); - fclose (f); - return TRUE; - } - - g_free (line); - line = NULL; - } - - g_free (line); - fclose (f); - return FALSE; -} - -/* --------------------------------------------------------------------------------------------- */ - -/** - * Load a highlight query file. Searches in user data dir, then system share dir. - * Returns a newly allocated string with the file contents, or NULL on failure. - */ -static char * -ts_load_query_file (const char *query_filename, uint32_t *out_len) -{ - char *path; - char *contents = NULL; - gsize len = 0; - - // Try user config dir first - path = g_build_filename (mc_config_get_data_path (), EDIT_SYNTAX_TS_DIR, "queries", - query_filename, (char *) NULL); - if (g_file_get_contents (path, &contents, &len, NULL)) - { - g_free (path); - *out_len = (uint32_t) len; - return contents; - } - g_free (path); - - // Try system share dir - path = g_build_filename (mc_global.share_data_dir, EDIT_SYNTAX_TS_DIR, "queries", - query_filename, (char *) NULL); - if (g_file_get_contents (path, &contents, &len, NULL)) - { - g_free (path); - *out_len = (uint32_t) len; - return contents; - } - g_free (path); - - *out_len = 0; - return NULL; -} - -/* --------------------------------------------------------------------------------------------- */ - -/** - * Find all injection configs for a grammar. - * Searches for "inject ,,..." - * lines in the config file. Returns a GArray of ts_injection_t entries with only - * the grammar_name (stored in parser as a temporary hack) and node_types filled in. - * The caller is responsible for initializing the parser/query or freeing the results. - * - * Actually, we use a small helper struct for the config results to keep it clean. 
- */ - -typedef struct -{ - gboolean dynamic; // FALSE = static inject, TRUE = inject_dynamic - - // Static inject fields: - char *grammar_name; - char **node_types; - - // Dynamic inject fields: - char *block_type; - char *lang_type; - char *content_type; -} ts_inject_config_t; - -static GArray * -ts_find_injections (const char *grammar_name) -{ - FILE *f; - char *line = NULL; - GArray *configs; - - f = ts_open_config (); - if (f == NULL) - return NULL; - - configs = g_array_new (FALSE, TRUE, sizeof (ts_inject_config_t)); - - while (read_one_line (&line, f) != 0) - { - char *p, *parent, *child, *nodes_str; - - p = line; - - // Skip whitespace - while (*p != '\0' && whiteness (*p)) - p++; - - // Skip comments and empty lines - if (*p == '#' || *p == '\0') - { - g_free (line); - line = NULL; - continue; - } - - // Check for "inject_dynamic" or "inject" - gboolean is_dynamic = FALSE; - - if (strncmp (p, "inject_dynamic", 14) == 0 && whiteness (p[14])) - { - is_dynamic = TRUE; - p += 14; - } - else if (strncmp (p, "inject", 6) == 0 && whiteness (p[6])) - { - p += 6; - } - else - { - g_free (line); - line = NULL; - continue; - } - - // Skip whitespace -> parent grammar name - while (*p != '\0' && whiteness (*p)) - p++; - parent = p; - - // Find end of parent grammar name - while (*p != '\0' && !whiteness (*p)) - p++; - if (*p == '\0') - { - g_free (line); - line = NULL; - continue; - } - *p++ = '\0'; - - // Check if this injection is for our grammar - if (strcmp (parent, grammar_name) != 0) - { - g_free (line); - line = NULL; - continue; - } - - if (is_dynamic) - { - // Format: inject_dynamic - char *block_type, *lang_type, *content_type; - ts_inject_config_t cfg; - - // block_type - while (*p != '\0' && whiteness (*p)) - p++; - block_type = p; - while (*p != '\0' && !whiteness (*p)) - p++; - if (*p == '\0') - { - g_free (line); - line = NULL; - continue; - } - *p++ = '\0'; - - // lang_type - while (*p != '\0' && whiteness (*p)) - p++; - lang_type = p; - while (*p != 
'\0' && !whiteness (*p)) - p++; - if (*p == '\0') - { - g_free (line); - line = NULL; - continue; - } - *p++ = '\0'; - - // content_type - while (*p != '\0' && whiteness (*p)) - p++; - content_type = p; - while (*p != '\0' && !whiteness (*p)) - p++; - *p = '\0'; - - memset (&cfg, 0, sizeof (cfg)); - cfg.dynamic = TRUE; - cfg.block_type = g_strdup (block_type); - cfg.lang_type = g_strdup (lang_type); - cfg.content_type = g_strdup (content_type); - g_array_append_val (configs, cfg); - } - else - { - // Format: inject ,,... - char *child, *nodes_str; - ts_inject_config_t cfg; - gchar **parts; - int count, i; - - // child grammar name - while (*p != '\0' && whiteness (*p)) - p++; - child = p; - while (*p != '\0' && !whiteness (*p)) - p++; - if (*p == '\0') - { - g_free (line); - line = NULL; - continue; - } - *p++ = '\0'; - - // comma-separated node types - while (*p != '\0' && whiteness (*p)) - p++; - nodes_str = p; - - parts = g_strsplit (nodes_str, ",", -1); - count = (int) g_strv_length (parts); - - memset (&cfg, 0, sizeof (cfg)); - cfg.dynamic = FALSE; - cfg.grammar_name = g_strdup (child); - cfg.node_types = g_new0 (char *, count + 1); - for (i = 0; i < count; i++) - cfg.node_types[i] = g_strstrip (g_strdup (parts[i])); - cfg.node_types[count] = NULL; - - g_strfreev (parts); - g_array_append_val (configs, cfg); - } - - g_free (line); - line = NULL; - } - - g_free (line); - fclose (f); - - if (configs->len == 0) - { - g_array_free (configs, TRUE); - return NULL; - } - - return configs; -} - -/* --------------------------------------------------------------------------------------------- */ - -/** - * Initialize injection parsers/queries for a grammar. - * Called after the primary grammar is initialized. - * Failure is non-fatal — highlighting works without injections. 
- */ -static void -ts_init_injections (WEdit *edit, const char *grammar_name) -{ - GArray *configs; - guint i; - - configs = ts_find_injections (grammar_name); - if (configs == NULL) - return; - - edit->ts_injections = g_array_new (FALSE, TRUE, sizeof (ts_injection_t)); - - for (i = 0; i < configs->len; i++) - { - ts_inject_config_t *cfg = &g_array_index (configs, ts_inject_config_t, i); - ts_injection_t inj; - - memset (&inj, 0, sizeof (inj)); - - if (cfg->dynamic) - { - // Dynamic injection: no parser/query created now — they're created - // on demand per language in ts_rebuild_highlight_cache. - inj.dynamic = TRUE; - inj.block_type = cfg->block_type; - inj.lang_type = cfg->lang_type; - inj.content_type = cfg->content_type; - inj.lang_cache = g_hash_table_new (g_str_hash, g_str_equal); - cfg->block_type = NULL; // ownership transferred - cfg->lang_type = NULL; - cfg->content_type = NULL; - g_array_append_val (edit->ts_injections, inj); - } - else - { - // Static injection: create parser/query now - const TSLanguage *lang; - TSParser *parser; - char *query_filename; - char *query_src; - uint32_t query_len; - uint32_t error_offset; - TSQueryError error_type; - TSQuery *query; - - lang = ts_grammar_registry_lookup (cfg->grammar_name); - if (lang == NULL) - goto skip; - - parser = ts_parser_new (); - if (!ts_parser_set_language (parser, lang)) - { - ts_parser_delete (parser); - goto skip; - } - - ts_parser_set_timeout_micros (parser, 3000000); - - query_filename = g_strdup_printf ("%s-highlights.scm", cfg->grammar_name); - query_src = ts_load_query_file (query_filename, &query_len); - g_free (query_filename); - - if (query_src == NULL) - { - ts_parser_delete (parser); - goto skip; - } - - query = ts_query_new (lang, query_src, query_len, &error_offset, &error_type); - g_free (query_src); - - if (query == NULL) - { - ts_parser_delete (parser); - goto skip; - } - - inj.parser = parser; - inj.query = query; - inj.node_types = cfg->node_types; - cfg->node_types = NULL; 
// ownership transferred - g_array_append_val (edit->ts_injections, inj); - g_free (cfg->grammar_name); - continue; - - skip: - g_free (cfg->grammar_name); - g_strfreev (cfg->node_types); - } - } - - g_array_free (configs, TRUE); - - if (edit->ts_injections->len == 0) - { - g_array_free (edit->ts_injections, TRUE); - edit->ts_injections = NULL; - } -} - -/* --------------------------------------------------------------------------------------------- */ - -/** - * Try to initialize tree-sitter for the given edit widget. - * Returns TRUE on success, FALSE if we should fall back to legacy highlighting. - * - * Grammar discovery: - * 1. Config file ts-grammars maps filename regex -> grammar_name - * 2. Grammar looked up in static registry (compiled-in) - * 3. Query file by convention: -highlights.scm - */ -static gboolean -ts_init_for_file (WEdit *edit) -{ - const char *filename; - char *grammar_name = NULL; - char *display_name = NULL; - char *query_filename = NULL; - const TSLanguage *lang; - TSParser *parser; - TSTree *tree; - TSInput input; - char *query_src; - uint32_t query_len; - uint32_t error_offset; - TSQueryError error_type; - TSQuery *query; - - filename = vfs_path_as_str (edit->filename_vpath); - - if (!ts_find_grammar (filename, get_first_editor_line (edit), &grammar_name, &display_name)) - return FALSE; - - // Look up grammar in the static registry - lang = ts_grammar_registry_lookup (grammar_name); - if (lang == NULL) - { - g_free (grammar_name); - g_free (display_name); - return FALSE; - } - - // Create parser and set language - parser = ts_parser_new (); - if (!ts_parser_set_language (parser, lang)) - { - ts_parser_delete (parser); - g_free (grammar_name); - g_free (display_name); - return FALSE; - } - - // Set a timeout to prevent pathological grammars from freezing the editor - ts_parser_set_timeout_micros (parser, 3000000); // 3 seconds - - // Parse the buffer - input.payload = edit; - input.read = ts_input_read; - input.encoding = 
TSInputEncodingUTF8; - - tree = ts_parser_parse (parser, NULL, input); - if (tree == NULL) - { - ts_parser_delete (parser); - g_free (grammar_name); - g_free (display_name); - return FALSE; - } - - // Load and compile highlight query: -highlights.scm - query_filename = g_strdup_printf ("%s-highlights.scm", grammar_name); - query_src = ts_load_query_file (query_filename, &query_len); - g_free (query_filename); - - if (query_src == NULL) - { - ts_tree_delete (tree); - ts_parser_delete (parser); - g_free (grammar_name); - g_free (display_name); - return FALSE; - } - - query = ts_query_new (lang, query_src, query_len, &error_offset, &error_type); - g_free (query_src); - - if (query == NULL) - { - ts_tree_delete (tree); - ts_parser_delete (parser); - g_free (grammar_name); - g_free (display_name); - return FALSE; - } - - // All good -- store in edit widget - edit->ts_parser = parser; - edit->ts_tree = tree; - edit->ts_highlight_query = query; - edit->ts_highlights = g_array_new (FALSE, FALSE, sizeof (ts_highlight_entry_t)); - edit->ts_highlights_start = -1; - edit->ts_highlights_end = -1; - edit->ts_active = TRUE; - edit->ts_need_reparse = FALSE; - - // Try to initialize language injection (e.g., markdown inline within markdown block) - // Failure is non-fatal — highlighting works without injection. - ts_init_injections (edit, grammar_name); - - g_free (edit->syntax_type); - edit->syntax_type = display_name; // takes ownership - - g_free (grammar_name); - - return TRUE; -} - -/* --------------------------------------------------------------------------------------------- */ - -/** - * Free all tree-sitter resources associated with the edit widget. 
- */ -static void -ts_free (WEdit *edit) -{ - // Free injection resources - if (edit->ts_injections != NULL) - { - guint i; - - for (i = 0; i < edit->ts_injections->len; i++) - { - ts_injection_t *inj = &g_array_index (edit->ts_injections, ts_injection_t, i); - - if (inj->dynamic) - { - g_free (inj->block_type); - g_free (inj->lang_type); - g_free (inj->content_type); - if (inj->lang_cache != NULL) - { - GHashTableIter iter; - gpointer key, value; - - g_hash_table_iter_init (&iter, inj->lang_cache); - while (g_hash_table_iter_next (&iter, &key, &value)) - { - ts_dynamic_lang_t *dl = (ts_dynamic_lang_t *) value; - - if (dl->query != NULL) - ts_query_delete ((TSQuery *) dl->query); - if (dl->parser != NULL) - ts_parser_delete ((TSParser *) dl->parser); - g_free (dl); - g_free (key); - } - g_hash_table_destroy (inj->lang_cache); - } - } - else - { - if (inj->query != NULL) - ts_query_delete ((TSQuery *) inj->query); - if (inj->parser != NULL) - ts_parser_delete ((TSParser *) inj->parser); - if (inj->node_types != NULL) - g_strfreev (inj->node_types); - } - } - g_array_free (edit->ts_injections, TRUE); - edit->ts_injections = NULL; - } - - // Free primary resources - if (edit->ts_highlight_query != NULL) - { - ts_query_delete ((TSQuery *) edit->ts_highlight_query); - edit->ts_highlight_query = NULL; - } - - if (edit->ts_tree != NULL) - { - ts_tree_delete ((TSTree *) edit->ts_tree); - edit->ts_tree = NULL; - } - - if (edit->ts_parser != NULL) - { - ts_parser_delete ((TSParser *) edit->ts_parser); - edit->ts_parser = NULL; - } - - if (edit->ts_highlights != NULL) - { - g_array_free (edit->ts_highlights, TRUE); - edit->ts_highlights = NULL; - } - - edit->ts_highlights_start = -1; - edit->ts_highlights_end = -1; - edit->ts_active = FALSE; -} - -/* --------------------------------------------------------------------------------------------- */ - -/** - * Append a TSRange for the given node to the ranges array. 
- */ -static void -ts_append_node_range (TSNode node, GArray *ranges) -{ - TSRange r; - - r.start_point = ts_node_start_point (node); - r.end_point = ts_node_end_point (node); - r.start_byte = ts_node_start_byte (node); - r.end_byte = ts_node_end_byte (node); - g_array_append_val (ranges, r); -} - -/** - * Recursively collect byte ranges of nodes matching the injection node types. - * Ranges are appended to the GArray of TSRange and are ordered by byte offset. - * - * Node types support two formats: - * "node_type" — match nodes of this type directly - * "parent_type/child_type" — match child_type nodes inside parent_type nodes - * (used for HTML: "script_element/raw_text" targets the raw_text inside + + + + + + +

Main Heading

+

Section Heading

+

Subsection

+ + +

Special characters: & < > " '  

+

Numeric entity: © ☃

+ + + Visit Example + About + + + A photo + + +
+
+ + + +
    +
  • First item
  • +
  • Second item
  • +
  • Third & final item
  • +
+ +
    +
  1. Numbered item
  2. +
+ + + + + + + + + + + + + + + + + + + +
NameValue
Alpha100
Beta200
+ + +
+ + + + + +
+ + +
+ Styled & interactive div +
+ + + +
+
+

Article Title

+

Article content with — dash.

+
Published 2024
+
+
+ + + + + + +
+ Bold & italic +
+ + + + + + + + + + diff --git a/tests/syntax/samples/idl-report.md b/tests/syntax/samples/idl-report.md new file mode 100644 index 0000000000..51dbb9b94b --- /dev/null +++ b/tests/syntax/samples/idl-report.md @@ -0,0 +1,70 @@ +IDL syntax highlighting: TS vs Legacy comparison report +======================================================== + +Sample file: `idl.idl` +Legacy reference: `misc/syntax/idl.syntax` +TS query: `misc/syntax-ts/queries-override/idl-highlights.scm` +TS colors: `misc/syntax-ts/colors.ini` `[idl]` + +Aligned with legacy +------------------- + +- Keywords (`module`, `interface`, `struct`, `union`, `enum`, `typedef`, + `const`, `exception`, `valuetype`, `switch`, `case`, `default`, `void`, `in`, + `out`, `inout`, `raises`, `readonly`, `attribute`, `oneway`, `context`): + `yellow` - MATCH +- Type keywords (`short`, `long`, `float`, `double`, `string`, `wstring`, `any`, + `sequence`, `fixed`, `Object`): `yellow` - MATCH +- Boolean type (`boolean`): `yellow` - MATCH +- Boolean literals (`TRUE`, `FALSE`): `yellow` - MATCH +- Comments (line `//` and block `/* */`): `brown` - MATCH +- Strings (double-quoted `"..."`): `green` - MATCH +- Char literals (`'X'`): `brightgreen` - MATCH +- Operators (`+`, `-`, `*`, `/`, `%`, `=`, `::`, `<<`, `>>`, `~`): `yellow` - + MATCH +- Brackets (`{`, `}`, `(`, `)`, `[`, `]`): `brightcyan` - MATCH +- Punctuation (`,`, `:`, `<`, `>`): `brightcyan` - MATCH +- Semicolons (`;`): `brightmagenta` - MATCH + +Intentional improvements over legacy +------------------------------------- + +- Preprocessor directives (`#ifndef`, `#define`, `#include`, `#pragma`, + `#endif`): TS colors these as `brightmagenta` via `keyword.control`. Legacy + uses a line context starting with `#` colored as `brightred`. The TS approach + captures the entire preprocessor statement more precisely. +- The `public` and `private` keywords (in valuetype): TS colors these as + `yellow` via the keyword list. 
Legacy does not include `public` and `private` + in its keyword list, leaving them as default text. +- System include strings (``): TS captures these via + `system_lib_string` as `green`. Legacy colors the `<` and `>` as `yellow` + operators and the path as `red` inside the preprocessor context. +- The `|` operator in `0x0A | 0x50`: legacy does not explicitly list `|` as an + operator keyword in the IDL syntax file, leaving it uncolored. TS does not + capture it either (not in the TS operator list), so both leave it as default + text. +- The `map` type keyword: TS includes `map` in the keyword list. Legacy does not + have `map` as a keyword. +- The `fixed` type with angle brackets (`fixed<10,2>`): TS colors `fixed` as + `yellow` and the angle brackets as `brightcyan` delimiters. Legacy colors + `fixed` as `yellow` and `<` / `>` as `yellow` operators. +- Comment at end of closing: the `// end module` comment on the last closing + brace line is correctly colored `brown` by TS even after the `#endif` + preprocessor. Legacy has issues with context transitions near the end of the + file. + +Known shortcomings +------------------ + +- The `unsigned`, `char`, `octet`, `wchar`, `wstring`, `fixed`, `native`, and + `ValueBase` type keywords: FIXED -- now included in the type keywords list as + `@keyword` (`yellow`), matching legacy. +- The block comment `/* ... */` delimiters: legacy colors `/*` and `*/` + as `brown` markers. TS colors the entire comment node uniformly as `brown`, + which is cleaner but legacy also shows the markers. +- Preprocessor string content: legacy colors `"types.idl"` as `red` inside the + preprocessor context. TS colors it as `green` (via `string_literal`), matching + the standard string color. This is arguably better but differs from legacy. +- The `#endif` line with a trailing comment: TS colors `#endif` as + `brightmagenta` and the trailing `// _SAMPLE_IDL_` comment as + `brown`. Legacy colors the entire line as `brightred`.
diff --git a/tests/syntax/samples/idl.idl b/tests/syntax/samples/idl.idl new file mode 100644 index 0000000000..13c7cd53b9 --- /dev/null +++ b/tests/syntax/samples/idl.idl @@ -0,0 +1,164 @@ +/* IDL syntax sample file */ + +#ifndef _SAMPLE_IDL_ +#define _SAMPLE_IDL_ + +#include +#include "types.idl" + +#pragma prefix "example.org" + +// Module definition +module SampleModule { + + // Forward declaration + interface Logger; + + // Type definitions + typedef long Identifier; + typedef sequence StringList; + typedef sequence BoundedLongs; + + // Enum + enum Color { RED, GREEN, BLUE }; + + // Constants + const long MAX_SIZE = 1024; + const double PI = 3.14159; + const boolean ENABLED = TRUE; + const string LABEL = "default"; + const char DELIM = 'X'; + + // Struct + struct Point { + float x; + float y; + float z; + }; + + struct Record { + Identifier id; + string name; + long timestamp; + boolean active; + }; + + // Union with switch + union Variant switch (long) { + case 0: long intVal; + case 1: double floatVal; + case 2: string strVal; + default: octet rawData; + }; + + // Exception + exception NotFound { + string message; + long errorCode; + }; + + exception AccessDenied { + string reason; + }; + + // Valuetype + valuetype Timestamp { + public long seconds; + public long nanoseconds; + private string timezone; + }; + + // Interface with attributes and operations + interface DataStore { + // Readonly attribute + readonly attribute long count; + readonly attribute string version; + + // Regular attribute + attribute boolean connected; + attribute string label; + + // Void operation + void initialize(); + + // Parameterized operations + Record lookup(in Identifier id) + raises (NotFound); + + void store( + in Identifier id, + in string name, + inout Record data + ) raises (NotFound, AccessDenied); + + // Oneway operation + oneway void notify(in string message); + + // Return types: short, long, float, double + short getStatus(); + long getTimestamp(); + float 
getLoad(); + double getPreciseLoad(); + + // Unsigned types + unsigned short getPort(); + unsigned long getSize(); + + // Wide string + wstring getDescription(); + + // Sequence return + StringList listNames( + in long offset, + out long total + ); + + // Any type + any getProperty(in string key); + + // Context + void configure(in string key, in string val) + context("user", "session"); + + // Fixed type + fixed<10,2> getBalance(); + }; + + // Interface inheritance + interface Logger { + void log(in string msg); + }; + + interface AuditLogger : Logger { + void audit(in string action, in Record rec); + }; + + // Operators in constant expressions + const long MASK = 0xFF << 4; + const long FLAGS = 0x0A | 0x50; + const long INVERTED = ~0xFF; + const long COMBINED = (MAX_SIZE + 1) * 2 - 1; + const long REMAINDER = MAX_SIZE % 10; + const long SHIFTED = FLAGS >> 2; + + // Boolean type in operation + interface Validator { + boolean validate( + in string input, + out string error + ); + }; + + // Map type + typedef map NameMap; + + // Nested module + module SubModule { + interface Helper { + void assist(); + }; + }; + +}; // end module SampleModule + +#endif // _SAMPLE_IDL_ diff --git a/tests/syntax/samples/ini-report.md b/tests/syntax/samples/ini-report.md new file mode 100644 index 0000000000..8c1ad9a32b --- /dev/null +++ b/tests/syntax/samples/ini-report.md @@ -0,0 +1,51 @@ +INI syntax highlighting: TS vs Legacy comparison report +======================================================= + +Sample file: `ini.ini` +Legacy reference: `misc/syntax/ini.syntax` +TS query: `misc/syntax-ts/queries-override/ini-highlights.scm` +TS colors: `misc/syntax-ts/colors.ini` `[ini]` + +Aligned with legacy +------------------- + +- Section headers (`[general]`, `[database]`, etc.): `yellow` - MATCH (legacy + colors `[...]` context yellow, TS uses `@keyword` for `section_name` and + brackets). 
+- Setting keys (`name`, `host`, `port`, etc.): `cyan` - MATCH (legacy default + context is cyan, TS uses `@label`). +- Equals sign `=`: `brightred` - MATCH (legacy uses `brightred` for `=`, TS uses + `@variable.builtin`). +- Setting values (`My Application`, `localhost`, `5432`, etc.): `brightcyan` - + MATCH (legacy uses `brightcyan` via exclusive `= \n` context, TS uses + `@delimiter`). +- Hash comments (`# ...`): `brown` - MATCH. +- Semicolon comments (`; ...`): `brown` - MATCH. + +Intentional improvements over legacy +------------------------------------- + +- TS and legacy produce effectively identical output for INI files. The color + mapping was designed to exactly replicate the legacy behavior. +- The TS section header highlighting includes the brackets `[` and `]` as part + of the `@keyword` capture (yellow), matching the legacy context-based approach + where the entire `[...]` span is yellow. + +Known shortcomings +------------------ + +- The TS output shows section headers (`[general]`, `[database]`, etc.) without + a closing color tag on the same line, meaning the yellow color from the + section name may bleed into the next line visually. In practice the line ends + and color resets, but the dump output shows unclosed tags. Legacy behaves + similarly with its context-based approach. +- Neither TS nor legacy distinguish between different value types (numbers, + booleans, paths, URLs). All values after `=` are uniformly `brightcyan`. +- Inline comments after values (e.g., `key = value # comment`) are not + separately highlighted by either engine. The entire value portion through + end-of-line is treated as the setting value. +- The legacy engine colors the default context as `cyan`, meaning any text not + matched by a specific rule (including blank lines between sections) appears in + cyan. The TS engine only colors explicitly matched nodes, leaving unmatched + text as default. Both dumps show the same effective result for well-formed INI + files. 
diff --git a/tests/syntax/samples/ini.ini b/tests/syntax/samples/ini.ini new file mode 100644 index 0000000000..e1bad4dff3 --- /dev/null +++ b/tests/syntax/samples/ini.ini @@ -0,0 +1,57 @@ +# Sample INI file demonstrating syntax highlighting features +# Exercises all tree-sitter captures from ini-highlights.scm + +; This is also a comment (semicolon style) + +[general] +name = My Application +version = 1.0.0 +debug = false + +[database] +host = localhost +port = 5432 +name = mydb +user = admin +password = secret123 + +[server] +address = 0.0.0.0 +port = 8080 +max_connections = 100 +timeout = 30 + +[logging] +level = info +file = /var/log/app.log +format = json +rotate = true + +[paths] +home = /home/user +data = /var/data +cache = /tmp/cache +config = /etc/app/config + +[features] +enable_api = true +enable_web = false +rate_limit = 1000 +retry_count = 3 + +[email] +smtp_host = mail.example.com +smtp_port = 587 +from = noreply@example.com +use_tls = true + +# Section with special characters in values +[urls] +api = https://api.example.com/v1 +docs = https://docs.example.com +webhook = https://hooks.example.com/notify + +[limits] +max_size = 104857600 +max_age = 86400 +threshold = 0.95 diff --git a/tests/syntax/samples/java-report.md b/tests/syntax/samples/java-report.md new file mode 100644 index 0000000000..8c1665e954 --- /dev/null +++ b/tests/syntax/samples/java-report.md @@ -0,0 +1,88 @@ +Java syntax highlighting: TS vs Legacy comparison report +========================================================= + +Sample file: `java.java` +Legacy reference: `misc/syntax/java.syntax` +TS query: `misc/syntax-ts/queries-override/java-highlights.scm` +TS colors: `misc/syntax-ts/colors.ini` `[java]` + +Aligned with legacy +------------------- + +- Language keywords (`abstract`, `assert`, `break`, `case`, `catch`, `class`, + `continue`, `default`, `do`, `else`, `enum`, `extends`, `final`, `finally`, + `for`, `if`, `implements`, `import`, `instanceof`, `interface`, `native`, + 
`new`, `package`, `private`, `protected`, `public`, `return`, `static`, + `strictfp`, `switch`, `synchronized`, `throw`, `throws`, `transient`, `try`, + `volatile`, `while`): `yellow` - MATCH +- `this`, `super`: `yellow` - MATCH +- Primitive types (`byte`, `short`, `int`, `long`, `float`, `double`, `char`): + `yellow` - MATCH +- `boolean`, `void`: `yellow` - MATCH +- Literals `true`, `false`, `null`: `yellow` - MATCH +- Operators (`+`, `-`, `*`, `/`, `%`, `=`, `==`, `!=`, `<`, `>`, `<=`, `>=`, + `&&`, `||`, `!`, `&`, `|`, `^`, `~`, `<<`, `>>`, `>>>`, `+=`, `-=`, `*=`, + `/=`, `%=`, `++`, `--`, `->`): `yellow` - MATCH +- Semicolons `;`: `brightmagenta` - MATCH +- Brackets/parens/braces (`(`, `)`, `[`, `]`, `{`, `}`): `brightcyan` - MATCH +- Comma `,`: `brightcyan` - MATCH +- Dot `.`: `brightcyan` - MATCH (TS explicitly captures `.` as `delimiter`) +- Colon `:`: `brightcyan` - MATCH +- Strings (double-quoted `"..."`): `green` - MATCH +- Character literals (`'A'`, `'\n'`): `brightgreen` - MATCH +- Comments (line `//` and block `/* */`): `brown` - MATCH +- Annotations (`@Override`, `@SuppressWarnings`, `@Deprecated`): `brightred` - + MATCH +- Labels (`outer:`): TS colors as `cyan` via `label` capture; legacy colors + `outer` as `yellow` (matching the keyword `outer`). Different color but both + intentional. + +Intentional improvements over legacy +------------------------------------- + +- Labels (`outer:`): TS colors the label identifier as `cyan` via the + `labeled_statement` capture, which is semantically correct. Legacy does not + have label detection; it colors `outer` as `yellow` because `outer` happens to + be in the keyword list (a false positive from a different language era). +- Dot `.` in method chains (`System.err.println`): TS explicitly captures dots + as `brightcyan` delimiters, providing consistent visual separation. Legacy + also colors `.` but as part of the general punctuation keyword rule. Both are + `brightcyan`. 
+- Escape sequences in strings (`\n`, `\t`, `\\`, `\"`): TS colors the entire + string including escapes as `green` for the string node, with + `character_literal` escape chars colored as `brightgreen` via + `string.special`. Legacy colors escape sequences as `brightgreen` inside + string contexts, so only character literals match (see Known shortcomings). +- Generic angle brackets `<>`: TS colors `<` and `>` as `yellow` (operator). + Legacy also colors them `yellow`. MATCH. +- Ternary operator `?`: legacy colors `?` as `brightcyan`. TS does not + explicitly capture `?` in the query, so it appears as default text. Minor + difference. + +Known shortcomings +------------------ + +- Object methods (`clone`, `equals`, `finalize`, `getClass`, `hashCode`, + `notify`, `notifyAll`, `toString`, `wait`): legacy colors these as `brightred` + via a keyword list. TS does not distinguish these from regular method names. + `toString` is colored as default text by TS (except when preceded by + `@Override`, where the annotation gets `brightred`). +- `record`, `sealed`, `permits`, `non-sealed`, `yield`: these newer Java + keywords are in the TS query but the sample comments them out since they + require newer Java syntax. Legacy does not have them at all. +- The `?` ternary operator: TS does not capture `?` explicitly. Legacy colors it + as `brightcyan`. Minor visual difference in ternary expressions. +- Legacy keywords not in TS: `byvalue`, `cast`, `clone`, `def`, `equals`, + `finalize`, `future`, `generic`, `getClass`, `goto`, `hashCode`, `inner`, + `notify`, `notifyAll`, `outer`, `rest`, `toString`, `var`, `wait` are in + legacy's keyword list but not all are standard Java keywords. TS only captures + actual language keywords. +- Preprocessor-style directives (`#include`-like lines): legacy has a + `context linestart # \n brightred` rule for C-style preprocessor lines that + might appear in Java files. TS does not handle these since they are not valid + Java syntax.
+- String escape sequences: TS colors the entire string node as `green` + uniformly. Individual escape sequences like `\n` are not separately + highlighted within the string (the `string.special` capture applies to + `character_literal`, not escape sequences inside strings). Legacy colors + escape sequences as `brightgreen` inside string contexts. diff --git a/tests/syntax/samples/java.java b/tests/syntax/samples/java.java new file mode 100644 index 0000000000..6362870a41 --- /dev/null +++ b/tests/syntax/samples/java.java @@ -0,0 +1,199 @@ +// Comment: demonstrate all Java syntax highlighting features +// This file exercises every TS capture from java-highlights.scm + +package com.example; + +import java.util.List; +import java.util.ArrayList; +import java.util.Map; + +// Keywords: class, public, abstract, extends, implements +public abstract class java { + + // Primitive types and void + byte b = 1; + short s = 2; + int i = 3; + long l = 4L; + float f = 1.0f; + double d = 2.0; + char c = 'A'; + boolean flag = true; + + // true, false, null + boolean yes = true; + boolean no = false; + Object nothing = null; + + // Keywords: abstract, final, static, native, volatile, transient + static final int MAX = 100; + volatile int counter = 0; + transient String temp = "temp"; + + // Keywords: synchronized, strictfp + synchronized void syncMethod() {} + + // Access modifiers: private, protected, public + private int secret = 0; + protected int shared = 0; + + // this, super + void example() { + this.secret = 1; + // super.toString(); + } + + // Keywords: if, else, for, while, do, switch, case, default + void controlFlow() { + if (flag) { + int x = 1; + } else { + int x = 2; + } + + for (int j = 0; j < 10; j++) { + if (j == 5) continue; + if (j == 8) break; + } + + while (counter < 10) { + counter++; + } + + do { + counter--; + } while (counter > 0); + + switch (i) { + case 1: + break; + case 2: + break; + default: + break; + } + } + + // Keywords: try, catch, finally, throw, 
throws + void exceptions() throws Exception { + try { + throw new RuntimeException("error"); + } catch (RuntimeException e) { + System.err.println(e.getMessage()); + } finally { + counter = 0; + } + } + + // Keywords: new, instanceof, return + Object create() { + Object obj = new ArrayList<>(); + if (obj instanceof List) { + return obj; + } + return null; + } + + // Keywords: enum + enum Color { + RED, GREEN, BLUE + } + + // Keywords: interface, extends + interface Printable { + void print(); + } + + // Keywords: record, sealed, permits, non-sealed, yield + // record Point(int x, int y) {} + + // Annotations + @Override + public String toString() { + return "java instance"; + } + + @SuppressWarnings("unchecked") + void annotated() {} + + @Deprecated + void oldMethod() {} + + // Labels + void labeled() { + outer: + for (int j = 0; j < 10; j++) { + for (int k = 0; k < 10; k++) { + if (j + k > 15) { + break outer; + } + } + } + } + + // Operators + void operators() { + int a = 10, b = 20; + int sum = a + b; + int diff = a - b; + int prod = a * b; + int quot = a / b; + int rem = a % b; + boolean eq = a == b; + boolean ne = a != b; + boolean lt = a < b; + boolean gt = a > b; + boolean le = a <= b; + boolean ge = a >= b; + boolean and = true && false; + boolean or = true || false; + boolean not = !true; + int band = a & b; + int bor = a | b; + int bxor = a ^ b; + int bnot = ~a; + int lsh = a << 2; + int rsh = a >> 2; + int ursh = a >>> 2; + a += 1; + a -= 1; + a *= 2; + a /= 2; + a %= 3; + a++; + a--; + } + + // Semicolons + void semi() { + int x = 1; + int y = 2; + int z = 3; + } + + // Brackets, parens, braces, comma, dot, colon + void delimiters() { + int[] arr = {1, 2, 3}; + int val = arr[0]; + Map.Entry entry = null; + String result = (val > 0) ? 
"pos" : "neg"; + } + + // Strings and character literals + void strings() { + String s1 = "hello world"; + String s2 = "escape: \n\t\\\""; + char c1 = 'A'; + char c2 = '\n'; + } + + // Arrow operator (lambda) + void lambdas() { + Runnable r = () -> System.out.println("lambda"); + List nums = List.of(1, 2, 3); + nums.forEach(n -> System.out.println(n)); + } +} + +/* Block comment + spanning multiple lines */ diff --git a/tests/syntax/samples/javascript-report.md b/tests/syntax/samples/javascript-report.md new file mode 100644 index 0000000000..610d120d5d --- /dev/null +++ b/tests/syntax/samples/javascript-report.md @@ -0,0 +1,66 @@ +JavaScript syntax highlighting: TS report +========================================== + +Sample file: `javascript.js` +TS query: `misc/syntax-ts/queries-override/javascript-highlights.scm` +TS colors: `misc/syntax-ts/colors.ini` `[javascript]` +Legacy reference: none + +Color assignments +----------------- + +- Keywords (`var`, `let`, `const`, `if`, `else`, `for`, `while`, `do`, `switch`, + `case`, `default`, `break`, `continue`, `return`, `throw`, `try`, `catch`, + `finally`, `new`, `delete`, `typeof`, `instanceof`, `in`, `of`, `void`, + `class`, `extends`, `static`, `import`, `export`, `from`, `as`, `async`, + `await`, `function`, `yield`, `with`, `debugger`, `get`, `set`, `this`, + `super`, `undefined`): `yellow` +- Comments (`//`, `/* */`): `brown` +- Strings (double-quoted, single-quoted): `green` +- Template strings: `green` +- Regex: `brightgreen` (via `string.special`) +- Numbers: `brightgreen` (via `number.builtin`) +- Boolean literals (`true`, `false`) and `null`: `brightgreen` (via + `number.builtin`) +- Operators (`+`, `-`, `*`, `/`, `%`, `=`, `==`, `===`, `!=`, `!==`, `<`, `>`, + `<=`, `>=`, `&&`, `||`, `++`, `--`, `+=`, `-=`, etc.): `yellow` (via + `operator.word`) +- Arrow `=>` and spread `...`: `brightcyan` (via `operator`) +- Semicolons: `brightmagenta` (via `delimiter.special`) +- Brackets, parens, commas, colons: 
`brightcyan` (via `delimiter`) +- Labels (`outer:`, `inner:`): `cyan` (via `label`) + +Design decisions +---------------- + +- Keywords and operators share `yellow` since JavaScript uses operators + frequently alongside keywords and the legacy `js.syntax` (which existed before + TS support) used yellow for both categories. +- `undefined` is mapped to `@keyword` (yellow) rather than `@number.builtin` + (brightgreen) because it is technically a global property, not a literal like + `true`/`false`/`null`. The TS query treats it as a keyword. +- Semicolons get `brightmagenta` (via `delimiter.special`) to visually + distinguish statement terminators from structural delimiters like brackets and + commas. +- Arrow `=>` and spread `...` use `brightcyan` (via `operator`) to distinguish + them from arithmetic/assignment operators which use `yellow`. +- Numbers and boolean/null literals share `brightgreen` to group all "literal + values" visually. + +Known shortcomings +------------------ + +- The `as` keyword on the last line renders as default (uncolored) text instead + of `yellow`. This is because `as` in plain JavaScript (not TypeScript) is + parsed as part of an expression statement rather than a keyword node, so the + tree-sitter parser does not emit it as a keyword in this context. +- Template string interpolation `${x}` partially renders: the closing `}` shows + as `brightcyan` delimiter while the content inside loses string coloring. This + is a known limitation of how template_string fragments interact with + delimiters. +- Regex literal `/pattern/gi` shows the slashes in `yellow` (operator) rather + than `brightgreen` (string.special) because the tree-sitter parser captures + the regex node but the `/` delimiters are separate tokens matched by the + operator rule. +- The `function*` generator syntax shows the `*` attached to `function` keyword + in yellow rather than being separately highlighted. 
diff --git a/tests/syntax/samples/javascript.js b/tests/syntax/samples/javascript.js new file mode 100644 index 0000000000..f4d0351df8 --- /dev/null +++ b/tests/syntax/samples/javascript.js @@ -0,0 +1,131 @@ +// Sample JavaScript file demonstrating syntax highlighting features +// Exercises all tree-sitter captures from javascript-highlights.scm + +/* Multi-line + comment block */ + +// Keywords +var x = 1; +let y = 2; +const z = 3; + +if (x > 0) { + return x; +} else { + throw new Error("negative"); +} + +for (let i = 0; i < 10; i++) { + if (i === 5) break; + if (i === 3) continue; +} + +while (true) { + break; +} + +do { + x++; +} while (x < 100); + +switch (x) { + case 1: + break; + default: + break; +} + +try { + delete x.prop; +} catch (e) { + debugger; +} finally { + void 0; +} + +// Classes and inheritance +class Animal extends Object { + static count = 0; + constructor(name) { + super(); + this.name = name; + } + get info() { return this.name; } + set info(val) { this.name = val; } +} + +// Import/export +import { foo } from "module"; +export const bar = 42; + +// Functions and async/await +function greet(name) { + return "Hello " + name; +} + +async function fetchData() { + const result = await fetch("/api"); + return result; +} + +// Arrow functions and spread +const add = (a, b) => a + b; +const arr = [1, ...other]; + +// typeof, instanceof, in, of +typeof x instanceof Object; +for (const key in obj) {} +for (const val of arr) {} + +// yield and with +function* gen() { + yield 1; + yield 2; +} + +with (obj) { + x = 1; +} + +// Literals: true, false, null, undefined +const a = true; +const b = false; +const c = null; +const d = undefined; + +// Numbers +const num1 = 42; +const num2 = 3.14; +const num3 = 0xff; +const num4 = 1e10; + +// Strings and template literals +const str1 = "double quoted"; +const str2 = 'single quoted'; +const tmpl = `template ${x} literal`; + +// Regular expression +const re = /pattern/gi; + +// Operators +x = y + z - x * y / z % 2; 
+x += 1; x -= 1; x *= 2; x /= 2; x %= 3; +x **= 2; x &&= true; x ||= false; +let cmp = x === y || x !== y && x <= z; +let bit = x & y | z ^ ~x; +x << 2; x >> 1; x >>> 0; +x++; x--; + +// Semicolons, brackets, colons, commas +const obj = { a: 1, b: [2, 3] }; +(function() {})(); + +// Labels +outer: for (let i = 0; i < 5; i++) { + inner: for (let j = 0; j < 5; j++) { + if (j === 2) break outer; + } +} + +// as keyword +const val = x as any; diff --git a/tests/syntax/samples/json-report.md b/tests/syntax/samples/json-report.md new file mode 100644 index 0000000000..f24e30a4e4 --- /dev/null +++ b/tests/syntax/samples/json-report.md @@ -0,0 +1,62 @@ +JSON syntax highlighting: TS vs Legacy comparison report +======================================================== + +Sample file: `tests/syntax/samples/json.json` +Legacy reference: `misc/syntax/json.syntax` +TS query: `misc/syntax-ts/queries-override/json-highlights.scm` +TS colors: `misc/syntax-ts/colors.ini` `[json]` + +Aligned with legacy +------------------- + +- String keys (`"title"`, `"description"`, `"version"`): `green` via `@string` - + MATCH +- String values (`"Hello, world!"`, `"localhost"`): `green` via `@string` - + MATCH +- Escape sequences (`\"`, `\\`, `\/`, `\t`, `\n`, `\r`, `\b`, `\f`): + `brightgreen` via `@string.special` - MATCH +- Integer numbers (`42`, `-17`, `0`, `5432`): `brightgreen` via + `@string.special` - MATCH +- Boolean `true`: `brightgreen` via `@string.special` - MATCH +- Boolean `false`: `brightgreen` via `@string.special` - MATCH +- `null`: `brightgreen` via `@string.special` - MATCH +- Delimiters (`{`, `}`, `[`, `]`, `,`, `:`): `brightcyan` via `@delimiter` - + MATCH +- Strings in arrays (`"alpha"`, `"beta"`, `"production"`): `green` via `@string` + on `array > string` - MATCH +- Empty strings (`""`): `green` - MATCH +- Strings with special characters (`"key with spaces"`, `"key.with.dots"`): + `green` - MATCH + +Intentional improvements over legacy 
+------------------------------------- + +- TS handles floating-point numbers (`3.14159`, `-0.001`, `0.85`) as single + `brightgreen` tokens. Legacy splits some floats at the decimal point, coloring + the integer part as `brightgreen` but leaving `.14159` partially uncolored + when the pattern does not match the full format. +- TS handles scientific notation (`1.5e10`, `2.99e-8`, `6.022E23`) as complete + `brightgreen` tokens. Legacy correctly handles most of these but splits some + (e.g. `6.022E23` matched, but `1E+6` has the `E+` part uncolored). +- TS properly handles `\u` unicode escape sequences as `brightgreen` for the + full `\uXXXX` sequence. Legacy matches the `\u` prefix and the 4-digit hex + separately, resulting in `brightgreen` for `\u` but `green` for the digits in + some cases. +- TS handles negative numbers (`-17`, `-0.001`, `-74.0060`) as single tokens in + `brightgreen`. Legacy handles these correctly too via pattern matching. +- TS treats all structural delimiters uniformly (`{`, `}`, `[`, `]`, `,`, `:`) + as `brightcyan`, consistent with legacy. + +Known shortcomings +------------------ + +- TS does not distinguish between key strings and value strings color-wise -- + both are `green`. This matches legacy behavior, so it is not a regression, but + a potential future improvement could differentiate them. +- Legacy splits `1E+6` with the `E+` uncolored (only `1` and `6` in + `brightgreen`). TS also has a minor issue here: it shows `1` as `brightgreen`, + then `E+` as `red` (likely an artifact), then `6` as `brightgreen`. Neither + handles this edge case perfectly. +- Neither TS nor legacy support JSON5/JSONC comments in standard JSON files (the + `(comment) @comment` capture exists in the query but standard JSON parsers do + not emit comment nodes). 
diff --git a/tests/syntax/samples/json.json b/tests/syntax/samples/json.json new file mode 100644 index 0000000000..8aaff31469 --- /dev/null +++ b/tests/syntax/samples/json.json @@ -0,0 +1,95 @@ +{ + "title": "JSON Syntax Highlighting Sample", + "description": "Demonstrates all TS captures for JSON grammar", + "version": 1, + + "strings": { + "simple": "Hello, world!", + "empty": "", + "with_spaces": " spaces ", + "special_chars": "Quotes: \" Backslash: \\ Slash: \/", + "escapes": "Tab:\tNewline:\nReturn:\rBackspace:\bFormfeed:\f", + "unicode": "\u0041\u0042\u0043", + "unicode_snowman": "\u2603", + "url": "https://example.com/path?q=value&p=2", + "path": "/usr/local/bin/program", + "multiword": "The quick brown fox jumps over the lazy dog" + }, + + "numbers": { + "integer": 42, + "negative": -17, + "zero": 0, + "float": 3.14159, + "negative_float": -0.001, + "exponent": 1.5e10, + "negative_exponent": 2.99e-8, + "big_exponent": 6.022E23, + "exponent_plus": 1E+6 + }, + + "booleans": { + "is_active": true, + "is_deleted": false + }, + + "null_value": null, + + "arrays": { + "empty_array": [], + "numbers": [1, 2, 3, 4, 5], + "strings": ["alpha", "beta", "gamma"], + "mixed": [1, "two", true, null, 3.14], + "nested": [[1, 2], [3, 4], [5, 6]], + "objects_in_array": [ + {"id": 1, "name": "Alice"}, + {"id": 2, "name": "Bob"}, + {"id": 3, "name": "Charlie"} + ] + }, + + "nested_objects": { + "database": { + "host": "localhost", + "port": 5432, + "name": "mydb", + "credentials": { + "username": "admin", + "password": "s3cret", + "ssl": true + } + }, + "cache": { + "enabled": true, + "ttl": 3600, + "max_size": null + } + }, + + "complex_values": { + "coordinates": {"lat": 40.7128, "lng": -74.0060}, + "tags": ["production", "v2.1", "stable"], + "metadata": { + "created": "2024-01-15T10:30:00Z", + "modified": "2024-06-20T14:45:00Z", + "author": "admin" + } + }, + + "edge_cases": { + "deep": { + "nesting": { + "level": { + "four": { + "value": "deep" + } + } + } + }, + 
"long_string": "Lorem ipsum dolor sit amet, consectetur adipiscing elit", + "special_key-with-dashes": "value", + "key.with.dots": "value", + "key with spaces": "value", + "escape_in_value": "Line 1\nLine 2\tTabbed" + } +} diff --git a/tests/syntax/samples/kotlin-report.md b/tests/syntax/samples/kotlin-report.md new file mode 100644 index 0000000000..924732a016 --- /dev/null +++ b/tests/syntax/samples/kotlin-report.md @@ -0,0 +1,99 @@ +Kotlin syntax highlighting: TS vs Legacy comparison report +=========================================================== + +Sample file: `kotlin.kt` +Legacy reference: `misc/syntax/kotlin.syntax` +TS query: `misc/syntax-ts/queries-override/kotlin-highlights.scm` +TS colors: `misc/syntax-ts/colors.ini` `[kotlin]` + +Aligned with legacy +------------------- + +- Hard keywords (`as`, `class`, `do`, `else`, `for`, `fun`, `if`, `in`, + `interface`, `is`, `object`, `return`, `super`, `this`, `throw`, `try`, + `typealias`, `val`, `var`, `when`, `while`): `yellow` - MATCH +- `package`, `import`: `brown` - MATCH +- Soft keywords (`by`, `catch`, `constructor`, `finally`, `get`, `init`, `set`, + `where`): `brightgreen` - MATCH +- Modifier keywords (`abstract`, `annotation`, `companion`, `const`, + `crossinline`, `data`, `enum`, `external`, `final`, `infix`, `inline`, + `inner`, `internal`, `lateinit`, `noinline`, `open`, `operator`, `out`, + `override`, `private`, `protected`, `public`, `sealed`, `suspend`, `tailrec`, + `vararg`): `brightmagenta` - MATCH +- Built-in types (`Double`, `Float`, `Long`, `Int`, `Short`, `Byte`, `Char`, + `Boolean`, `Array`, `String`, `ByteArray`): `brightred` - MATCH +- Strings (double-quoted `"..."`): `green` - MATCH +- Multiline strings (`"""..."""`): `green` - MATCH +- Character literals (`'A'`): `green` via `string.special` capture -> + `brightgreen` in colors.ini. Legacy also colors as `green`. Close MATCH. 
+- String interpolation (`$name`, `${e.message}`): legacy colors `$` references + as `brightgreen`, TS colors them similarly within string nodes. MATCH. +- Comments (line `//` and block `/* */`): legacy colors as `gray`, TS colors as + `brown`. See improvements section. +- Operators (`=`, `+=`, `-=`, `*=`, `/=`, `%=`, `==`, `!=`, `<`, `>`, `<=`, + `>=`, `&&`, `||`, `!`, `+`, `-`, `*`, `/`, `%`, `++`, `--`, `->`, `?:`, `..`, + `::`, `!!`): `brightcyan` - MATCH +- Delimiters (`.`, `,`, `:`): `brightcyan` - MATCH +- Semicolons `;`: `brightcyan` via `delimiter` capture. Legacy also colors as + `brightcyan`. MATCH. +- Labels (`loop@`, `@loop`): legacy colors as `brightcyan` via annotation/label + patterns. TS colors via `label` capture as `cyan`. Close match, slightly + different shade. +- Annotations (`@MyAnnotation`, `@Deprecated`): `brightcyan` - MATCH + +Intentional improvements over legacy +------------------------------------- + +- Comments: TS colors comments as `brown` (matching most other languages in MC). + Legacy colors Kotlin comments as `gray`, which is inconsistent with the rest + of the MC syntax highlighting convention. TS provides a more uniform + experience. +- `catch` keyword: legacy colors `catch` as `yellow` (hard keyword). TS colors + it as `brightgreen` (soft keyword via `type.builtin`), which is semantically + more accurate since `catch` is a soft keyword in Kotlin. +- `reified` modifier: TS colors it as `brightmagenta` (modifier keyword via + `keyword.control`). Legacy lists it under modifier keywords as + `brightmagenta`. MATCH. But TS captures it within the `inline fun <reified T>` + context structurally. +- `as?` safe cast operator: TS captures this as a single `brightcyan` operator. + Legacy colors `as?` as `yellow` (keyword `as?`). TS treats it as an operator + rather than keyword, which is more consistent with other operators. +- `null`, `true`, `false`: legacy colors these as `yellow` (hard keywords). 
TS + does not have explicit captures for these literals, so they appear as default + text in contexts where no keyword rule matches. However, `true` and `false` + appear as `yellow` in TS output because they are picked up by other grammar + rules. `null` similarly appears as `yellow`. +- `it` special identifier: legacy colors `it` as `brightgreen`. TS colors `it` + as `brightgreen` when the grammar recognizes it within lambda context. MATCH + in the lambda body. + +Known shortcomings +------------------ + +- `break`/`continue` keywords: legacy lists these as `yellow` hard keywords. TS + does not explicitly include `break` and `continue` in the keyword capture list + in the override query, but they are captured by the base grammar as keywords. + Both show as `yellow`. No actual difference. +- `!in` and `!is` compound operators: legacy colors these as `yellow` via + explicit keyword entries. TS handles `!` and `in`/`is` as separate tokens. The + `!` gets `brightcyan` (operator) and `in`/`is` gets `yellow` (keyword), + creating a split-color effect. Legacy colors the whole `!in`/`!is` as + `yellow`. +- `delegate`, `dynamic`, `field`, `file`, `param`, `property`, `receiver`, + `setparam`: legacy has these soft keywords colored as `brightgreen`. TS does + not include all of them in the soft keyword capture (only `by`, `catch`, + `constructor`, `finally`, `get`, `init`, `set`, `where`). Unused soft keywords + like `delegate` and `dynamic` appear uncolored in TS. +- `expect` modifier keyword: listed in the TS query under `keyword.control` + (`brightmagenta`). Legacy does not include `expect`. TS adds coverage for this + Kotlin Multiplatform keyword. +- `?` (nullable type marker) and `?.` (safe call): legacy colors `?` and `?.` as + `brightcyan`. TS does not explicitly capture these, so they appear as default + text in some contexts. The `?:` elvis operator is captured but standalone `?` + is not. 
+- Comment delimiters `//` and `/*`: in legacy output, the `//` appears + separately as `brightcyan` (from the delimiter keyword) before the comment + text in `gray`. In TS, the entire comment including `//` is colored as + `brown`. TS is cleaner. +- `$` and `_` standalone: legacy colors these as `brightcyan`. TS does not + explicitly capture standalone `$` or `_` characters. diff --git a/tests/syntax/samples/kotlin.kt b/tests/syntax/samples/kotlin.kt new file mode 100644 index 0000000000..89098a0e5b --- /dev/null +++ b/tests/syntax/samples/kotlin.kt @@ -0,0 +1,240 @@ +// Comment: demonstrate all Kotlin syntax highlighting features +// This file exercises every TS capture from kotlin-highlights.scm + +package com.example + +import java.util.Collections + +// Hard keywords: class, fun, if, else, for, while, when, return, val, var +class Animal(val name: String, var age: Int) { + + fun greet(): String { + return "Hello, $name" + } + + fun describe(): String { + if (age > 10) { + return "Old $name" + } else { + return "Young $name" + } + } +} + +// Hard keywords: object, interface, is, as, in, this, super, throw, try +object Singleton { + val instance = "singleton" +} + +interface Drawable { + fun draw() +} + +open class Shape : Drawable { + override fun draw() { + println("Drawing shape") + } +} + +class Circle(val radius: Double) : Shape() { + override fun draw() { + super.draw() + println("Drawing circle r=$radius") + } + + fun check(obj: Any) { + if (obj is String) { + println(obj as String) + } + if (obj !is Int) { + throw IllegalArgumentException("not int") + } + } +} + +// Hard keywords: do, while, for, in, when, typealias +typealias StringList = List + +fun loops() { + for (i in 1..10) { + if (i == 5) continue + if (i == 8) break + } + + var n = 0 + while (n < 10) { + n++ + } + + do { + n-- + } while (n > 0) + + when (n) { + 0 -> println("zero") + 1 -> println("one") + else -> println("other") + } +} + +// Soft keywords: by, catch, constructor, finally, get, 
init, set, where +class Config { + var value: Int = 0 + get() = field + set(v) { field = v } + + constructor(initial: Int) : this() { + value = initial + } + + init { + println("Config created") + } +} + +fun constrained(item: T) where T : Comparable { + println(item) +} + +fun tryCatch() { + try { + val x = 1 / 0 + } catch (e: ArithmeticException) { + println("caught: ${e.message}") + } finally { + println("done") + } +} + +// package and import -> brown (already at top) + +// Modifier keywords +abstract class Base { + abstract fun compute(): Int + open fun info() = "base" + protected fun helper() {} + internal fun restricted() {} +} + +data class Point(val x: Double, val y: Double) +enum class Direction { NORTH, SOUTH, EAST, WEST } +sealed class Result +annotation class MyAnnotation +companion object {} + +inline fun check(value: Any): Boolean = value is T + +class Outer { + inner class Inner { + fun access() = "inner" + } +} + +const val PI = 3.14159 +private val secret = "hidden" +public val visible = "shown" +final class Immutable + +suspend fun coroutine() {} +tailrec fun factorial(n: Int, acc: Int = 1): Int = + if (n <= 1) acc else factorial(n - 1, n * acc) + +infix fun Int.times(str: String) = str.repeat(this) +operator fun Point.plus(other: Point) = Point(x + other.x, y + other.y) + +fun varargExample(vararg items: Int) = items.sum() +fun crossinlineExample(crossinline block: () -> Unit) {} +fun noinlineExample(noinline block: () -> Unit) {} + +lateinit var lateValue: String +val lazyVal by lazy { "computed" } + +// Comments +/* Block comment + spanning multiple lines */ + +// Strings +fun strings() { + val s1 = "hello world" + val s2 = """ + multi + line + string + """.trimIndent() + val ch = 'A' +} + +// Annotations +@MyAnnotation +fun annotated() {} + +@Deprecated("use newMethod") +fun oldMethod() {} + +// Built-in types +fun types() { + val a: Double = 1.0 + val b: Float = 2.0f + val c: Long = 3L + val d: Int = 4 + val e: Short = 5 + val f: Byte = 6 + 
val g: Char = 'Z' + val h: Boolean = true + val i: Array = arrayOf(1, 2, 3) + val j: String = "text" + val k: ByteArray = byteArrayOf(1, 2, 3) +} + +// Labels +fun labeled() { + loop@ for (i in 1..10) { + for (j in 1..10) { + if (i + j > 15) break@loop + } + } +} + +// Operators +fun operators() { + var a = 10 + val b = 20 + val sum = a + b + val diff = a - b + val prod = a * b + val quot = a / b + val rem = a % b + val eq = a == b + val ne = a != b + val lt = a < b + val gt = a > b + val le = a <= b + val ge = a >= b + val and = true && false + val or = true || false + val not = !true + a += 1 + a -= 1 + a *= 2 + a /= 2 + a %= 3 + a++ + a-- + + // Special operators + val range = 1..10 + val elvis = null ?: "default" + val ref = String::class + val force = nullable!! + + // Arrow and safe cast + val fn: (Int) -> Int = { it * 2 } + val safe = obj as? String +} + +// Delimiters: dot, comma, colon, semicolons +fun delimiters() { + val x = listOf(1, 2, 3) + val y: Int = x.size + val z = 1; val w = 2 +} diff --git a/tests/syntax/samples/latex-report.md b/tests/syntax/samples/latex-report.md new file mode 100644 index 0000000000..daf31a84dc --- /dev/null +++ b/tests/syntax/samples/latex-report.md @@ -0,0 +1,70 @@ +LaTeX syntax highlighting: TS vs Legacy comparison report +=========================================================== + +Sample file: `latex.tex` +Legacy reference: `misc/syntax/latex.syntax` +TS query: `misc/syntax-ts/queries-override/latex-highlights.scm` +TS colors: `misc/syntax-ts/colors.ini` `[latex]` + +Aligned with legacy +------------------- + +- Line comments (`% ...`): `brown` (TS `comment`) - MATCH. +- Strings/paths in package includes: `green` (TS `string`) - MATCH. +- Math environments (`$...$`, `equation`, `displaymath`): `brightgreen` (TS + `markup.math`) - MATCH. +- `\begin`/`\end` commands: both use bright colors (legacy `brightred` for known + environments, `brightcyan` for others; TS `brightcyan` via `module`). 
+- Sectioning command names (`\section`, `\chapter`, `\part`): both highlight + prominently (legacy `brightred`; TS `brightcyan` via `module`). +- `\item`: highlighted distinctly (legacy `yellow`; TS `brightmagenta` via + `punctuation.special`). +- `\newcommand`/`\newenvironment` definitions: highlighted (legacy `cyan`; TS + `brightmagenta` via `function.macro`). +- `\label`, `\ref`, `\cite`: highlighted (legacy `yellow`; TS `brightmagenta` + for command, `yellow` for label/link target). +- Brackets `{}[]`: `brightcyan` (TS `punctuation.bracket`) - legacy also uses + `brightgreen` for `{}`. + +Intentional improvements over legacy +------------------------------------- + +- TS provides granular sectioning depth: section titles get `brightmagenta` + (`markup.heading.N`) with different heading levels (1-6), while legacy colors + all section titles the same `brightred`. +- TS distinguishes `\emph` (italic, `magenta`) from `\textbf` (bold, + `brightmagenta`); legacy colors both via generic command rules. +- TS highlights environment names (e.g., `document`, `itemize`, `equation`) in + `cyan` (`label`), separate from the `\begin`/`\end` command in `brightcyan` + (`module`). +- TS identifies `\href` URLs as `yellow` (`markup.link.url`), giving hyperlinks + distinct color. +- TS highlights operators (`=`, `^`, `_`) as `brightcyan` in math mode; legacy + has no operator distinction. +- TS highlights `\documentclass`, `\usepackage` as `yellow` (`keyword.import`) + with their arguments as `green` (`string`); legacy uses generic `brightcyan`. +- TS identifies math delimiters (`\left`, `\right`, `\frac`) distinctly as + `brightcyan` (`punctuation.delimiter`). +- TS highlights conditional commands (`\ifx`, `\else`, `\fi`) as `yellow` + (`keyword.conditional`); legacy uses generic `brightcyan`. +- TS colors `\author` content as heading text (`brightmagenta`), giving + title-page elements visual weight. 
+- TS colors key-value pairs with `lightgray` for parameter names + (`variable.parameter`). + +Known shortcomings +------------------ + +- TS does not reproduce the legacy `{\\bf ...}` context mode which colors + bold-braced content in `brightmagenta` as a continuous block. +- TS does not reproduce the legacy `{\\it ...}` context mode which colors + italic-braced content in `magenta` as a continuous block. +- Legacy `\\pagenumbering` context with keyword highlighting (`arabic`, `roman`, + etc.) inside braces is not specifically handled in TS. +- TS `\frametitle` requires the full beamer grammar context to work; in a plain + article document the frametitle capture may not trigger. +- Font size commands (`\tiny`, `\huge`, etc.) are colored as generic + `brightcyan` (`function`) in TS; legacy specifically colors them as `yellow`. +- TS colors the `%%!TeX` directive line as `brown` (`comment`); legacy does not + specifically distinguish it (also `brown` via `%` comment context, though the + `%%` keyword is `yellow`). diff --git a/tests/syntax/samples/latex.tex b/tests/syntax/samples/latex.tex new file mode 100644 index 0000000000..749fea983c --- /dev/null +++ b/tests/syntax/samples/latex.tex @@ -0,0 +1,133 @@ +% This is a line comment +%% !TeX program = lualatex + +\documentclass[12pt]{article} +\usepackage{amsmath} +\usepackage{graphicx} +\usepackage{hyperref} + +\newcommand{\mycommand}[2]{#1 and #2} +\newenvironment{myenv}{\begin{center}}{\end{center}} + +\title{A Sample Document} +\author{Jane Doe \and John Smith} +\date{\today} + +\begin{document} + +\maketitle +\tableofcontents + +\part{First Part} + +\chapter{Introduction} + +\section{Overview} +This is a paragraph of normal text with some \emph{emphasized words} +and some \textbf{bold text} for testing. + +\subsection{Details} +Here we reference Section~\ref{sec:math} and cite~\cite{knuth1984}. 
+ +\subsubsection{Fine Details} +An item list: +\begin{itemize} + \item First item + \item[*] Second item with optional label +\end{itemize} + +\begin{enumerate} + \item Numbered one + \item Numbered two +\end{enumerate} + +\paragraph{A Named Paragraph} +Some text in a named paragraph. + +\subparagraph{A Subparagraph} +Even finer granularity. + +\section{Math Mode}\label{sec:math} +Inline math: $E = mc^2$ and $\alpha + \beta = \gamma$. + +Display math: +\begin{equation} + \int_{0}^{\infty} e^{-x^2} \, dx = \frac{\sqrt{\pi}}{2} +\end{equation} + +\begin{displaymath} + \sum_{n=1}^{N} n = \frac{N(N+1)}{2} +\end{displaymath} + +Operators and symbols: $a = b + c$, $x_1^{2}$, $\left( \frac{a}{b} \right)$. + +\section{Environments and Includes} +\begin{verbatim} +This is verbatim text, no formatting here. +\end{verbatim} + +\begin{center} +Centered text goes here. +\end{center} + +\begin{quote} +A quoted passage. +\end{quote} + +\begin{tabular}{|l|c|r|} + \hline + Left & Center & Right \\ + \hline + a & b & c \\ + \hline +\end{tabular} + +\section{References and Citations} +\label{sec:refs} +Cross-reference: see Section~\ref{sec:math}. +Bibliography entry: \cite{lamport1994}. +\bibliography{references} +\bibliographystyle{plain} + +\section{File Inclusion} +\input{preamble} +\include{chapter1} +\includegraphics[width=0.5\textwidth]{figure.png} + +\section{Definitions} +\newcommand{\highlight}[1]{\textbf{#1}} +\newenvironment{note}{\par\textbf{Note:}}{\par} + +% Conditional commands +\ifx\undefined\something + Fallback text. +\else + Alternative text. +\fi + +\section{Accents and Symbols} +\'{e} \`{a} \^{o} \"{u} \~{n} +Special: \dag\ \ddag\ \S\ \P\ \copyright\ \pounds + +\section{Hyperlinks} +\href{https://example.com}{Visit Example} +\url{https://example.com/path} + +\section{Beamer Frame} +\begin{frame} +{Frame Title Here} +Slide content goes in a frame. 
+\end{frame} + +\frametitle{Another Frame Title} + +\section{Font Sizes} +{\tiny tiny} {\scriptsize scriptsize} {\footnotesize footnotesize} +{\small small} {\normalsize normalsize} {\large large} +{\Large Large} {\LARGE LARGE} {\huge huge} {\Huge Huge} + +\section{Key-Value and Placeholders} +\setlength{\parindent}{0pt} +Text with a placeholder: #1. + +\end{document} diff --git a/tests/syntax/samples/lisp-report.md b/tests/syntax/samples/lisp-report.md new file mode 100644 index 0000000000..9bef72c7e4 --- /dev/null +++ b/tests/syntax/samples/lisp-report.md @@ -0,0 +1,66 @@ +Lisp syntax highlighting: TS vs Legacy comparison report +========================================================== + +Sample file: `lisp.lisp` +Legacy reference: `misc/syntax/lisp.syntax` +TS query: `misc/syntax-ts/queries-override/commonlisp-highlights.scm` +TS colors: `misc/syntax-ts/colors.ini` `[commonlisp]` + +Aligned with legacy +------------------- + +- Comments (`;` lines): `brown` (TS `comment`) - MATCH. +- Strings (`"..."`): `green` (TS `string`) - MATCH. +- Parentheses (`(`, `)`): `brightcyan` (TS `delimiter`) - MATCH. +- Keyword arguments (`:name`, `:age`): `white` (TS `keyword.other`) - MATCH. +- Quote/unquote (`'`, `,`, `#'`): `brightmagenta` (TS `constant.builtin`) - + MATCH. +- `nil`: `brightred` (TS `variable.builtin`) - MATCH. +- `defun`: `yellow` (TS `keyword`) - MATCH. +- Core functions used as first list element (`car`, `cdr`, `cons`, `format`, + `print`, `mapcar`, `apply`, `list`, `setq`, etc.): both highlight in a + distinct color (legacy `yellow` as keywords; TS `brightcyan` as `function` + calls). + +Intentional improvements over legacy +------------------------------------- + +- TS highlights `defmacro`, `defgeneric`, `defmethod` as `yellow` (`keyword`); + legacy only has `defun` in its keyword list. +- TS highlights function names in `defun` headers as `brightcyan` (`function`), + giving definitions visual prominence separate from the `defun` keyword. 
+- TS highlights parameter variables in lambda lists as `lightgray` (`variable`), + distinguishing them from function names. +- TS highlights numeric literals (`42`, `3.14`, `#xFF`, `#b1010`, `1.0e-5`) as + `lightgray` (`number`); legacy does not highlight numbers. +- TS highlights character literals (`#\A`, `#\Space`) as `brightgreen` + (`string.special`); legacy does not handle these. +- TS highlights block comments (`#| ... |#`) as `brown` (`comment`); legacy only + supports `;` line comments. +- TS highlights `loop` macro keywords (`for`, `when`, `into`, `finally`, + `return`) as `yellow` (`keyword`), providing structural awareness of the loop + macro; legacy has no loop keyword support. +- TS highlights `lambda` as `yellow` (`keyword`) via `defun_keyword`; legacy + colors it `brightred` (different but both highlighted). +- TS highlights symbols used as variables as `lightgray` (`variable`), providing + a visual baseline distinct from function calls. + +Known shortcomings +------------------ + +- TS does not highlight `t` as `brightred` like legacy does; instead `t` appears + as `lightgray` (default variable) except when it appears as the first element + of a list. Legacy treats both `t` and `nil` as special `brightred` keywords. +- TS does not specifically highlight `lambda` in `brightred` as legacy does; TS + uses `yellow` (`keyword`) which matches other keywords but loses the legacy + distinction between `lambda`/`nil`/`t` and regular keywords. +- `&body` and `&key` parameter markers show as `lightgray` (`variable`) in TS; + legacy colors `&`-prefixed tokens as `white`. +- Function calls such as `cond`, `null`, `let`, `let*`, and `or`, which legacy + highlights as `yellow` keywords, appear only as `brightcyan` (`function`) in + TS, and only when used as the first list element. Symbols explicitly listed + in the TS keyword list (such as `if`, `when`, `unless`, `do`, `and`, `loop`, + `defun`) are still colored as `yellow` keywords. 
+- TS `#'` (function quote) in `#'car` colors `#'` as `brightmagenta` but the + following symbol differently than legacy, which colors the entire `#'` token + as `brightmagenta` and the following symbol normally. diff --git a/tests/syntax/samples/lisp.lisp b/tests/syntax/samples/lisp.lisp new file mode 100644 index 0000000000..2b9f3d8c77 --- /dev/null +++ b/tests/syntax/samples/lisp.lisp @@ -0,0 +1,128 @@ +;;; Common Lisp sample file for syntax highlighting +;; Exercises all TS captures from commonlisp-highlights.scm + +;; defun with lambda list +(defun factorial (n) + "Compute factorial of N." + (if (<= n 1) + 1 + (* n (factorial (- n 1))))) + +;; defmacro +(defmacro when-positive (x &body body) + `(when (> ,x 0) + ,@body)) + +;; defgeneric and defmethod +(defgeneric describe-object (obj)) +(defmethod describe-object ((obj string)) + (format t "String: ~a~%" obj)) + +;; let and let* +(let ((x 10) + (y 20)) + (+ x y)) + +(let* ((a 5) + (b (* a 2))) + (print b)) + +;; cond expression +(defun classify (n) + (cond + ((< n 0) 'negative) + ((= n 0) 'zero) + (t 'positive))) + +;; lambda +(mapcar (lambda (x) (* x x)) '(1 2 3 4 5)) + +;; car, cdr, cons +(defun first-and-rest (lst) + (cons (car lst) (cdr lst))) + +(car '(a b c)) +(cdr '(a b c)) +(cadr '(a b c)) + +;; do loop +(do ((i 0 (+ i 1))) + ((>= i 10) nil) + (print i)) + +;; loop macro +(loop for i from 1 to 10 + when (evenp i) + collect i into evens + finally (return evens)) + +;; Keyword arguments +(defun make-person (&key name age) + (list :name name :age age)) + +(make-person :name "Alice" :age 30) + +;; nil and t literals +(if nil + (print "never") + (print "always")) + +(and t t nil) +(or nil nil t) + +;; unless and when +(unless (null '(1 2 3)) + (print "not null")) + +(when (> 5 3) + (print "five is greater")) + +;; Quoting and unquoting +'(a b c) +`(a ,x ,@rest) +#'car + +;; Character literals +#\A +#\Space +#\Newline + +;; Numbers +42 +3.14 +-17 +1/3 +#xFF +#b1010 +1.0e-5 + +;; Strings +"Hello, World!" 
+"Escaped \"quotes\" here" +"Multi-word string value" + +;; Nested function calls +(format t "Result: ~a~%" (+ (* 2 3) (- 10 4))) + +;; setq and multiple values +(setq *global-var* 100) +(defvar *counter* 0) + +;; apply and funcall +(apply #'+ '(1 2 3)) +(funcall #'car '(a b)) + +;; Block comment +#| +This is a block comment. +It can span multiple lines. +|# + +;; Deeply nested expressions +(defun deep-example () + (let ((result (mapcar (lambda (x) + (if (> x 0) + (* x 2) + (- x))) + '(-3 -1 0 1 3)))) + (format t "~a~%" result))) diff --git a/tests/syntax/samples/lua-report.md b/tests/syntax/samples/lua-report.md new file mode 100644 index 0000000000..5bc7dc8423 --- /dev/null +++ b/tests/syntax/samples/lua-report.md @@ -0,0 +1,72 @@ +Lua syntax highlighting: TS vs Legacy comparison report +======================================================= + +Sample file: `tests/syntax/samples/lua.lua` +Legacy reference: `misc/syntax/lua.syntax` +TS query: `misc/syntax-ts/queries-override/lua-highlights.scm` +TS colors: `misc/syntax-ts/colors.ini` `[lua]` + +Aligned with legacy +------------------- + +- Keywords (`and`, `break`, `do`, `else`, `elseif`, `end`, `for`, `function`, + `goto`, `if`, `in`, `local`, `not`, `or`, `repeat`, `return`, `then`, `until`, + `while`) -> `white` - MATCH. +- Constants (`true`, `false`, `nil`) -> `white` - MATCH. +- Operators (`=`, `==`, `~=`, `<=`, `>=`, `<`, `>`, `+`, `-`, `*`, `/`, `//`, + `%`, `^`, `#`, `..`) -> `white` - MATCH. +- Brackets/parens/braces (`(`, `)`, `{`, `}`, `[`, `]`) -> `white` - MATCH. +- Delimiters (`.`, `,`, `;`, `:`, `::`) -> `white` - MATCH. +- Double-quoted strings -> `green` - MATCH. +- Single-quoted strings -> `green` - MATCH. +- Comments (`--`) -> `brown` - MATCH. +- Block comments (`--[[...]]`, `--[=[...]=]`) -> `brown` - MATCH. 
+- Library function calls (`print`, `type`, `tostring`, `tonumber`, `assert`, + `error`, `pcall`, `xpcall`, `rawget`, `rawset`, `rawequal`, `setmetatable`, + `getmetatable`, `require`, `ipairs`, `pairs`, `next`, `collectgarbage`, + `loadfile`, `dofile`) -> `yellow` - MATCH. +- Dot-access library calls (`string.format`, `string.len`, `string.sub`, + `table.concat`, `table.insert`, `math.abs`, `math.floor`, `io.read`, + `io.write`, `os.clock`, `os.time`) -> `yellow` - MATCH. +- Global variables (`_VERSION`, `_G`) -> `brightmagenta` via `constant.builtin` + - MATCH. +- Default context (identifiers, numbers) -> `lightgray` - MATCH. +- Labels (`::done::`) -> identifier shown but `::` delimiters as `white` in + both. + +Intentional improvements over legacy +------------------------------------- + +- TS highlights the `goto` keyword as `white` and its target label identifier as + `cyan` via `@label`, while legacy does not distinguish the label target. +- TS highlights label definitions (`::done::`) with `cyan` for the identifier, + while legacy shows them in default color. +- TS colors function definitions (`function greet(name)`, + `local function add(a, b)`) with function name as `yellow`, matching library + function coloring. +- TS properly handles `break` as a statement node (`break_statement`) with + `white`, while legacy only handles it as a keyword token. +- TS structurally understands function calls vs function definitions, providing + accurate highlighting even for user-defined functions. +- TS handles escape sequences in strings (`\n`) showing them distinctly, while + legacy uses `brightgreen` for escape sequences inside strings -- both provide + visibility but legacy has slightly different color. 
+ +Known shortcomings +------------------ + +- TS colors long strings (`[[...]]`, `[==[...]==]`) the same as comments + (`brown` for delimiters), while legacy colors them as `brightmagenta` -- this + is a mismatch where legacy's `brightmagenta` for long strings is lost in TS + (both the delimiters and content appear differently). +- TS does not highlight `%` as `white` operator when used as modulo -- it + appears uncolored in `7 % 3` while other operators are correctly colored (this + appears to be a parser issue where `%` is not matched). +- Legacy highlights escape sequences inside strings (`%d`, `%s`, `\n`, etc.) as + `brightgreen`, while TS does not have explicit escape sequence highlighting + for Lua strings. +- TS does not highlight the shebang line (`#!/usr/bin/env lua`) distinctly -- it + shows as `brown` (comment), while legacy also uses `brown`. +- TS handles `goto` keyword coloring (`white`) but the target identifier after + `goto` gets `cyan` label color which differs from legacy's plain handling -- + this is more of a style difference than a shortcoming. diff --git a/tests/syntax/samples/lua.lua b/tests/syntax/samples/lua.lua new file mode 100644 index 0000000000..209ca66455 --- /dev/null +++ b/tests/syntax/samples/lua.lua @@ -0,0 +1,196 @@ +#!/usr/bin/env lua +-- Sample file demonstrating Lua syntax highlighting features. +-- Exercises all captures from the TS query override. + +-- Keywords -> white +local x = 10 +local y = 20 + +-- Constants -> white +local a = true +local b = false +local c = nil + +-- Function definitions -> yellow +function greet(name) + return "Hello, " .. 
name +end + +local function add(a, b) + return a + b +end + +-- Control flow +if x == 10 then + print("ten") +elseif x > 10 then + print("more") +else + print("less") +end + +-- While loop +while x > 0 do + x = x - 1 +end + +-- Repeat/until loop +repeat + y = y - 1 +until y == 0 + +-- For loops +for i = 1, 10 do + if i == 5 then + break + end +end + +for i = 10, 1, -1 do + -- counting down +end + +for k, v in pairs({a = 1, b = 2}) do + print(k, v) +end + +for i, v in ipairs({10, 20, 30}) do + print(i, v) +end + +-- Goto and labels +goto done + +::done:: +print("reached label") + +-- Logical operators +local r1 = true and false +local r2 = true or false +local r3 = not true + +-- Operators -> white +local sum = 1 + 2 +local diff = 5 - 3 +local prod = 4 * 2 +local quot = 10 / 2 +local idiv = 10 // 3 +local mod = 7 % 3 +local pow = 2 ^ 8 +local len = #"hello" + +-- Comparison operators +local eq = (1 == 2) +local ne = (1 ~= 2) +local lt = (1 < 2) +local gt = (1 > 2) +local le = (1 <= 2) +local ge = (1 >= 2) + +-- String concatenation +local str = "hello" .. " " .. 
"world" + +-- Brackets/parens/braces -> white +local tbl = {1, 2, 3} +local nested = {{1, 2}, {3, 4}} +local val = tbl[1] + +-- Delimiters -> white +local t = { + a = 1, + b = 2; + c = 3 +} + +-- Dot, colon access +local s = string.format("%d", 42) +local f = io.open("file.txt", "r") + +-- Method call with colon +local str2 = "hello" + +-- Strings -> green +local s1 = "double quoted\n" +local s2 = 'single quoted\n' +local s3 = [[ +long string +with multiple lines +]] +local s4 = [==[ +another long string +with equals +]==] + +-- Library functions -> yellow (function calls) +print("hello") +type(42) +tostring(42) +tonumber("42") +assert(true) +error("oops") +pcall(print, "safe") +xpcall(print, print) +rawget(t, "a") +rawset(t, "d", 4) +rawequal(1, 1) +setmetatable(t, {}) +getmetatable(t) +require("os") +ipairs(tbl) +pairs(t) +next(t) +collectgarbage() +loadfile("test.lua") +dofile("test.lua") + +-- Library functions with dot access -> yellow +string.len("hello") +string.sub("hello", 1, 3) +string.lower("HELLO") +string.upper("hello") +string.format("%s %d", "age", 25) +string.find("hello", "ell") +string.gsub("hello", "l", "r") +string.byte("A") +string.char(65) +string.rep("ab", 3) + +table.concat(tbl, ", ") +table.insert(tbl, 4) +table.remove(tbl, 1) +table.sort(tbl) + +math.abs(-5) +math.floor(3.7) +math.ceil(3.2) +math.sqrt(16) +math.sin(0) +math.cos(0) +math.random() +math.max(1, 2, 3) +math.min(1, 2, 3) + +io.read() +io.write("output") + +os.clock() +os.time() +os.date() +os.exit(0) + +-- Global variables -> brightmagenta +print(_VERSION) +print(type(_G)) + +-- Comments +-- Single line comment + +--[[ +Multi-line +block comment +]] + +--[=[ +Another style of +long comment +]=] diff --git a/tests/syntax/samples/mail-report.md b/tests/syntax/samples/mail-report.md new file mode 100644 index 0000000000..61739d2de2 --- /dev/null +++ b/tests/syntax/samples/mail-report.md @@ -0,0 +1,75 @@ +Mail syntax highlighting: TS vs Legacy comparison report 
+========================================================= + +Sample file: `mail.mail` +Legacy reference: `misc/syntax/mail.syntax` +TS query: `misc/syntax-ts/queries-override/mail-highlights.scm` +TS colors: `misc/syntax-ts/colors.ini` `[mail]` + +Aligned with legacy +------------------- + +- `From ` envelope line: `brightred` for the `From` keyword - MATCH. Legacy uses + `keyword linestart From\s brightred`. TS would use `(header_email)` mapped to + `@type.builtin` = `brightgreen` for `From:` header specifically (see below). +- `From:` header: `brightgreen` - MATCH. Legacy uses `keyword linestart From: + brightgreen`. +- `To:` header: `brightmagenta` - would match if TS were active. Legacy uses + `keyword linestart To: brightmagenta`. +- `Subject:` header: Legacy uses `brightred/Orange` (brightred foreground on + orange background). TS maps `(header_subject)` to `@tag` = `brightcyan`. +- Email addresses in angle brackets (`<john@example.com>`, + `<jane@example.com>`): `white` in legacy via `keyword <*@*> white`. + TS maps `(email)` to `@string` = `green`, so the colors would differ. +- Other headers (`Date:`, `Message-ID:`, `MIME-Version:`, `Content-Type:`, + `X-Mailer:`, etc.): `brown` in legacy via `keyword linestart +: brown`. TS + maps `(header_other)` to `@markup.environment` = `brightcyan`. +- Header context body text: `cyan` in legacy (the entire header area context is + cyan). TS does not color non-header body text. +- Quoted text (`> ...`): `brightgreen` for odd levels, `brightred` for even + levels - legacy context patterns handle nested quoting. TS does not appear to + parse quoted text. + +Intentional improvements over legacy +------------------------------------- + +- The TS grammar would provide structured parsing of email headers via distinct + node types (`header_email`, `header_subject`, `header_other`), whereas legacy + uses repeated context blocks for each starting header pattern. 
+- TS would distinguish email addresses semantically via the `(email)` node type + rather than relying on glob patterns like `<*@*>`. + +Known shortcomings +------------------ + +- The TS engine FAILED to initialize for the `mail.mail` file and fell back to + legacy mode. The output from `--ts` mode is identical to `--legacy` mode. This + is because the `mail` grammar has NO extension or filename mapping in + `misc/syntax-ts/extensions` or `misc/syntax-ts/filenames`. The legacy engine + entry in `misc/syntax/Syntax.in` uses the filename pattern `Don_t_match_me` + (intentionally unmatchable) and relies solely on first-line content matching + (`^(From|Return-(P|p)ath:|From:|Date:)\s`). Mail files have no standard file + extension -- they are identified by content, not by name. Adding a `.mail` or + `.eml` extension mapping would be arbitrary and might conflict with other + uses. This is a fundamental limitation: the TS engine currently supports only + extension and filename matching, not content-based detection. Until the TS + engine gains first-line content matching support, the mail grammar cannot be + activated via TS. +- Because the TS engine cannot be tested, the following captures from the query + override remain unverified: `(header_email)` -> `@type.builtin` + (`brightgreen`), `(email)` -> `@string` (`green`), `(header_subject)` -> + `@tag` (`brightcyan`), `(header_other)` -> `@markup.environment` + (`brightcyan`). +- The legacy engine colors `Subject:` as `brightred` on orange background, while + TS would use `brightcyan` (via `@tag`). This is a color difference, not an + alignment. +- Neither engine highlights the message body text, email signatures, or URLs in + the body. +- The legacy engine uses alternating `brightgreen`/`brightred` for nested quote + levels (odd/even). The TS grammar does not appear to have captures for quoted + reply lines. 
+- Bare email addresses in body text (`john@example.com`) are matched by legacy + (`keyword whole +@+ white`) but have no corresponding TS capture. +- The `Return-Path:` header is handled by a dedicated context in legacy but + would fall under `(header_other)` in TS, which changes its color from the + legacy-specific treatment. diff --git a/tests/syntax/samples/mail.mail b/tests/syntax/samples/mail.mail new file mode 100644 index 0000000000..38445ce4c0 --- /dev/null +++ b/tests/syntax/samples/mail.mail @@ -0,0 +1,65 @@ +From user@example.com Tue Mar 25 10:30:00 2025 +Return-Path: +From: John Doe +To: Jane Smith +Subject: Re: Meeting tomorrow at 3pm +Date: Tue, 25 Mar 2025 10:30:00 -0400 +Message-ID: +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 7bit +X-Mailer: Thunderbird 128.0 +X-Priority: 3 +References: +In-Reply-To: +List-Unsubscribe: +Received: from smtp.example.com (192.168.1.10) + by mx.example.com with ESMTP id abc123 + +Hi Jane, + +Thanks for the reminder about the meeting. I wanted to +follow up on a few items before we get together. + +> Could you bring the quarterly report? +> I also need the budget spreadsheet. + +Sure, I will bring both documents. I have updated the +quarterly numbers as of last Friday. + +>> The original plan was to meet on Monday. +>> But we moved it to Tuesday. +> Yes, Tuesday works better for everyone. + +I agree completely. Tuesday is much better. + +> > > This is a triple-nested quote. +> > Second level quote. +> First level quote. + +> Here is another single-level quote to make +> sure we cover enough quoting scenarios in +> the sample file for testing purposes. + +Also, I wanted to mention that the project timeline +has been updated. Please review the attached document +before the meeting. + +The new deadline for Phase 1 is April 15th, and we +need to have all deliverables ready by then. + +Let me know if you have any questions or concerns +about the updated schedule. 
+ +Best regards, +John Doe +john@example.com + +-- +John Doe +Senior Developer +Example Corp +Phone: +1-555-0123 +Fax: +1-555-0124 +Web: https://example.com +Email: john@example.com diff --git a/tests/syntax/samples/markdown-report.md b/tests/syntax/samples/markdown-report.md new file mode 100644 index 0000000000..b9380a586f --- /dev/null +++ b/tests/syntax/samples/markdown-report.md @@ -0,0 +1,92 @@ +Markdown syntax highlighting: TS vs Legacy comparison report +============================================================= + +Sample file: `markdown.md` +Legacy reference: `misc/syntax/markdown.syntax` +TS query: `misc/syntax-ts/queries-override/markdown-highlights.scm` +TS inline query: +`misc/syntax-ts/queries-override/markdown_inline-highlights.scm` +TS injections: +`misc/syntax-ts/queries-override/markdown-injections.scm` +TS colors: `misc/syntax-ts/colors.ini` `[markdown]` and `[markdown_inline]` + +Aligned with legacy +------------------- + +- Headings `#` through `###` (marker and content): `brightred` - MATCH +- Inline code spans (`` `code` ``): `cyan` - MATCH +- Indented code blocks (4-space indent): `cyan` - MATCH +- Fenced code block delimiters (`` ``` ``): `cyan` - MATCH + +Intentional improvements over legacy +------------------------------------- + +- Headings `####` through `######`: TS colors all heading levels as `brightred` + uniformly. Legacy uses `red` for `####`+ and `brightred` for `#`-`###`. The + distinction is arbitrary and TS's uniform treatment is cleaner. +- Setext headings (`===`/`---` underlines): TS colors both the content and the + underline as `brightred`. Legacy does not recognize setext headings at all + (they appear as DEFAULT). +- Bold (`**...**`, `__...__`): TS colors the entire node including content as + `brightmagenta`. Legacy only colors the `**`/`__` markers as `white` and + leaves content as DEFAULT. TS's full-node coloring is more readable and + clearly marks the extent of bold text. 
+- Italic (`*...*`, `_..._`): TS colors the entire node including content as + `magenta`. Legacy only colors the `*`/`_` markers as `yellow` and leaves + content as DEFAULT. Same rationale as bold. +- Bold italic (`***...***`, `___...___`): TS correctly nests bold inside italic + (or vice versa) with both `brightmagenta` and `magenta` visible. Legacy + partially handles this with `**` as `white` and `*` as `yellow` but the + content is DEFAULT. +- Strikethrough (`~~...~~`): TS colors as `brightcyan`. Legacy does not + recognize strikethrough at all. +- Code span content: TS colors the entire span (delimiters + content) as `cyan`. + Legacy only colors the backtick delimiters as `cyan` and content as DEFAULT. + TS's approach is more consistent and clearly marks the full extent of inline + code. +- Fenced code block language injection: TS parses fenced code block content + with the appropriate language grammar (e.g. Python inside + `` ```python ``), providing full syntax highlighting within code blocks. + Legacy colors the entire block uniformly as `cyan` regardless of language. +- Fenced code block info string + (`` ```python ``): TS colors the language identifier as `cyan` alongside the + delimiters. Legacy does not distinguish the info string. +- Blockquote markers (`>`): TS colors the `>` marker as `green`. Legacy has a + `context linestart > \n green` rule but it only activates for the `>` + character itself; the rest of the line is inconsistently handled. +- Links (`[text](url)`, `[ref]`, `![alt](url)`): TS colors entire link + constructs as `yellow`, including reference links, images, autolinks, and + email autolinks. Legacy has a limited `[*](*)` keyword pattern that only + matches simple inline links. +- Escape sequences (`\*`, `\_`, `` \` ``, `\\`, etc.): TS colors all backslash + escapes as `brightgreen`. Legacy only partially handles `\_` (as DEFAULT) and + `\*` (triggers the `*` keyword pattern). 
+- Entity references (`&amp;`, `&#42;`): TS colors as `brightgreen`. Legacy does + not recognize entity references. +- HTML blocks (`
...
`): TS colors as `brightred` with HTML injection + for full tag highlighting. Legacy does not recognize HTML blocks. +- Inline HTML tags (``, ``): TS colors tags as `brightred`. Legacy does + not recognize inline HTML. +- Thematic breaks (`---`, `***`, `___`): TS colors as `brightcyan`. Legacy does + not recognize thematic breaks. +- List markers (`-`, `+`, `*`, `1.`, `3)`): TS colors as `lightgray` (matching + DEFAULT). Legacy does not distinguish list markers. +- Table cells: TS applies `markdown_inline` injection to pipe table cells, + enabling inline formatting (`*italic*`, `**bold**`, `` `code` ``) inside + tables. Legacy does not recognize tables. + +Known shortcomings +------------------ + +- Fenced code blocks without a language identifier: TS only colors the + `` ``` `` delimiters as `cyan` and leaves the content as DEFAULT. Legacy + colors the entire block (delimiters + content) as `cyan`. + This is a tradeoff of the injection approach -- without a language, no + injection occurs and content is uncolored. +- Blockquote content beyond the `>` marker is not colored. TS only colors the + `>` marker itself as `green`. The content within blockquotes gets inline + injection for formatting but has no blockquote-specific background or + foreground color. +- Nested blockquote markers on continuation lines (second `>` in `> > text`) are + colored, but the first `>` on those lines may not be since tree-sitter + attributes it to the outer blockquote structure rather than a marker node. diff --git a/tests/syntax/samples/markdown.md b/tests/syntax/samples/markdown.md new file mode 100644 index 0000000000..b170ca77d2 --- /dev/null +++ b/tests/syntax/samples/markdown.md @@ -0,0 +1,91 @@ +# Heading level 1 + +## Heading level 2 + +### Heading level 3 + +#### Heading level 4 + +##### Heading level 5 + +###### Heading level 6 + +Setext Heading Level 1 +====================== + +Setext Heading Level 2 +---------------------- + +Regular paragraph text with normal coloring. 
+ +*Italic text* and _also italic_ here. + +**Bold text** and __also bold__ here. + +***Bold italic*** and ___also bold italic___ here. + +~~Strikethrough text~~ here. + +Escaped special characters: \* \_ \` \\ \[ \] \# \! + +> Blockquote text +> spanning multiple lines + +> Nested blockquote +> > Inner blockquote + +- Unordered list with minus ++ Unordered list with plus +* Unordered list with star + +1. Ordered list item +2. Another ordered item +3) Parenthesis style + +Inline `code span` here. + +Inline ``code with `backtick` inside`` here. + + Indented code block line 1 + Indented code block line 2 + +``` +Fenced code block without language +with multiple lines +``` + +```python +def hello(): + print("Hello, World!") +``` + +--- + +*** + +___ + +[Link text](http://example.com) + +[Link with title](http://example.com "Example Title") + +[Reference link][ref1] + +[ref1]: http://example.com + +![Image alt text](http://example.com/image.png) + + + + + +
Inline HTML block
+ +Paragraph with inline HTML tags inside. + +| Column 1 | Column 2 | Column 3 | +|-----------|----------|----------| +| cell 1 | cell 2 | cell 3 | +| *italic* | **bold** | `code` | + +Text with & entity reference and * numeric reference. diff --git a/tests/syntax/samples/matlab-report.md b/tests/syntax/samples/matlab-report.md new file mode 100644 index 0000000000..7eda6576c1 --- /dev/null +++ b/tests/syntax/samples/matlab-report.md @@ -0,0 +1,41 @@ +MATLAB syntax highlighting: TS vs Legacy comparison report +========================================================== + +Sample file: `matlab.m` +Legacy reference: `misc/syntax/octave.syntax` +TS query: `misc/syntax-ts/queries-override/matlab-highlights.scm` +TS colors: `misc/syntax-ts/colors.ini` `[matlab]` + +Aligned with legacy +------------------- + +- Control flow keywords (`if`, `else`, `elseif`, `end`, `for`, `while`, + `switch`, `case`, `otherwise`, `try`, `catch`, `return`, `continue`, + `break`, `function`, `endfunction`, `parfor`, `spmd`): `white` - MATCH +- Comments (`%`): `brown` - MATCH +- Strings (single-quoted): `green` - MATCH +- Operators (`+`, `-`, `*`, `/`, `=`, `<=`, `>=`, `==`, `~=`, `<`, `>`, + `&&`, `||`, `.*`, `./`, `.^`): `brightcyan` - MATCH +- Delimiters (`(`, `)`, `[`, `]`, `{`, `}`, `,`, `;`): `brightcyan` - + MATCH +- Built-in function names (`disp`, `fprintf`, `sprintf`, `sqrt`, `mod`): + `yellow` - MATCH (via legacy keyword list) +- Class keywords (`classdef`, `properties`, `methods`, `events`, + `enumeration`, `global`, `persistent`, `arguments`): handled + identically in both engines. + +TS and Legacy produce identical output for the sample file. The +highlighting is a perfect match across all token categories. + +Known shortcomings +------------------ + +- Format specifiers (`%s`, `%d`, `%f`) inside strings are not colored. + Neither engine distinguishes format specifiers from string content. 
+- Double-quoted strings (`"..."`) are colored as `green` by TS but the + legacy octave.syntax treats them the same way, so this is consistent. +- Block comments (`%{ ... %}`) are colored as `brown` in both engines. +- The `classdef`, `properties`, `events`, and `enumeration` keywords are + not colored as `white` by TS because they are not in the keyword list. + Legacy also leaves them uncolored. Both engines only color `methods` + as `yellow` (via the built-in function name list). diff --git a/tests/syntax/samples/matlab.m b/tests/syntax/samples/matlab.m new file mode 100644 index 0000000000..546ecd6034 --- /dev/null +++ b/tests/syntax/samples/matlab.m @@ -0,0 +1,140 @@ +% MATLAB sample: demonstrate all syntax features + +% Function definition +function result = fibonacci(n) + if n <= 1 + result = n; + else + result = fibonacci(n - 1) + fibonacci(n - 2); + end +end + +% Class definition +classdef MyClass + properties + Name string + Value double = 0 + end + + methods + function obj = MyClass(name, val) + obj.Name = name; + obj.Value = val; + end + + function disp(obj) + fprintf('%s: %f\n', obj.Name, obj.Value); + end + end + + events + DataChanged + end + + enumeration + Red, Green, Blue + end +end + +% Control flow +for i = 1:10 + if mod(i, 2) == 0 + continue; + elseif i > 7 + break; + end + disp(i); +end + +% While loop +x = 100; +while x > 1 + x = x / 2; +end + +% Switch statement +color = 'red'; +switch color + case 'red' + disp('Red'); + case 'blue' + disp('Blue'); + otherwise + disp('Other'); +end + +% Try/catch +try + result = 1 / 0; +catch e + disp(e.message); +end + +% Parallel for +parfor i = 1:100 + data(i) = sqrt(i); +end + +% Matrix operations +A = [1, 2, 3; 4, 5, 6; 7, 8, 9]; +B = A'; +C = A * B; +D = A .* B; +E = A ./ B; +F = A .^ 2; + +% Cell array +cells = {'hello', 42, [1 2 3]}; +cells{1} + +% Struct +s.name = 'test'; +s.value = 3.14; + +% Logical operators +if (x > 0) && (x < 100) + flag = true; +elseif (x <= 0) || (x >= 100) + flag = false; 
+end + +% Comparison operators +a = (1 == 1); +b = (1 ~= 2); +c = (3 > 2); +d = (1 < 2); +e = (2 >= 2); +f = (1 <= 2); + +% String types +str1 = 'single quoted string'; +str2 = "double quoted string"; +str3 = sprintf('formatted: %d %s %.2f', 42, 'hello', 3.14); + +% Global and persistent +global sharedVar; +persistent cachedData; + +% Function arguments block +function out = validate(x, y) + arguments + x (1,1) double {mustBePositive} + y (1,1) double = 1 + end + out = x + y; +end + +% Comments +% Line comment +%{ + Block comment + spanning multiple lines +%} + +% Escape sequences in strings +msg = sprintf('line1\nline2\ttab\\backslash'); + +% SPMD block +spmd + data = labindex * 10; +end diff --git a/tests/syntax/samples/meson.build b/tests/syntax/samples/meson.build new file mode 100644 index 0000000000..e97084713b --- /dev/null +++ b/tests/syntax/samples/meson.build @@ -0,0 +1,164 @@ +# Meson build file sample + +# Project definition +project( + 'sample-project', + 'c', 'cpp', + version : '1.0.0', + license : 'MIT', + default_options : [ + 'c_std=c11', + 'cpp_std=c++17', + 'warning_level=3', + ], +) + +# Comments use hash +# Built-in objects +compiler = meson.get_compiler('c') +build_type = meson.get_option('buildtype') +src_dir = meson.current_source_dir() +bld_dir = meson.current_build_dir() +proj_name = meson.project_name() +proj_ver = meson.project_version() +is_cross = meson.is_cross_build() +host_sys = host_machine.system() +host_cpu = host_machine.cpu_family() +build_sys = build_machine.system() + +# Boolean values +debug_mode = true +release_mode = false + +# String operations +greeting = 'Hello, World!' 
+multiline = '''This is a +multi-line string''' +name = 'meson' +upper = name.to_upper() + +# Dependencies +glib_dep = dependency('glib-2.0', required : true) +thread_dep = dependency('threads') +math_dep = compiler.find_library('m', required : false) +zlib_dep = dependency('zlib', required : false) + +# Source files +sources = files( + 'src/main.c', + 'src/utils.c', + 'src/parser.c', +) + +extra_sources = files('src/extra.c') + +# Include directories +inc = include_directories('include', 'src') + +# Conditionals +if debug_mode + add_project_arguments('-DDEBUG', language : 'c') + message('Debug mode enabled') +elif build_type == 'release' + add_project_arguments('-DNDEBUG', language : 'c') + message('Release mode') +else + warning('Unknown build type') +endif + +# Logical operators +if debug_mode and not release_mode + message('Development build') +endif + +if host_sys == 'linux' or host_sys == 'freebsd' + message('Unix-like system') +endif + +# Foreach loop +test_names = ['unit', 'integration', 'functional'] +foreach t : test_names + test_src = files('tests' / t + '_test.c') + test_exe = executable( + t + '_test', + test_src, + dependencies : [glib_dep], + include_directories : inc, + ) + test(t, test_exe) +endforeach + +# Dictionary +config_data = { + 'version' : proj_ver, + 'prefix' : get_option('prefix'), + 'debug' : debug_mode, +} + +# Configuration +conf = configuration_data() +conf.set('VERSION', proj_ver) +conf.set('PACKAGE', proj_name) +conf.set10('HAVE_ZLIB', zlib_dep.found()) + +configure_file( + input : 'config.h.in', + output : 'config.h', + configuration : conf, +) + +# Main executable +main_exe = executable( + 'sample-app', + sources, + dependencies : [glib_dep, thread_dep, math_dep], + include_directories : inc, + install : true, +) + +# Library targets +shared_library( + 'sample-shared', + sources, + dependencies : [glib_dep], + install : true, + version : proj_ver, +) + +static_library( + 'sample-static', + sources, + dependencies : [glib_dep], +) 
+ +# Subdir +subdir('docs') +subdir('tests') + +# Custom target +custom_target( + 'generate-docs', + output : 'docs.html', + command : [find_program('doxygen'), '@INPUT@'], + input : 'Doxyfile', + install : true, + install_dir : get_option('datadir') / 'doc', +) + +# Run command +result = run_command('pkg-config', '--version', check : true) +message('pkg-config version: ' + result.stdout().strip()) + +# Install data +install_data('data/config.ini', install_dir : 'etc') +install_headers('include/sample.h') + +# Summary +summary( + { + 'Build type' : build_type, + 'Host system' : host_sys, + 'Debug mode' : debug_mode, + }, + section : 'Configuration', +) diff --git a/tests/syntax/samples/meson.build-report.md b/tests/syntax/samples/meson.build-report.md new file mode 100644 index 0000000000..b8385b49e9 --- /dev/null +++ b/tests/syntax/samples/meson.build-report.md @@ -0,0 +1,69 @@ +Meson syntax highlighting: TS vs Legacy comparison report +========================================================== + +Sample file: `meson.build` +Legacy reference: `misc/syntax/meson.syntax` +TS query: `misc/syntax-ts/queries-override/meson-highlights.scm` +TS colors: `misc/syntax-ts/colors.ini` `[meson]` + +Aligned with legacy +------------------- + +- Control flow keywords (`if`, `elif`, `else`, `endif`, `foreach`, `endforeach`, + `and`, `or`, `not`): `yellow` - MATCH +- Boolean literals (`true`, `false`): `yellow` - MATCH +- Single-quoted strings (`'...'`): `green` - MATCH +- Comments (`# ...`): `brown` - MATCH +- Function calls (`project`, `dependency`, `executable`, `files`, + `include_directories`, `message`, `warning`, `add_project_arguments`, + `configuration_data`, `configure_file`, `shared_library`, `static_library`, + `subdir`, `custom_target`, `find_program`, `get_option`, `run_command`, + `install_data`, `install_headers`, `test`, `summary`): `white` - MATCH + +Intentional improvements over legacy +------------------------------------- + +- Dictionary keys (`version`, 
`license`, `default_options`, `required`, + `language`, `dependencies`, `include_directories`, `input`, `output`, + `configuration`, `install`, `install_dir`, `command`, `section`, `check`): TS + colors these as `yellow` via `property.key`. Legacy does not distinguish + dictionary keys from regular identifiers, leaving them as default text. This + is a significant improvement for readability of meson build files where + keyword arguments are very common. +- Method calls on objects (`meson.get_compiler`, `meson.get_option`, + `meson.current_source_dir`, `host_machine.system`, `compiler.find_library`, + `name.to_upper`, `conf.set`, `conf.set10`, `result.stdout`, `result.strip`, + `zlib_dep.found`): TS colors the method name as `white` via `normal_command`. + Legacy only colors standalone function names from its hardcoded list, missing + method calls entirely. +- Built-in objects (`meson`, `host_machine`, `build_machine`, `target_machine`): + legacy colors these as `yellow` via keyword matching. TS now colors them as + `yellow` via `variable.builtin` using `#any-of?` predicate on `(identifier)`. + TS also colors the method calls on these objects as `white`. +- Multi-line strings (`'''...'''`): both TS and legacy handle these as string + contexts colored `green`. TS uses the tree-sitter string node which correctly + spans multiple lines. + +Known shortcomings +------------------ + +- Built-in type names (`compiler`, `string`, `Number`, `boolean`, `array`, + `dictionary`): legacy colors all of these as `yellow` via keyword matching. TS + does not color them as they are generic identifiers and would cause false + positives. The four primary built-in objects (`meson`, `host_machine`, + `build_machine`, `target_machine`) are now handled via `#any-of?`. +- Double-quoted strings (`"..."`): legacy colors these as `brightred` (a + deliberate distinction from single-quoted strings). TS colors all strings + uniformly as `green` via the `string` capture, losing this distinction. 
+- The `summary` function call: TS colors it as `white` via the function call + capture. Legacy does not have `summary` in its function keyword list, so + legacy leaves it uncolored. TS is better here. +- Legacy hardcodes a specific list of known functions (`project`, `executable`, + `dependency`, etc.). TS colors any function call as `white` via the + `normal_command` grammar node, which is more flexible and future-proof but + less selective. +- The `break` and `continue` keywords: TS captures these via `keyword_break` and + `keyword_continue` nodes as `yellow`. The sample file does not exercise these + but they are in the TS query. +- Comparison operators (`==`): neither legacy nor TS color operators in meson. + They appear as default text in both. diff --git a/tests/syntax/samples/muttrc b/tests/syntax/samples/muttrc new file mode 100644 index 0000000000..0a7abc9854 --- /dev/null +++ b/tests/syntax/samples/muttrc @@ -0,0 +1,104 @@ +# Muttrc configuration file +# Personal mail settings + +# Basic settings +set realname = "John Doe" +set from = "john@example.com" +set folder = "imaps://imap.example.com/" +set spoolfile = "+INBOX" +set record = "+Sent" +set postponed = "+Drafts" +set trash = "+Trash" + +# IMAP settings +set imap_user = "john@example.com" +set imap_pass = `gpg --quiet --decrypt ~/.mutt/pass.gpg` +set imap_keepalive = 300 +set imap_check_subscribed +set imap_idle + +# SMTP settings +set smtp_url = "smtps://john@smtp.example.com:465/" +set smtp_pass = `gpg --quiet --decrypt ~/.mutt/pass.gpg` +set ssl_starttls = yes +set ssl_force_tls = yes + +# Compose settings +set editor = "vim" +set edit_headers = yes +set charset = "UTF-8" +set send_charset = "us-ascii:utf-8" +set assumed_charset = "iso-8859-1" + +# Index settings +set sort = threads +set sort_aux = reverse-last-date-received +set index_format = "%4C %Z %{%b %d} %-15.15L (%?l?%4l&%4c?)
%s" +set date_format = "%d/%m/%Y %H:%M" +set mark_old = no +set beep_new = yes + +# Pager and menu settings +set pager_index_lines = 10 +set pager_context = 3 +set pager_stop = yes +set menu_scroll = yes +set tilde = yes + +# Headers +ignore * +unignore From To Cc Bcc Date Subject +unhdr_order * +hdr_order From: To: Cc: Bcc: Date: Subject: + +# Aliases +alias work "Work Contact" +alias home "Home Contact" + +# Key bindings +bind index,pager g noop +bind index gg first-entry +bind index G last-entry +bind pager gg top +bind pager G bottom + +# Macros +macro index,pager \cb "|urlview\n" "Call urlview" +macro index A \ + "~NN" \ + "Mark all new as read" + +# Colors +color normal default default +color indicator brightwhite blue +color tree yellow default +color status brightgreen blue +color error brightred default +color message brightcyan default +color signature yellow default +color attachment brightyellow default +color search brightred black +color hdrdefault cyan default +color quoted green default +color quoted1 yellow default +color quoted2 red default + +# Hooks +folder-hook . "set sort=threads" +folder-hook =Sent "set sort=date-sent" +send-hook . "set signature=~/.mutt/sig" +reply-hook ~f@example.com "set from=john@example.com" +save-hook ~f@work.com =Work +message-hook .
"set langstrstrstrstrstrstrstrstrstring" +account-hook imaps://imap.example.com/ \ + "set imap_user=john@example.com" + +# Mailboxes and mailing lists +mailboxes +INBOX +Sent +Drafts +Trash +Work +subscribe dev-list@example.com users@example.com +lists announce@example.com + +# Source external files +source ~/.mutt/colors +source ~/.mutt/gpg.rc +source `echo ~/.mutt/local` diff --git a/tests/syntax/samples/muttrc-report.md b/tests/syntax/samples/muttrc-report.md new file mode 100644 index 0000000000..bc6c18e6e4 --- /dev/null +++ b/tests/syntax/samples/muttrc-report.md @@ -0,0 +1,78 @@ +Muttrc syntax highlighting: TS vs Legacy comparison report +============================================================ + +Sample file: `muttrc` +Legacy reference: `misc/syntax/muttrc.syntax` +TS query: `misc/syntax-ts/queries-override/muttrc-highlights.scm` +TS colors: `misc/syntax-ts/colors.ini` `[muttrc]` + +Aligned with legacy +------------------- + +- Comments (`# ...`): `brown` - MATCH (TS `@comment`, legacy `context # \n + brown`). +- Primary commands (`set`, `color`, `macro`, `source`, `alternates`, `bind`, + `ignore`, `unignore`, `mailboxes`, `subscribe`, `lists`, `hdr_order`, + `unhdr_order`): `brightgreen` in TS via `(command)` -> `@string.special`. + Legacy uses `keyword whole set brightgreen`, `keyword whole color + brightgreen`, etc. The TS colors.ini maps `string.special` to `brightgreen` - + MATCH. +- Set directive options (`realname`, `from`, `folder`, `spoolfile`, `imap_user`, + `editor`, `sort`, `index_format`, etc.): `yellow` in TS via `(set_directive + (option))` -> `@keyword`. Legacy uses `keyword whole ... yellow` for each + option name - MATCH. +- Hook keywords (`folder-hook`, `send-hook`, `reply-hook`, `save-hook`, + `message-hook`, `account-hook`): `brightcyan` in both TS (`@delimiter`) and + legacy (`keyword whole ... brightcyan`) - MATCH.
+- `alias` keyword: `brightcyan` in TS (`@delimiter`), `brightcyan` in legacy + (`keyword whole alias brightcyan`). However, TS treats `alias` as a separate + literal match, not as a command - MATCH. +- String delimiters (`"`, `'`): `green` in TS (`@string`). Legacy uses `context + " " green` and `context ' ' green` - MATCH. +- Backtick shell commands (`` ` ``): `brightred` in TS (`@function.special`). + Legacy uses `keyword \` brightred` - MATCH. + +Intentional improvements over legacy +------------------------------------- + +- TS provides structured parsing, correctly identifying `(command)` nodes, + `(set_directive)` with nested `(option)` nodes, and hook keywords as distinct + grammar elements. Legacy relies on flat keyword lists. +- TS correctly scopes option names only within `set` directives via + `(set_directive (option))`, preventing false matches of option names used in + other contexts. +- TS highlights `ignore` and `unignore` commands as `@string.special` + (brightgreen) via the `(command)` node. The TS grammar gives these their + correct role as commands rather than requiring separate keyword entries. +- TS identifies string content inside quotes (shown as cyan in the dump) from + the grammar's string node children. + +Known shortcomings +------------------ + +- The legacy engine produced NO syntax highlighting in the dump tool output. All + characters received the default color (4159). The legacy `.syntax` file + contains comprehensive rules with hundreds of option keywords, but the dump + tool appears unable to apply them. This may be a tool issue with + filename-to-syntax resolution for `muttrc` (the legacy regex `\.?[Mm]uttrc$` + should match, but the tool may not be checking content or filename patterns + correctly). +- The TS output shows `RED` as the default/background color for unmatched text + (visible as `<RED>` tags throughout). This appears to be an artifact of the + dump tool's color mapping rather than actual red text in the editor.
+- The TS grammar highlights text inside quoted strings with a cyan-like color, + showing string content distinctly from the green quote delimiters. This + differs from legacy where the entire quoted context is green. +- Color arguments in `color` commands (e.g., `brightwhite`, `blue`, `yellow`) + are not specially highlighted by TS. They appear as unmatched default text. + Legacy also does not highlight color names. +- The TS grammar treats `unignore`, `unhdr_order`, `mailboxes`, `subscribe`, + `lists`, and `hdr_order` all as `(command)` nodes with `@string.special` + (brightgreen), while legacy distinguishes some of these as `brightcyan` (e.g., + `mailboxes`, `subscribe`, `lists`, `hdr_order`, `alias`). This is a deliberate + simplification in TS. +- Hook patterns and regex arguments (e.g., `.`, `~f@example.com`, `=Sent`) are + not highlighted by either engine. +- The multiline macro continuation (`\` at end of line) is not specially handled + by TS. The continuation lines are parsed as part of the macro's string + arguments. 
diff --git a/tests/syntax/samples/ocaml-report.md b/tests/syntax/samples/ocaml-report.md new file mode 100644 index 0000000000..e6dc0c74f6 --- /dev/null +++ b/tests/syntax/samples/ocaml-report.md @@ -0,0 +1,89 @@ +OCaml syntax highlighting: TS vs Legacy comparison report +========================================================== + +Sample file: `ocaml.ml` +Legacy reference: `misc/syntax/ml.syntax` +TS query: `misc/syntax-ts/queries-override/ocaml-highlights.scm` +TS colors: `misc/syntax-ts/colors.ini` `[ocaml]` + +Aligned with legacy +------------------- + +- Language keywords (`let`, `rec`, `if`, `then`, `else`, `match`, `with`, `fun`, + `function`, `type`, `of`, `and`, `in`, `begin`, `end`, `do`, `done`, `for`, + `to`, `while`, `try`, `exception`, `mutable`, `true`, `false`, `module`, + `struct`, `sig`, `functor`, `open`, `class`, `object`, `val`, `method`, + `external`, `lazy`, `assert`, `when`, `as`, `downto`, `include`, `inherit`, + `new`, `private`, `nonrec`, `virtual`, `constraint`, `initializer`): `yellow` + - MATCH +- Operators (`=`, `<`, `>`, `|`, `::`, `~`, `!`, `:=`, `+`, `-`, `*`): + `cyan` - MATCH (except `->`, which legacy colors `brightgreen`) +- Delimiters (`.`, `,`, `:`): `cyan` - MATCH +- Semicolons (`;`): `brightred` - MATCH +- Double semicolons (`;;`): `brightred` - MATCH +- Strings (double-quoted `"..."`): `brightcyan` - MATCH +- Characters (`'A'`): legacy does not have a character context (single char + shows as default). TS colors characters as `brightcyan` via the `character` -> + `tag` capture. +- Comments (`(* ... *)`): `brown` - MATCH + +Intentional improvements over legacy +------------------------------------- + +- Type constructors and constructor names (`Red`, `Green`, `Blue`, `Custom`, + `None_like`, `Some_like`, `Not_found_custom`, `Empty_error`, `Stack_empty`): + TS colors these as `yellow` via `type` and `constructor_name` captures. Legacy + does not distinguish constructors from regular identifiers.
+- Module names (`IntSet`, `STACK`, `MakeStack`, `Printf`, `List`, `E`): TS + colors these as `yellow` via `module_name` capture. Legacy does not color + module names. +- Field names in records (`name`, `age`, `email`): TS colors these as `cyan` via + `field_name` -> `label` capture. Legacy does not distinguish record field + names. +- Label names (`~name`, `~age`): TS colors these as `cyan` via `label_name` -> + `label` capture. Legacy does not color labeled argument names. +- The `->` operator: legacy colors it as `brightgreen` (a unique color). TS + colors it as `cyan` via the `label` capture (same as other operators). This is + an intentional normalization -- TS uses a consistent cyan for all operators. +- The pipe operator (`|>`): TS captures this as `cyan` like other operators. + Legacy would color `|` as `cyan` and `>` separately. + +Known shortcomings +------------------ + +- The `->` arrow in match expressions and function types: legacy colors it as + `brightgreen`, giving it visual emphasis. TS colors it as `cyan` like all + other operators. Users accustomed to the distinct `brightgreen` arrow may + notice this change. +- Parentheses (`(`, `)`), brackets (`[`, `]`), and braces (`{`, `}`): legacy + colors all of these as `cyan`. TS now captures these as `brightcyan` via + `@delimiter`. Note: the OCaml tree-sitter grammar (.so) must be installed for + this to take effect. +- The `#` operator: legacy colors it as `cyan`. TS does not capture `#` as an + operator. +- The `@` and `^` operators: legacy colors these as `cyan`. TS does not include + them in the operator list. +- The `&` operator: legacy colors it as `cyan`. TS does not include it. +- The `<>` operator (not-equal): legacy colors it as `cyan`. TS does not + explicitly capture `<>`. +- The `<-` operator (assignment): legacy colors it as `cyan`. TS does not + include `<-` in its operator list, but the output shows it is colored `cyan` + correctly, suggesting the tree-sitter grammar captures it. 
+- The `not` and `or` and `mod` keywords: legacy colors these as `yellow`. TS + does not include them in the keyword list. +- The `where`, `value`, `prefix` keywords: legacy includes these as `yellow`. TS + does not list `value` or `prefix` (they are CamlLight-specific, not standard + OCaml). +- Format specifiers inside strings (`%d`, `%s`): legacy colors these as + `brightmagenta` inside the string context. TS colors the entire string + uniformly as `brightcyan`, losing the format specifier distinction. Both + legacy and TS output show the format specifiers as `brightmagenta`, suggesting + the tree-sitter grammar may have an escape node, but the TS query does not + capture it distinctly. +- Escape sequences in strings (`\n`, `\\`): legacy colors these as + `brightmagenta`. TS does not distinguish escapes from the rest of the string + content, though the output suggests format specifiers like `%d` and `%s\n` are + rendered as `brightmagenta` in both systems. +- The `open` keyword with a module name: TS colors `open` as `yellow` (keyword) + and also colors the module name `Printf` after it as `yellow` via + `module_name`, so this works correctly. diff --git a/tests/syntax/samples/ocaml.ml b/tests/syntax/samples/ocaml.ml new file mode 100644 index 0000000000..5b4a5ce2b6 --- /dev/null +++ b/tests/syntax/samples/ocaml.ml @@ -0,0 +1,184 @@ +(* OCaml syntax sample file *) + +(* Basic let bindings *) +let x = 42 +let pi = 3.14159 +let name = "OCaml" +let ch = 'A' +let flag = true +let nothing = false + +(* Let with type annotation *) +let greeting : string = "Hello, world!"
+ +(* Function definitions *) +let add a b = a + b +let multiply x y = x * y + +let rec factorial n = + if n <= 0 then 1 + else n * factorial (n - 1) + +let rec fibonacci = function + | 0 -> 0 + | 1 -> 1 + | n -> fibonacci (n - 1) + fibonacci (n - 2) + +(* Pattern matching *) +let describe_number n = + match n with + | 0 -> "zero" + | 1 -> "one" + | n when n < 0 -> "negative" + | _ -> "other" + +(* Variant types *) +type color = + | Red + | Green + | Blue + | Custom of int * int * int + +type 'a option_like = + | None_like + | Some_like of 'a + +(* Record types *) +type person = { + name : string; + mutable age : int; + email : string; +} + +(* Using records *) +let john = { name = "John"; age = 30; email = "j@x.com" } +let _ = john.name +let () = john.age <- 31 + +(* Tuple and list *) +let pair = (1, "hello") +let nums = [1; 2; 3; 4; 5] +let combined = 0 :: nums + +(* Higher-order functions *) +let double_all = List.map (fun x -> x * 2) +let positives = List.filter (fun x -> x > 0) + +(* Let-in expressions *) +let result = + let a = 10 in + let b = 20 in + a + b + +(* If-then-else *) +let abs_val x = + if x >= 0 then x + else -x + +(* For and while loops *) +let print_range () = + for i = 1 to 10 do + Printf.printf "%d " i + done; + let j = ref 10 in + while !j > 0 do + j := !j - 1 + done + +(* Try-with exception handling *) +exception Not_found_custom of string +exception Empty_error + +let safe_divide a b = + try + if b = 0 then raise (Not_found_custom "div/0") + else a / b + with + | Not_found_custom msg -> + Printf.printf "Error: %s\n" msg; 0 + | Empty_error -> 0 + +(* Module definition *) +module IntSet = struct + type t = int list + let empty = [] + let add x s = x :: s + let mem x s = List.mem x s + let to_list s = s +end + +(* Module signature *) +module type STACK = sig + type 'a t + val empty : 'a t + val push : 'a -> 'a t -> 'a t + val pop : 'a t -> 'a t + val top : 'a t -> 'a + exception Stack_empty +end + +(* Functor *) +module MakeStack (E : sig type 
t end) = struct + type elt = E.t + type t = elt list + exception Stack_empty + let empty = [] + let push x s = x :: s + let pop = function + | [] -> raise Stack_empty + | _ :: t -> t + let top = function + | [] -> raise Stack_empty + | h :: _ -> h +end + +(* Open and include *) +open Printf + +(* Class definition *) +class point x_init y_init = object + val mutable x = x_init + val mutable y = y_init + method get_x = x + method get_y = y + method move dx dy = + x <- x + dx; + y <- y + dy +end + +(* Labeled arguments *) +let create ~name ~age = { name; age; email = "" } + +(* Operators *) +let _ = 1 + 2 - 3 * 4 +let _ = 1.0 = 2.0 +let _ = "a" < "b" +let _ = 1 > 0 +let _ = [1] :: [[2]] +let _ = x := 5 +let _ = !x +let _ = 1 |> succ |> succ + +(* Assert and lazy *) +let _ = assert (1 + 1 = 2) +let lazy_val = lazy (factorial 10) + +(* Begin-end block *) +let do_stuff () = + begin + print_string "hello"; + print_newline () + end + +(* External declaration *) +external c_func : int -> int = "c_func_impl" + +(* Polymorphic variant *) +let use_poly = function + | `Red -> "red" + | `Blue -> "blue" + | _ -> "other" + +(* Semicolons and double semicolons *) +let () = print_endline "done";; +let () = print_endline "final" diff --git a/tests/syntax/samples/pascal-report.md b/tests/syntax/samples/pascal-report.md new file mode 100644 index 0000000000..5f23e134fa --- /dev/null +++ b/tests/syntax/samples/pascal-report.md @@ -0,0 +1,76 @@ +Pascal syntax highlighting: TS vs Legacy comparison report +========================================================== + +Sample file: `tests/syntax/samples/pascal.pas` +Legacy reference: `misc/syntax/pascal.syntax` +TS query: `misc/syntax-ts/queries-override/pascal-highlights.scm` +TS colors: `misc/syntax-ts/colors.ini` `[pascal]` + +Aligned with legacy +------------------- + +- Keywords (`program`, `unit`, `uses`, `interface`, `implementation`, `begin`, + `end`, `var`, `const`, `type`, `array`, `of`, `record`, `class`, `object`, + 
`constructor`, `destructor`, `inherited`, `property`, `read`, `write`, `if`, + `then`, `else`, `case`, `for`, `to`, `downto`, `while`, `repeat`, `until`, + `with`, `do`, `try`, `except`, `finally`, `raise`, `set`, `function`, + `procedure`) -> `white` - MATCH. +- Logical operators (`and`, `or`, `not`, `xor`, `div`, `mod`, `shl`, `shr`, + `in`, `is`, `as`) -> `cyan` via `@label` - MATCH. +- Arithmetic/comparison operators (`=`, `<>`, `<`, `>`, `<=`, `>=`, `+`, `-`, + `*`, `/`, `:=`, `@`, `^`) -> `cyan` via `@label` - MATCH. +- Constants (`nil`, `True`, `False`) -> `white` - MATCH. +- `string` type keyword -> `yellow` via `@type` - MATCH. +- Delimiters (`;`, `:`, `,`) -> `lightgray` via `@number` - MATCH. +- String literals (`'...'`) -> `brightcyan` via `@tag` - MATCH. +- Block comments (`{ }`) -> `brightgreen` via `comment.special` - MATCH. +- Line comments (`//`) -> `brightgreen` via `comment.special` - MATCH. +- Alternative block comments (`(* *)`) -> `brightgreen` via `comment.special` - + MATCH. +- Default context (identifiers, numbers) -> `yellow` (legacy default) -- TS does + not explicitly color identifiers but they appear in default editor color. + +Intentional improvements over legacy +------------------------------------- + +- TS structurally recognizes Pascal grammar nodes (e.g., `kProgram`, `kBegin`, + `kEnd`) rather than relying on keyword matching, making it more robust against + false positives in strings or comments. +- TS correctly identifies `set` as a keyword with `white` coloring and + distinguishes it from the `set` logical operator context where legacy uses + `cyan`. +- TS handles the `string` keyword specially as `@type` (`yellow`), + distinguishing it from regular keywords (`white`) -- legacy also uses `white` + for `string` as a keyword, so TS provides a type-aware distinction. +- TS properly handles case-insensitive keywords through the grammar structure, + while legacy uses the `caseinsensitive` directive. 
+- TS handles compiler directives `{$...}` differently from regular comments -- + the grammar can distinguish these structurally. + +Known shortcomings +------------------ + +- TS default text color differs from legacy: legacy uses `yellow` as default + context color for all uncolored text (identifiers, numbers), while TS uses the + editor's default background color -- this means identifiers like `SyntaxDemo`, + `TAnimal`, `Integer`, `Result`, `WriteLn` appear in default color rather than + `yellow`. +- TS does not color `WriteLn` as `white` keyword as legacy does -- it appears in + default color since it is not in the keyword list. +- TS does not recognize `virtual`, `override`, `private`, `public`, `protected`, + `published` as keywords -- they appear in default color while legacy colors + them `white`. +- TS does not color `..` (range operator in `2..5`) distinctly -- legacy shows + it as `white` via keyword, while TS grammar may not capture it as a separate + operator. +- TS does not distinguish compiler directive comments (`{$...}`) with `green` as + legacy does -- they appear as regular `brightgreen` comments. +- TS does not recognize `*` (multiplication) as an operator in all contexts -- + in `4 * 2` and `I * I` the `*` appears uncolored, while legacy colors it via + its general operator rules. +- TS does not handle parentheses `(`, `)` and brackets `[`, `]` as `lightgray` + like legacy -- they appear uncolored in TS since the grammar uses dedicated + node types rather than literal tokens. +- Legacy colors `not` as `white` (regular keyword), while TS correctly uses + `cyan` (logical operator via `@label`) which better reflects its role -- this + is actually more accurate in TS. diff --git a/tests/syntax/samples/pascal.pas b/tests/syntax/samples/pascal.pas new file mode 100644 index 0000000000..19b2d7a1ea --- /dev/null +++ b/tests/syntax/samples/pascal.pas @@ -0,0 +1,228 @@ +{ Sample file demonstrating Pascal syntax highlighting features. 
} +{ Exercises all captures from the TS query override. } + +program SyntaxDemo; + +uses + SysUtils, Classes; + +const + MAX_SIZE = 100; + PI_VALUE = 3.14159; + GREETING = 'Hello, World!'; + IS_ACTIVE = True; + NOTHING = nil; + +type + TColor = (Red, Green, Blue); + TIntArray = array[0..9] of Integer; + TName = string[50]; + + TAnimal = class + private + FName: string; + FAge: Integer; + public + constructor Create(AName: string; AAge: Integer); + destructor Destroy; override; + property Name: string read FName write FName; + property Age: Integer read FAge write FAge; + function Speak: string; virtual; + end; + + TDog = class(TAnimal) + public + function Speak: string; override; + end; + + TPoint = record + X, Y: Integer; + end; + + TShape = object + Width, Height: Integer; + function Area: Integer; + end; + +var + I, J, K: Integer; + S: string; + Arr: TIntArray; + Dog: TDog; + P: TPoint; + Colors: set of TColor; + +{ Constructor implementation } +constructor TAnimal.Create(AName: string; AAge: Integer); +begin + inherited Create; + FName := AName; + FAge := AAge; +end; + +{ Destructor implementation } +destructor TAnimal.Destroy; +begin + inherited Destroy; +end; + +function TAnimal.Speak: string; +begin + Result := 'Generic animal sound'; +end; + +function TDog.Speak: string; +begin + Result := 'Woof!'; +end; + +{ Regular function } +function Add(A, B: Integer): Integer; +begin + Result := A + B; +end; + +{ Procedure } +procedure PrintMessage(Msg: string); +begin + WriteLn(Msg); +end; + +{ Function with local variables } +function Factorial(N: Integer): Integer; +var + Temp: Integer; +begin + if N <= 1 then + Result := 1 + else + Result := N * Factorial(N - 1); +end; + +{ Main program } +begin + { Arithmetic operators } + I := 10 + 5; + J := 10 - 3; + K := 4 * 2; + I := 10 div 3; + J := 10 mod 3; + S := 10 / 3; + + { Comparison operators } + if I = J then + WriteLn('equal'); + if I <> J then + WriteLn('not equal'); + if I < J then + WriteLn('less'); + if I > J 
then + WriteLn('greater'); + if I <= J then + WriteLn('less or equal'); + if I >= J then + WriteLn('greater or equal'); + + { Logical operators -> cyan } + if (I > 0) and (J > 0) then + WriteLn('both positive'); + if (I > 0) or (J > 0) then + WriteLn('at least one positive'); + if not (I = 0) then + WriteLn('not zero'); + if (I > 0) xor (J > 0) then + WriteLn('exactly one positive'); + + { Bit shift operators } + I := J shl 2; + I := J shr 1; + + { Type checking } + if Dog is TAnimal then + WriteLn('is animal'); + S := (Dog as TAnimal).Speak; + + { Set operations } + Colors := [Red, Green]; + if Blue in Colors then + WriteLn('has blue'); + + { Control flow } + if I > 0 then + WriteLn('positive') + else + WriteLn('non-positive'); + + { Case statement } + case I of + 0: WriteLn('zero'); + 1: WriteLn('one'); + 2..5: WriteLn('small'); + else + WriteLn('big'); + end; + + { For loop } + for I := 0 to 9 do + Arr[I] := I * I; + + for I := 9 downto 0 do + WriteLn(Arr[I]); + + { While loop } + I := 0; + while I < 10 do + begin + I := I + 1; + end; + + { Repeat/until loop } + I := 10; + repeat + I := I - 1; + until I = 0; + + { With statement } + P.X := 10; + P.Y := 20; + with P do + WriteLn(X, ', ', Y); + + { Try/except/finally } + try + I := 10 div 0; + except + WriteLn('Division by zero'); + end; + + try + Dog := TDog.Create('Rex', 5); + WriteLn(Dog.Speak); + finally + Dog.Free; + end; + + { Raise exception } + try + raise Exception.Create('Test error'); + except + WriteLn('Caught'); + end; + + { Boolean constants } + if True then + WriteLn('true'); + if not False then + WriteLn('not false'); + + { String type keyword } + S := string('hello'); + + { Delimiters -> lightgray } + WriteLn(Add(3, 4)); + + WriteLn('Done.'); +end. 
+ +(* Alternative comment style *) +// Line comment diff --git a/tests/syntax/samples/perl-report.md b/tests/syntax/samples/perl-report.md new file mode 100644 index 0000000000..9b460b6ff9 --- /dev/null +++ b/tests/syntax/samples/perl-report.md @@ -0,0 +1,84 @@ +Perl syntax highlighting: TS vs Legacy comparison report +========================================================= + +Sample file: `tests/syntax/samples/perl.pl` +Legacy reference: `misc/syntax/perl.syntax` +TS query: `misc/syntax-ts/queries-override/perl-highlights.scm` +TS colors: `misc/syntax-ts/colors.ini` `[perl]` + +Aligned with legacy +------------------- + +- Keywords (`sub`) -> `yellow` - MATCH. +- Control flow keywords (`if`, `elsif`, `else`, `unless`, `while`, `until`, + `for`, `foreach`, `do`, `last`, `next`, `goto`, `use`, `require`, `package`, + `return`) -> `magenta` - MATCH. +- Logical keyword operators (`and`, `or`) -> `magenta` via `keyword.directive` - + MATCH. +- `BEGIN`/`END` blocks -> `magenta` - MATCH. +- Builtin functions (`chomp`, `chop`, `defined`, `undef`, `eval`) -> `yellow` - + MATCH. +- Arithmetic operators (`+`, `-`, `*`, `/`, `**`) -> `yellow` - MATCH. +- Comparison operators (`==`, `!=`, `<`, `>`, `<=`, `>=`, `<=>`) -> `yellow` - + MATCH. +- String comparison operators (`eq`, `ne`, `lt`, `gt`, `le`, `ge`, `cmp`) -> + `yellow` - MATCH. +- Logical operators (`&&`, `||`, `!`) -> `yellow` - MATCH. +- Regex match operators (`=~`, `!~`) -> `yellow` - MATCH. +- Arrow operator (`->`) -> `yellow` - MATCH. +- Assignment operator (`=`) -> `yellow` - MATCH. +- Dot/range operators (`.`, `..`) -> `yellow` - MATCH. +- Semicolons -> `brightmagenta` - MATCH. +- Commas -> `brightcyan` - MATCH. +- Brackets/parens (`(`, `)`, `[`, `]`, `{`, `}`) -> `brightcyan` - MATCH. +- Scalar variables (`$scalar`, `$name`) -> `brightgreen` - MATCH. +- Double-quoted strings -> `green` - MATCH. 
+- Single-quoted strings -> `green` (TS) vs `brightgreen` (legacy) - MINOR + MISMATCH (TS uses same `@string` for both). +- Heredoc content -> `green` - MATCH. +- Comments (`#`) -> `brown` - MATCH. +- Regular expressions -> `brightgreen` via `string.special` - MATCH. +- Data section (`__END__`) -> `brown` - MATCH. +- Variable declarations (`my`, `our`, `local`) -> `magenta` - MATCH. +- `redo` keyword -> `magenta` - MATCH. + +Intentional improvements over legacy +------------------------------------- + +- TS correctly colors `@array` and `%hash` as `brightgreen` via + `variable.special`, while legacy uses `white` for `@` arrays and `brightcyan` + for `%` hashes -- TS is more consistent treating all variables uniformly. +- TS colors `@_` as `brightgreen` (regular variable), while legacy uses `red` + (special variable) -- TS simplifies by not distinguishing special vars. +- TS properly highlights interpolated variables inside double-quoted strings + (e.g., `$name` in `"Hello, $name!"` shows as `brightgreen` within `green` + string). +- TS highlights command strings (backticks) as `green` string rather than legacy + `white on black`. +- TS colors the `=>` fat comma as an operator (`yellow`) consistently, while + legacy also does `yellow` -- both match but TS catches it structurally. +- TS recognizes `qr/pattern/` as `string.special` (`brightgreen`), matching + legacy regex coloring. +- TS highlights labels (`DONE:`) with `cyan` via `@label`, while legacy has no + specific label coloring (it appears as default text with `:` in `brightcyan`). +- TS recognizes `goto DONE` target as `cyan` label. + +Known shortcomings +------------------ + +- TS does not distinguish single-quoted strings (`brightgreen` in legacy) from + double-quoted strings (`green` in legacy) -- both render as `green`. +- TS does not highlight the shebang line (`#!/usr/bin/perl`) with + `brightcyan on black` as legacy does -- TS renders it as `brown` (comment). 
+- TS does not color `print` as `yellow` (function) in all positions -- appears + uncolored in some contexts where legacy consistently colors it `yellow`. +- TS does not color `die` as `yellow` -- appears uncolored while legacy + highlights it. +- TS has a minor rendering issue with `%hash` where the `%` sigil shows as + `yellow` (operator) separately from the variable name, rather than coloring + the entire `%hash` token as one unit. +- Legacy colors many more builtin functions as `yellow` (e.g., `print`, `die`, + `open`, `close`, `push`, `pop`, `split`, `join`, etc.) -- TS only covers + `chomp`, `chop`, `defined`, `undef`, `eval` explicitly. +- TS does not distinguish Perl pragma modules (`strict`, `warnings`, `lib`, + etc.) with `brightcyan` as legacy does. diff --git a/tests/syntax/samples/perl.pl b/tests/syntax/samples/perl.pl new file mode 100644 index 0000000000..c292e9f0cd --- /dev/null +++ b/tests/syntax/samples/perl.pl @@ -0,0 +1,156 @@ +#!/usr/bin/perl +# Sample file demonstrating Perl syntax highlighting features. +# Exercises all captures from the TS query override. + +use strict; +use warnings; +require Carp; + +package MyModule; + +# Variable declarations (my/our/local -> magenta) +my $scalar = 42; +our @array = (1, 2, 3); +local %hash = (key => "value"); + +# Subroutine definition (sub -> yellow) +sub greet { + my ($name) = @_; + return "Hello, $name!"; +} + +# Control flow keywords -> magenta +if ($scalar == 42) { + print "Found it\n"; +} elsif ($scalar > 42) { + print "Too high\n"; +} else { + print "Too low\n"; +} + +unless ($scalar < 0) { + print "Positive\n"; +} + +# Loops +while ($scalar > 0) { + last; +} + +until ($scalar == 0) { + next; +} + +for my $i (0 .. 
10) { + redo if $i == 5; +} + +foreach my $item (@array) { + print "$item\n"; +} + +do { + $scalar--; +} while ($scalar > 0); + +# Logical operators +my $result = 1 and 0; +my $other = 0 or 1; + +# Goto +goto DONE if $scalar == 0; + +# BEGIN/END blocks +BEGIN { print "Starting\n"; } +END { print "Ending\n"; } + +# Builtin functions -> yellow +chomp $scalar; +chop $scalar; +my $is_defined = defined $scalar; +undef $scalar; +eval { die "error"; }; + +# Operators -> yellow +my $sum = 1 + 2; +my $diff = 5 - 3; +my $prod = 4 * 2; +my $quot = 10 / 2; +my $mod = 7 % 3; +my $pow = 2 ** 8; +my $concat = "a" . "b"; +my $range = 1 .. 10; + +# Comparison operators +my $eq_num = (1 == 2); +my $ne_num = (1 != 2); +my $lt_num = (1 < 2); +my $gt_num = (1 > 2); +my $le_num = (1 <= 2); +my $ge_num = (1 >= 2); +my $cmp_num = (1 <=> 2); + +# String comparison operators +my $eq_str = ("a" eq "b"); +my $ne_str = ("a" ne "b"); +my $lt_str = ("a" lt "b"); +my $gt_str = ("a" gt "b"); +my $le_str = ("a" le "b"); +my $ge_str = ("a" ge "b"); +my $cmp_str = ("a" cmp "b"); + +# Logical operators +my $and_op = 1 && 0; +my $or_op = 1 || 0; +my $not_op = !1; + +# Assignment and regex match +my $match = ("hello" =~ /hel/); +my $nomatch = ("hello" !~ /xyz/); + +# Arrow operator +my $ref = \@array; +my $val = $ref->[0]; + +# String literals +my $double = "Hello, world!\n"; +my $single = 'Hello, world!'; +my $heredoc = < brightgreen +my @list = (10, 20, 30); +my %table = (a => 1, b => 2); +my $ref2 = \$scalar; +print $list[0]; +print $table{a}; + +# Comment +# This is a line comment + +# Semicolons -> brightmagenta +# Commas -> brightcyan +my @items = ("one", "two", "three"); + +# Brackets/parens -> brightcyan +my @nested = ([1, 2], {a => 1}); + +# Labels +DONE: +print "Reached label\n"; + +# Data section -> brown +__END__ +This is the data section. +It should be colored as a comment/brown. 
diff --git a/tests/syntax/samples/php-report.md b/tests/syntax/samples/php-report.md new file mode 100644 index 0000000000..9f6cb6f40f --- /dev/null +++ b/tests/syntax/samples/php-report.md @@ -0,0 +1,87 @@ +PHP syntax highlighting: TS vs Legacy comparison report +======================================================= + +Sample file: `tests/syntax/samples/php.php` +Legacy reference: `misc/syntax/php.syntax` +TS query: `misc/syntax-ts/queries-override/php-highlights.scm` +TS colors: `misc/syntax-ts/colors.ini` `[php]` + +Aligned with legacy +------------------- + +- Keywords (`if`, `elseif`, `else`, `while`, `for`, `foreach`, `do`, `switch`, + `case`, `default`, `break`, `continue`, `return`, `class`, `extends`, + `implements`, `function`, `new`, `try`, `catch`, `finally`, `throw`, + `abstract`, `final`, `public`, `protected`, `private`, `static`, `const`, + `use`, `namespace`, `require`, `require_once`, `include`, `include_once`, + `echo`, `declare`, `global`, `goto`, `readonly`, `trait`, `interface`, + `yield`, `match`) -> `brightmagenta` - MATCH. +- Alternative syntax keywords (`endfor`, `endforeach`, `endif`, `endswitch`, + `endwhile`) -> `brightmagenta` - MATCH. +- `null`, `true`, `false` -> `brightmagenta` - MATCH. +- Operators (`=`, `==`, `===`, `!=`, `!==`, `<>`, `<`, `>`, `<=`, `>=`, `<=>`, + `+`, `-`, `*`, `/`, `%`, `**`, `.`, `.=`, `+=`, `-=`, `*=`, `/=`, `&&`, `||`, + `!`, `&`, `|`, `^`, `~`, `<<`, `>>`, `->`, `=>`, `??`, `::`) -> `white` - + MATCH. +- Semicolons -> `brightmagenta` via `delimiter.special` - MATCH. +- Commas and colons -> `brightcyan` via `delimiter` - MATCH. +- Brackets/parens (`(`, `)`, `[`, `]`, `{`, `}`) -> `brightcyan` - MATCH. +- Variables (`$name`, `$age`, `$this`) -> `brightgreen` via `variable.special` - + MATCH (legacy uses `brightmagenta` for `$this` specifically, TS uses + `brightgreen` like other variables). +- Double-quoted strings -> `green` - MATCH. 
+- Single-quoted strings -> `brightgreen` (legacy) vs `green` (TS uses same + `@string` capture) - MINOR MISMATCH. +- Heredoc body -> `green` - MATCH. +- Comments (`//`, `#`, `/* */`) -> `brown` - MATCH. +- Function calls (`strlen`, `array_map`) -> `yellow` via `@keyword` on function + calls - MATCH. +- Function definitions (`add`, `gen`) -> function name not separately colored in + either (both show as plain text after `function` keyword). +- UPPERCASE constants (`PHP_VERSION`, `API_KEY`) -> `white` via `keyword.other` + - MATCH. +- Labels (`end:`) -> `cyan` via `@label` (legacy has no label support for PHP) - + TS IMPROVEMENT. + +Intentional improvements over legacy +------------------------------------- + +- TS highlights `$this` consistently as `brightgreen` (variable), while legacy + uses `brightmagenta` -- TS is more consistent. +- TS highlights escape sequences inside strings (`\n`) as `brightgreen`, giving + visibility into escape codes. +- TS properly recognizes `namespace`, `use`, `trait`, `interface`, `match`, + `fn`, `readonly`, `clone`, `instanceof` keywords that were added in newer PHP + versions. +- TS highlights function call names (`strlen`, `array_map`, `unset`, `list`, + `print`) as `yellow`, matching legacy's approach of coloring all PHP built-in + functions. +- TS highlights labels (`end:`) with `cyan` via `@label` -- legacy has no label + highlighting for PHP. +- TS recognizes method calls (`$dog->speak()`, `$e->getMessage()`) and scoped + calls (`Dog::$count`) structurally. +- TS colors nowdoc body content as string, while legacy may not handle nowdoc + syntax. +- TS handles the `match` expression (PHP 8.0) properly, which legacy predates. + +Known shortcomings +------------------ + +- TS does not distinguish single-quoted strings (`brightgreen` in legacy) from + double-quoted strings (`green`) -- both use `@string` mapped to `green`. 
+- TS does not color the ` brightmagenta +namespace App\Models; + +use App\Traits\HasFactory; +require 'config.php'; +require_once 'helpers.php'; +include 'utils.php'; +include_once 'constants.php'; + +// Class definition +abstract class Animal implements Serializable +{ + use HasFactory; + + const MAX_AGE = 100; + public readonly string $name; + protected int $age; + private bool $alive = true; + static $count = 0; + + public function __construct(string $name, int $age) + { + $this->name = $name; + $this->age = $age; + self::$count++; + } + + abstract public function speak(): string; + + final public function getName(): string + { + return $this->name; + } +} + +class Dog extends Animal +{ + public function speak(): string + { + return "Woof!"; + } +} + +interface Moveable +{ + public function move(): void; +} + +trait Runnable +{ + public function run(): void + { + echo "Running\n"; + } +} + +// Control flow +if (true) { + echo "yes"; +} elseif (false) { + echo "maybe"; +} else { + echo "no"; +} + +// Switch +switch ($value) { + case 1: + break; + case 2: + continue; + default: + break; +} + +// Loops +for ($i = 0; $i < 10; $i++) { + if ($i == 5) break; +} + +foreach ($items as $key => $val) { + echo "$key: $val\n"; +} + +while (true) { + break; +} + +do { + break; +} while (false); + +// Alternative syntax +for ($i = 0; $i < 5; $i++): +endfor; + +foreach ($items as $item): +endforeach; + +if (true): +endif; + +switch ($x): +endswitch; + +while (false): +endwhile; + +declare(strict_types=1); +// enddeclare would go here + +// Match expression (PHP 8) +$result = match($x) { + 1 => "one", + 2 => "two", + default => "other", +}; + +// Exception handling +try { + throw new \Exception("error"); +} catch (\Exception $e) { + echo $e->getMessage(); +} finally { + echo "cleanup"; +} + +// Constants -> brightmagenta +$a = null; +$b = true; +$c = false; + +// UPPERCASE constants -> white +define('API_KEY', 'secret'); +$version = PHP_VERSION; + +// Operators -> white +$sum = 
1 + 2; +$diff = 5 - 3; +$prod = 4 * 2; +$quot = 10 / 2; +$mod = 7 % 3; +$pow = 2 ** 8; +$concat = "a" . "b"; +$concat_assign = "a"; +$concat_assign .= "b"; + +// Comparison operators +$eq = (1 == 2); +$seq = (1 === 2); +$ne = (1 != 2); +$sne = (1 !== 2); +$ne2 = (1 <> 2); +$lt = (1 < 2); +$gt = (1 > 2); +$lte = (1 <= 2); +$gte = (1 >= 2); +$cmp = (1 <=> 2); + +// Logical operators +$and = true && false; +$or = true || false; +$not = !true; +$band = 1 & 2; +$bor = 1 | 2; +$bxor = 1 ^ 2; +$bnot = ~0; +$shl = 1 << 4; +$shr = 16 >> 2; + +// Assignment +$x = 1; +$x += 1; +$x -= 1; +$x *= 2; +$x /= 2; + +// Null coalescing +$val = $x ?? "default"; + +// Arrow and scope +$dog = new Dog("Rex", 5); +$dog->speak(); +Dog::$count; +echo Animal::MAX_AGE; + +// Function definitions and calls -> yellow +function add(int $a, int $b): int +{ + return $a + $b; +} + +$result = add(1, 2); +$len = strlen("hello"); +$arr = array_map(fn($x) => $x * 2, [1, 2, 3]); + +// Arrow function +$double = fn($n) => $n * 2; + +// Variables -> brightgreen +$simple = "value"; +$arr = [1, 2, 3]; + +// Strings +$double = "Hello, $name!\n"; +$single = 'no interpolation'; +$heredoc = << brightcyan +$nested = [[1, 2], ["a" => 1]]; + +// Semicolons -> brightmagenta +echo "done"; + +// Labels +goto end; +end: +echo "end\n"; + +// Global and unset +global $globalVar; +unset($globalVar); + +// List, print, exit +list($first, $second) = [1, 2]; +print "output\n"; +// exit(0); + +// Logical keywords +$r1 = true and false; +$r2 = true or false; +$r3 = true xor false; + +// yield +function gen() +{ + yield 1; + yield 2; +} + +// Clone and instanceof +$copy = clone $dog; +$check = $dog instanceof Animal; + +/* Block comment */ +# Hash comment +// Line comment diff --git a/tests/syntax/samples/po-report.md b/tests/syntax/samples/po-report.md new file mode 100644 index 0000000000..477e97af49 --- /dev/null +++ b/tests/syntax/samples/po-report.md @@ -0,0 +1,73 @@ +PO syntax highlighting: TS vs Legacy comparison report 
+====================================================== + +Sample file: `tests/syntax/samples/po.po` +Legacy reference: `misc/syntax/po.syntax` +TS query: `misc/syntax-ts/queries-override/po-highlights.scm` +TS colors: `misc/syntax-ts/colors.ini` `[po]` + +Aligned with legacy +------------------- + +- Keywords `msgid`, `msgstr`, `msgctxt`, `msgid_plural`: `brightcyan` via `@tag` + - MATCH +- Strings (both msgid and msgstr values): `green` via `@string` - MATCH (legacy + uses `cyan` for msgid strings and `green` for msgstr strings; TS unifies to + `green`) +- Translator comments (`# ...`): `brown` via `@comment` - MATCH +- Comments (`#` lines): `brown` via `@comment` - MATCH + +Intentional improvements over legacy +------------------------------------- + +- TS properly recognizes `msgctxt` as a keyword and colors it `brightcyan`. + Legacy treats `msgctxt` as part of the reference comment context (`#:` line), + coloring the line after `msgctxt` as `white` rather than giving the keyword + its own color. +- TS handles `msgid_plural` as a single `brightcyan` keyword. Legacy splits it, + coloring `msgid` as `brightcyan` and `_plural` as part of the `cyan` string + context. +- TS colors extracted comments (`#. ...`), references (`#: ...`), flags (`#, + ...`), and previous untranslated strings (`#~ ...`) uniformly as `brown` via + `@comment` / `@tag.special`. However the `@tag.special` capture maps to + `white` in colors.ini, so `#.`, `#:`, `#,` lines render as `white` matching + legacy. The `#~ ...` lines show as `white` via `@tag.special` instead of + legacy's `red`, which is a minor difference but still readable. +- TS produces clean single-token output for each line. Legacy sometimes splits + tokens awkwardly (e.g. `#, no-c-format` splits around `c-format`). + +Differences from legacy +----------------------- + +- TS colors all strings uniformly `green` via `@string`. Legacy distinguishes + msgid strings (`cyan`) from msgstr strings (`green`). 
This is a simplification + -- TS does not differentiate the two contexts. +- Legacy highlights format specifiers (`%d`, `%s`, `%%`, `\n`, `\t`, `\\`) + inside strings as `brightgreen`. TS does not break out escape sequences or + format specifiers -- the entire string is `green`. This loses the visual + distinction of format parameters. +- Legacy colors fuzzy blocks (`#, fuzzy` through the next blank line) as + `brightred`, marking the entire fuzzy entry. TS has no fuzzy-aware context -- + fuzzy entries look like normal entries. +- Legacy colors untranslated entries (`msgstr ""` at end of file) as + `brightred`. TS colors them normally (`brightcyan` keyword + `green` string). +- Legacy colors `#~ ...` (previous untranslated) lines as `red`. TS colors them + differently -- the `#~` prefix shows as `brown` while `msgid`/`msgstr` + keywords within still get `brightcyan` and strings get `green`. +- Legacy highlights `c-format` within flag comments as `yellow`. TS does not + distinguish flag content -- the entire flag line is `white` (via + `@tag.special`) or `brown` (via `@comment`). + +Known shortcomings +------------------ + +- TS lacks escape sequence highlighting inside strings. Format specifiers like + `%d`, `%s`, and escape sequences like `\n`, `\t` are all `green` instead of + the legacy `brightgreen`. +- TS does not distinguish fuzzy entries visually. In legacy, the entire fuzzy + block is `brightred`, which is a useful visual cue for translators. +- TS does not differentiate msgid vs msgstr string colors. Legacy uses `cyan` + for original strings and `green` for translated strings, making it easier to + visually separate source from translation. +- The `#~` previous-untranslated lines lose their distinctive `red` color from + legacy, blending in with normal comments. 
diff --git a/tests/syntax/samples/po.po b/tests/syntax/samples/po.po new file mode 100644 index 0000000000..5ca9ca787f --- /dev/null +++ b/tests/syntax/samples/po.po @@ -0,0 +1,105 @@ +# Translator comment for the PO file +# Another translator comment line +#. Extracted comment from source code +#: src/main.c:42 src/utils.c:100 +#, c-format +msgid "" +msgstr "" +"Project-Id-Version: sample 1.0\n" +"Report-Msgid-Bugs-To: bugs@example.com\n" +"POT-Creation-Date: 2024-01-15 10:00+0000\n" +"PO-Revision-Date: 2024-02-20 14:30+0000\n" +"Last-Translator: John Doe \n" +"Language-Team: Czech\n" +"Language: cs\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" + +# Simple translation +#: src/greeting.c:10 +msgid "Hello, world!" +msgstr "Ahoj, svete!" + +# Translation with format string +#: src/status.c:25 +#, c-format +msgid "Found %d item" +msgstr "Nalezeno %d polozek" + +# Multiline msgid +#: src/help.c:50 +msgid "" +"This is a long message that spans\n" +"multiple lines in the source.\n" +msgstr "" +"Toto je dlouha zprava, ktera zabira\n" +"vice radku ve zdroji.\n" + +# Plural forms +#: src/count.c:30 +msgid "One file" +msgid_plural "%d files" +msgstr[0] "Jeden soubor" +msgstr[1] "%d soubory" +msgstr[2] "%d souboru" + +# Fuzzy translation needing review +#, fuzzy +#: src/dialog.c:88 +msgid "Save changes?" +msgstr "Ulozit zmeny?" + +# Context disambiguation +#: src/menu.c:12 +msgctxt "menu" +msgid "Open" +msgstr "Otevrit" + +#: src/button.c:45 +msgctxt "button" +msgid "Open" +msgstr "Otevrit" + +# Escape sequences in strings +#: src/format.c:60 +#, c-format +msgid "Path: %s\tSize: %d\n" +msgstr "Cesta: %s\tVelikost: %d\n" + +# Previous untranslated string +#~ msgid "Old deprecated message" +#~ msgstr "Stara zastarala zprava" + +# Another extracted comment +#. 
TRANSLATORS: This appears in the title bar +#: src/window.c:5 +msgid "Application Title" +msgstr "Nazev aplikace" + +# Flag with no-c-format +#, no-c-format +#: src/static.c:15 +msgid "No format specifiers here" +msgstr "Zadne formatovaci znaky" + +# Multiple flags +#, c-format, fuzzy +#: src/report.c:99 +msgid "Total: %d items (%s)" +msgstr "Celkem: %d polozek (%s)" + +# Empty translation (untranslated) +#: src/new.c:1 +msgid "Untranslated string" +msgstr "" + +# String with percent literal +#: src/percent.c:7 +msgid "100%% complete" +msgstr "100%% hotovo" + +# String with backslash escapes +#: src/escape.c:20 +msgid "Line one\\nLine two\\tTabbed" +msgstr "Radek jedna\\nRadek dva\\tOdsazeny" diff --git a/tests/syntax/samples/properties-report.md b/tests/syntax/samples/properties-report.md new file mode 100644 index 0000000000..c88209b26d --- /dev/null +++ b/tests/syntax/samples/properties-report.md @@ -0,0 +1,75 @@ +Properties syntax highlighting: TS vs Legacy comparison +======================================================== + +Sample file: `tests/syntax/samples/properties.properties` +Legacy reference: `misc/syntax/properties.syntax` +TS query: `misc/syntax-ts/queries-override/properties-highlights.scm` +TS colors: `misc/syntax-ts/colors.ini` `[properties]` + +Aligned with legacy +------------------- + +- Hash comments (`# ...`): `brown` - MATCH (TS `@comment`, legacy `context + linestart # \n brown`). +- Exclamation comments (`! ...`): `brown` - MATCH (TS `@comment`, legacy + `context linestart ! \n brown`). +- Property keys (`db.host`, `app.name`, `feature.enabled`, etc.): `yellow` - + MATCH (TS `(property (key))` -> `@property.key`, legacy `keyword linestart ... + yellow`). +- Equals separator (`=`): `brightcyan` - MATCH (TS `@delimiter`, legacy `keyword + = brightcyan`). +- Colon separator (`:`): `brightcyan` - MATCH (TS `@delimiter`, legacy `keyword + : brightcyan`). 
+- Property values: `lightgray` - MATCH (TS `(property (value))` -> `@variable` = + `lightgray`, legacy default context `lightgray`). +- Unicode escape sequences (`\u0048`, `\u3053`, etc.): `magenta` - MATCH (TS + `(escape)` -> `@keyword.directive`, legacy `keyword \\u... magenta`). +- Substitutions (`${app.home}`, `${user.home}`, etc.): `brightgreen` - MATCH (TS + `(substitution)` -> `@variable.special`, legacy `keyword ${*} brightgreen`). + +Intentional improvements over legacy +------------------------------------- + +- TS correctly parses property values as complete spans via the grammar's + `(value)` node. Legacy's value highlighting relies on the default context + color and can be disrupted by embedded `:` or `=` characters (e.g., URLs like + `https://example.com/api?key=value` cause legacy to insert extra `brightcyan` + coloring at each `:` and `=`). +- TS highlights escape sequences (`\n`, `\t`, `\\`) within values via the + `(escape)` node (`magenta`). Legacy only recognizes `\uXXXX` unicode escapes + but not `\n`, `\t`, or `\\`. +- TS treats the entire multiline continuation (backslash at end of line) as part + of the value node, maintaining consistent value coloring. Legacy uses a + separate `context exclusive \\\n \n` which can cause color inconsistencies at + continuation boundaries. +- TS handles the space separator (`key3 value3`) correctly by parsing the + grammar structure. Legacy only matches `=` and `:` as explicit separators. + +Known shortcomings +------------------ + +- Legacy highlights numbers after `=` as `brightcyan` (e.g., `=5432`, `=8080`, + `=100`), treating them as part of the `=` keyword match. TS shows all values + uniformly as `lightgray` via `@variable`. This is actually more correct but + visually different. +- Legacy highlights HTML color codes (`#3498DB`, `#FFFFFF`) as `green` via + `keyword whole #... green`. TS shows these as plain `lightgray` values with no + special treatment. 
+- Legacy highlights boolean values (`true`, `false`) as `white` via `keyword + whole true/false white`. TS shows these as plain `lightgray` values like any + other value text. +- The `unicode.smile=\u263A` line shows the unicode escape highlighted in the + legacy output (`magenta`) but NOT in the TS output. The TS grammar may not + parse `\u263A` as an escape node in this context since the hex digit `A` is + uppercase but the pattern expects specific casing. +- Legacy mishandles colons within values (e.g., `jdbc:postgresql://localhost/db` + shows extra `brightcyan` at each `:`). TS correctly treats the entire value as + a single `lightgray` span. +- Legacy mishandles equals signs within values (e.g., `key=value&fmt=json` in + URLs). TS correctly treats the full value after the first separator as one + span. +- The multiline continuation in TS output shows no special highlighting for the + backslash or continuation lines. Legacy highlights the trailing `\` in + `yellow` and uses a special context for continuation lines. +- Neither engine highlights property key hierarchies (dot-separated namespaces) + with distinct colors for each level. diff --git a/tests/syntax/samples/properties.properties b/tests/syntax/samples/properties.properties new file mode 100644 index 0000000000..0ae1052807 --- /dev/null +++ b/tests/syntax/samples/properties.properties @@ -0,0 +1,76 @@ +# Database configuration +! 
This is also a comment (exclamation mark style) + +# Simple key=value pairs +db.host=localhost +db.port=5432 +db.name=myapp_production +db.driver=org.postgresql.Driver + +# Using colon separator +app.name: My Application +app.version: 2.1.0 +app.debug: false + +# Boolean values +feature.enabled=true +feature.disabled=false +maintenance.mode=true + +# Numeric values +server.port=8080 +max.connections=100 +timeout.seconds=30 + +# Unicode escape sequences +greeting=\u0048\u0065\u006C\u006C\u006F +japanese.text=\u3053\u3093\u306B\u3061\u306F + +# Substitution / variable references +log.dir=${app.home}/logs +config.path=${user.home}/.config/${app.name} +data.dir=${app.home}/data + +# Empty values +empty.value= +blank.key= + +# Multiline values with backslash continuation +long.text=This is a very long value that \ + spans multiple lines and continues \ + until the end here. + +# Special characters in values +url=https://example.com/api?key=value&fmt=json +regex.pattern=^[a-zA-Z0-9]+$ +file.path=/usr/local/share/app/config.xml + +# HTML color codes +color.primary=#3498DB +color.background=#FFFFFF +color.text=#333333 + +# Keys with dots (hierarchical) +spring.datasource.url=jdbc:postgresql://localhost/db +spring.datasource.username=admin +spring.datasource.password=secret123 + +# Mixed separators +key1=value1 +key2: value2 +key3 value3 + +# Escape sequences in values +newline.char=line1\nline2 +tab.char=col1\tcol2 +backslash=C:\\Program Files\\App +unicode.smile=\u263A + +# Longer substitution examples +build.output=${project.dir}/build/${build.type} +classpath=${lib.dir}/*.jar:${src.dir} + +# Number formats +integer.val=42 +negative.val=-100 +decimal.val=3.14 diff --git a/tests/syntax/samples/proto-report.md b/tests/syntax/samples/proto-report.md new file mode 100644 index 0000000000..a5c50b52d5 --- /dev/null +++ b/tests/syntax/samples/proto-report.md @@ -0,0 +1,67 @@ +Proto syntax highlighting: TS vs Legacy comparison +=================================================== 
+ +Sample file: `tests/syntax/samples/proto.proto` +Legacy reference: `misc/syntax/protobuf.syntax` +TS query: `misc/syntax-ts/queries-override/proto-highlights.scm` +TS colors: `misc/syntax-ts/colors.ini` `[proto]` + +Aligned with legacy +------------------- + +- Language keywords (`syntax`, `package`, `import`, `public`, `message`, `enum`, + `service`, `rpc`, `returns`, `stream`, `oneof`, `map`, `reserved`, + `extensions`, `option`, `optional`, `required`, `repeated`, `extend`, `to`, + `max`): `yellow` via `@keyword` - MATCH +- Type keywords (`int32`, `int64`, `uint32`, `uint64`, `sint32`, `sint64`, + `fixed32`, `fixed64`, `sfixed32`, `sfixed64`, `bool`, `string`, `double`, + `float`, `bytes`): `yellow` via `@keyword` - MATCH +- Comments (`// ...`): `brown` via `@comment` - MATCH +- Strings (`"proto3"`, `"google/protobuf/..."`, `"old_field"`): `green` via + `@string` - MATCH +- Assignment operator (`=`): `yellow` via `@operator.word` - MATCH +- Semicolons (`;`): `brightmagenta` via `@delimiter.special` - MATCH +- Brackets/parens/square brackets (`{`, `}`, `(`, `)`, `[`, `]`): `brightcyan` + via `@delimiter` - MATCH +- Punctuation (`,`, `:`, `.`, `<`, `>`): `brightcyan` via `@delimiter` - MATCH + +Intentional improvements over legacy +------------------------------------- + +- TS colors message/enum/service/rpc names (`User`, `Status`, `Organization`, + `UserService`, `GetUser`, etc.) as `brightcyan` via `@tag`. Legacy leaves + these names uncolored (default foreground). This provides better visual + distinction for type definitions. +- TS colors boolean literals (`true`, `false`) as `yellow` via `@keyword`. + Legacy does not recognize boolean literals as special tokens. +- TS handles `to` and `max` keywords in `extensions 100 to max` as `yellow`. + Legacy does not highlight these. +- TS treats the `syntax` keyword consistently as `yellow`. 
Legacy leaves it + uncolored since it was not in the keyword list (only `//` comment prefix was + matched for `syntax` in the comment context). +- TS would color escape sequences (`\\`, `\"`) in strings as `brightgreen` via + `@string.special`. Legacy does not highlight escape sequences within strings. +- TS colors dots in qualified names (`google.protobuf.Timestamp`) as + `brightcyan` delimiters. Legacy leaves dots as default. + +Differences from legacy +----------------------- + +- TS does not color `reserved` string literals (`"old_field"`, `"legacy_field"`) + as `green` -- they appear unquoted without the `@string` capture. Looking at + the dump output, TS shows these without `green` color tags while legacy properly + wraps them. This appears to be a minor parse issue where reserved string + values may not be recognized as `(string)` nodes by the grammar. + +Known shortcomings +------------------ + +- Reserved string values (`"old_field"`, `"legacy_field"`) appear without + `green` coloring in TS output, though legacy correctly colors them. The + tree-sitter proto grammar may not emit `(string)` nodes for reserved field + name strings. +- TS does not distinguish between field numbers and other numeric literals -- + both are uncolored (default foreground). This matches legacy behavior, so it + is not a regression. +- Neither TS nor legacy highlights field numbers or numeric option values with a + distinct color. 
diff --git a/tests/syntax/samples/proto.proto b/tests/syntax/samples/proto.proto new file mode 100644 index 0000000000..026fec7a49 --- /dev/null +++ b/tests/syntax/samples/proto.proto @@ -0,0 +1,149 @@ +// Protocol Buffers sample file +// Exercises all TS captures for proto grammar + +syntax = "proto3"; + +package example.api.v1; + +import "google/protobuf/timestamp.proto"; +import public "google/protobuf/empty.proto"; + +// Enum definition +enum Status { + STATUS_UNKNOWN = 0; + STATUS_ACTIVE = 1; + STATUS_INACTIVE = 2; + STATUS_DELETED = 3; +} + +// Simple message +message User { + string name = 1; + int32 age = 2; + bool active = 3; + Status status = 4; + bytes avatar = 5; + double score = 6; + float rating = 7; +} + +// Message with nested types +message Organization { + string id = 1; + string display_name = 2; + + // Nested enum + enum Role { + ROLE_UNSPECIFIED = 0; + ROLE_ADMIN = 1; + ROLE_MEMBER = 2; + } + + // Nested message + message Member { + string user_id = 1; + Role role = 2; + google.protobuf.Timestamp joined_at = 3; + } + + repeated Member members = 3; +} + +// Message with oneof +message SearchRequest { + string query = 1; + int32 page_size = 2; + int64 offset = 3; + + oneof filter { + string category = 4; + int32 status_code = 5; + bool active_only = 6; + } +} + +// Message with map fields +message Config { + map labels = 1; + map flags = 2; + map user_cache = 3; +} + +// Message with various integer types +message TypeShowcase { + uint32 unsigned_val = 1; + uint64 big_unsigned = 2; + sint32 signed_val = 3; + sint64 big_signed = 4; + fixed32 fixed_val = 5; + fixed64 big_fixed = 6; + sfixed32 sfixed_val = 7; + sfixed64 big_sfixed = 8; +} + +// Field options and message options +message Annotated { + option deprecated = true; + + string value = 1 [deprecated = true]; + repeated string tags = 2; + reserved 3, 4, 5; + reserved "old_field", "legacy_field"; +} + +// Boolean literals +message Defaults { + bool enabled = 1; // true or false +} + +// 
Escape sequence in string +message WithDescription { + string desc = 1; // "line1\nline2\ttab" +} + +// Service definition with RPCs +service UserService { + rpc GetUser (GetUserRequest) returns (User); + rpc ListUsers (ListUsersRequest) returns (ListUsersResponse); + rpc CreateUser (CreateUserRequest) returns (User); + rpc DeleteUser (DeleteUserRequest) returns (google.protobuf.Empty); + rpc StreamUpdates (StreamRequest) returns (stream User); +} + +message GetUserRequest { + string id = 1; +} + +message ListUsersRequest { + int32 page_size = 1; + string page_token = 2; +} + +message ListUsersResponse { + repeated User users = 1; + string next_page_token = 2; +} + +message CreateUserRequest { + string name = 1; + optional string email = 2; +} + +message DeleteUserRequest { + string id = 1; +} + +message StreamRequest { + repeated string user_ids = 1; +} + +// Extensions and max +message Extensible { + extensions 100 to max; +} + +// String with escape sequences +message EscapeTest { + string path = 1; // "C:\\Users\\test" + string quoted = 2; // "say \"hello\"" +} diff --git a/tests/syntax/samples/puppet-report.md b/tests/syntax/samples/puppet-report.md new file mode 100644 index 0000000000..3b7166ec2f --- /dev/null +++ b/tests/syntax/samples/puppet-report.md @@ -0,0 +1,74 @@ +Puppet syntax highlighting: TS vs Legacy comparison +==================================================== + +Sample file: `tests/syntax/samples/puppet.pp` +Legacy reference: `misc/syntax/puppet.syntax` +TS query: `misc/syntax-ts/queries-override/puppet-highlights.scm` +TS colors: `misc/syntax-ts/colors.ini` `[puppet]` + +NOTE: The TS puppet grammar produces significantly broken output. Large sections + of code are mis-parsed, resulting in `red` (error) color flooding most of + the file. The upstream tree-sitter-puppet grammar has substantial parsing + issues with common Puppet constructs. + +Aligned with legacy +------------------- + +Very few elements align due to pervasive parse errors. 
Where parsing succeeds +partially: + +- The `class` keyword is recognized but colored `red` via `@error` rather than + the intended `yellow` via `@keyword`. Legacy colors it `red` as a type. +- String literals in single quotes get `brightgreen` in some places where + parsing succeeds, partially matching legacy's `brightgreen` for single-quoted + strings. +- The `String` type annotation gets `yellow` via `@type` in a few places, + matching legacy's yellow for language keywords. + +Differences from legacy +----------------------- + +- TS floods most of the file with `red` error color. Legacy provides clean, + consistent coloring throughout the entire file with proper keyword, operator, + variable, string, and comment highlighting. +- Legacy correctly colors variables (`$package_name`, `$port`, etc.) as `white`. + TS either misses them entirely or they fall within error spans. +- Legacy colors operators (`=>`, `==`, `!=`, `+=`, `->`, `~>`) as `yellow`. TS + rarely reaches these tokens due to parse failures. +- Legacy colors conditionals (`if`, `elsif`, `else`, `case`, `default`, + `unless`) as `yellow`. TS mostly shows these within error-colored blocks. +- Legacy colors boolean values (`true`, `false`, `undef`) as `brightred`. TS + does not reliably parse these. +- Legacy distinguishes resource types (`file`, `package`, `service`) in `red`, + meta parameters (`require`, `notify`, `subscribe`) in `brightmagenta`, and + functions (`template`, `notice`, `fail`, `epp`, `include`, `each`) in + `brightred`. TS collapses most of these into error spans. +- Legacy properly handles string interpolation (`${variable}`) with `white` + variable coloring inside `green` double-quoted strings. TS does not reach + interpolation handling due to parse errors. +- Legacy colors comments (`# ...`) as `brown`. TS colors them as `red` (error) + in most cases because the parser fails before reaching comment nodes. +- Legacy colors brackets, braces, and parentheses as `brightcyan`. 
TS + occasionally gets these right but mostly they are absorbed by error regions. + +Known shortcomings +------------------ + +- The tree-sitter-puppet grammar has severe parsing issues. Most Puppet + constructs (class definitions, resource declarations, defined types, node + definitions, conditionals, selectors, collectors, lambdas, function + declarations) fail to parse correctly, resulting in `(ERROR)` nodes that map + to `red` via the `@error` capture. +- The TS output is essentially unusable for real Puppet files. The grammar needs + significant upstream fixes before it can match legacy quality. +- Specific constructs that fail: class with `inherits`, resource declarations + with hash parameters, `case` statements, `unless` blocks, node definitions + with regex, resource collectors (`<| |>`, `<<| |>>`), chaining arrows (`->`, + `~>`), lambdas with `each`, and function declarations with return type + annotations. +- The `@punctuation.special` capture for `$`, `@`, `@@`, and `${` interpolation + delimiters is never exercised due to parse failures. +- The `@string.escape`, `@string.regex`, `@float`, `@number`, `@namespace`, + `@method`, `@method.call`, `@function`, `@function.call`, `@variable`, + `@variable.builtin`, `@boolean`, `@conditional`, `@keyword.operator`, and + `@keyword.function` captures are mostly unreachable. 
diff --git a/tests/syntax/samples/puppet.pp b/tests/syntax/samples/puppet.pp new file mode 100644 index 0000000000..49516e7d42 --- /dev/null +++ b/tests/syntax/samples/puppet.pp @@ -0,0 +1,154 @@ +# Puppet sample file exercising all TS captures + +# Class definition with parameters +class mymodule::webserver ( + String $package_name = 'nginx', + Integer $port = 80, + Boolean $ssl_enabled = false, + Array[String] $vhosts = [], + Hash $options = {}, +) inherits mymodule::params { + + # Package resource + package { $package_name: + ensure => installed, + } + + # File resource with variable interpolation + file { '/etc/nginx/nginx.conf': + ensure => file, + owner => 'root', + group => 'root', + mode => '0644', + content => template('mymodule/nginx.conf.erb'), + require => Package[$package_name], + notify => Service['nginx'], + } + + # Service resource + service { 'nginx': + ensure => running, + enable => true, + subscribe => File['/etc/nginx/nginx.conf'], + } + + # Conditional with if/elsif/else + if $ssl_enabled { + file { '/etc/nginx/ssl': + ensure => directory, + mode => '0700', + } + } elsif $port == 443 { + notify { 'SSL port without SSL enabled': } + } else { + notice('Running without SSL') + } + + # Case statement + case $facts['os']['family'] { + 'RedHat': { + $config_path = '/etc/nginx/conf.d' + } + 'Debian': { + $config_path = '/etc/nginx/sites-enabled' + } + default: { + fail("Unsupported OS: ${facts['os']['family']}") + } + } +} + +# Defined type +define mymodule::vhost ( + String $server_name = $title, + Integer $listen_port = 80, + String $docroot = '/var/www/html', + Optional[String] $ssl_cert = undef, +) { + file { "/etc/nginx/sites-available/${server_name}": + ensure => file, + content => epp('mymodule/vhost.epp', { + 'server_name' => $server_name, + 'listen_port' => $listen_port, + 'docroot' => $docroot, + }), + } + + # Unless conditional + unless $ssl_cert == undef { + file { "/etc/ssl/${server_name}.crt": + ensure => file, + source => $ssl_cert, + } 
+ } +} + +# Node definition +node 'webserver01.example.com' { + include mymodule::webserver + + mymodule::vhost { 'default': + server_name => 'www.example.com', + listen_port => 8080, + } +} + +# Node with regex +node /^db\d+\.example\.com$/ { + class { 'mysql::server': + root_password => 'secret', + } +} + +# Variables and expressions +$base_path = '/opt/app' +$full_path = "${base_path}/config" +$numbers = [1, 2, 3, 4, 5] +$config = { + 'key1' => 'value1', + 'key2' => 'value2', +} + +# Operators +$result = (10 + 5) * 2 - 3 / 1 +$match = $facts['os']['name'] =~ /Ubuntu/ +$no_match = $facts['os']['name'] !~ /CentOS/ +$combined = $a and $b or !$c +$compare = $x >= 10 and $y <= 20 +$equal = $a == $b +$not_equal = $a != $b + +# Selector expression +$package = $facts['os']['family'] ? { + 'RedHat' => 'httpd', + 'Debian' => 'apache2', + default => 'httpd', +} + +# Resource collectors +File <| title == '/tmp/test' |> +Package <<| tag == 'webserver' |>> + +# Resource chaining +Package['nginx'] -> File['/etc/nginx/nginx.conf'] + ~> Service['nginx'] + +# Lambda with each +$numbers.each |Integer $index, $value| { + notice("Item ${index}: ${value}") +} + +# Builtin types +Integer[1, 100] $bounded_int = 42 +Float $pi = 3.14 +Regexp $pattern = /^test/ +Variant[String, Integer] $flexible = 'hello' + +# Function declaration +function mymodule::helper(String $input) >> String { + return "processed: ${input}" +} + +# Include and require +include stdlib +require apt diff --git a/tests/syntax/samples/python-report.md b/tests/syntax/samples/python-report.md new file mode 100644 index 0000000000..a98cc91da5 --- /dev/null +++ b/tests/syntax/samples/python-report.md @@ -0,0 +1,82 @@ +Python syntax highlighting: TS vs Legacy comparison report +=========================================================== + +Sample file: `python.py` +Legacy reference: `misc/syntax/python.syntax` +TS query: `misc/syntax-ts/queries-override/python-highlights.scm` +TS colors: `misc/syntax-ts/colors.ini` 
`[python]` + +Aligned with legacy +------------------- + +- Language keywords (`if`, `else`, `elif`, `for`, `while`, `def`, `class`, + `return`, `import`, `from`, `as`, `in`, `is`, `not`, `and`, `or`, `break`, + `continue`, `pass`, `raise`, `try`, `except`, `finally`, `with`, `del`, + `global`, `nonlocal`, `lambda`, `yield`, `assert`, `async`, `await`): `yellow` + - MATCH. +- Operators (`+`, `-`, `*`, `**`, `/`, `//`, `%`, `=`, `==`, `!=`, `<`, `>`, + `<=`, `>=`, `+=`, `-=`, `*=`, `/=`, `//=`, `%=`, `**=`, `<<=`, `>>=`, `|`, + `&`, `^`, `~`, `<<`, `>>`, `->`, `:=`): `yellow` - MATCH. +- Colon `:`: `brightred` - MATCH. +- Brackets/parens (`(`, `)`, `[`, `]`, `{`, `}`): `brightcyan` - MATCH. +- Comma `,`: `brightcyan` - MATCH. +- Semicolons `;`: `brightmagenta` - MATCH. +- Strings (double-quoted `"..."`, single-quoted `'...'`, triple-quoted + `"""..."""` and `'''...'''`): `green` - MATCH. +- Escape sequences (`\n`, `\t`, `\\`): `brightgreen` - MATCH. +- Comments (`# ...`): `brown` - MATCH. +- `self` keyword: `brightred` - MATCH. +- Dunder methods (`__init__`, `__repr__`, `__str__`, `__len__`, `__getitem__`): + `lightgray` - MATCH. +- Decorators (`@staticmethod`, `@classmethod`, `@property`): `brightred` - + MATCH. + +Intentional improvements over legacy +------------------------------------- + +- Dunder names used as attributes (e.g., `__slots__`): TS colors these as + `brightred` via the `variable.builtin` capture when they match `^__.*__$` in + identifier context. Legacy colors them as `lightgray` only when they appear as + whole keywords. Both are reasonable; TS is more consistent since it applies to + any dunder-pattern identifier. +- Dot `.` operator: legacy colors `.` as `white/Orange` (a custom color). TS + does not explicitly capture dots, leaving them as default text. This avoids + the unusual custom color. +- Bitwise operators `|`, `&`, `^`, `~`: TS colors these as `yellow` (via + `operator.word`). 
Legacy does not color `|`, `&`, `^` (they appear as + default). TS provides more complete operator coverage. +- Compound assignment `&=`, `|=`, `^=`: TS colors these as `yellow`. Legacy only + colors the `=` portion as `yellow`, leaving the `&`/`|`/`^` prefix uncolored. + TS treats the compound operator as a single token. +- `@` decorator syntax: TS colors the `@` as part of the decorator node and the + decorator name as `brightred` via `function.special`. Legacy colors `@` + followed by a known builtin name as `brightcyan` (from the builtins list), not + as a decorator. TS is structurally more accurate. +- Walrus operator `:=`: TS colors it as `yellow` (unified operator). Legacy + colors `:` as `brightred` and `=` as `yellow` separately, creating a + split-color effect. + +Known shortcomings +------------------ + +- Builtin functions (`len`, `range`, `open`, `print`, `int`, `str`, `list`, + `dict`, `type`, `isinstance`, etc.): legacy colors these as `brightcyan` via a + hardcoded keyword list of ~70 builtins. TS does not distinguish builtin + functions from user-defined functions. `print` is colored as `yellow` by + legacy (as a keyword) but uncolored by TS. +- String method names (`split`, `join`, `strip`, `upper`, `lower`, etc.): legacy + colors these as `magenta` via a keyword list. TS does not color method names. +- Format specifiers in strings (`%s`, `%d`, etc.): legacy colors these as + `brightgreen` inside string contexts. TS does not parse format specifiers + within string nodes. +- F-string interpolation (`{self.name}`): TS colors the `{` and `}` delimiters + inside f-strings as `brightcyan` and `self` as `brightred`. Legacy colors the + entire f-string uniformly as `green`. TS provides richer f-string + highlighting. +- Concatenated strings (`"hello " "world"`): TS captures both string parts as + `green` via `concatenated_string`. Legacy also colors them `green`. Both + match, though TS has a dedicated node type for this. 
+- The `__eq__` dunder in `def __eq__` line: TS colors it as `brightred` + (matching `^__.*__$` via `variable.builtin`) rather than `lightgray` (which + the `constant` capture would give). Legacy colors it as `brightred` (via + `keyword whole __+__`). Both are `brightred` here - MATCH. diff --git a/tests/syntax/samples/python.py b/tests/syntax/samples/python.py new file mode 100644 index 0000000000..aa3608f0ea --- /dev/null +++ b/tests/syntax/samples/python.py @@ -0,0 +1,180 @@ +# Comment: demonstrate all Python syntax highlighting features +# This file exercises every TS capture from python-highlights.scm + +import os +from sys import argv as args + +# Keywords +def greet(name): + if name is not None: + print("Hello, " + name) + elif name == "": + pass + else: + raise ValueError("empty") + +class Animal: + """Docstring for Animal class.""" + + def __init__(self, name): + self.name = name + + def __repr__(self): + return f"Animal({self.name})" + + def __str__(self): + return self.name + + def __len__(self): + return len(self.name) + + def __eq__(self, other): + return self.name == other.name + +# Async/await +async def fetch_data(): + await some_coroutine() + return True + +# Loop keywords +for i in range(10): + if i == 5: + continue + if i == 8: + break + +while True: + break + +# Try/except/finally +try: + x = 1 / 0 +except ZeroDivisionError: + pass +finally: + del x + +# With statement +with open("/dev/null") as f: + pass + +# Lambda +square = lambda x: x * x + +# Yield +def gen(): + yield 1 + yield from range(5) + +# Global/nonlocal +counter = 0 +def increment(): + global counter + counter += 1 + +def outer(): + x = 10 + def inner(): + nonlocal x + x += 1 + +# Assert +assert True, "should be true" + +# Operators +a = 10 +b = 20 +_ = a + b +_ = a - b +_ = a * b +_ = a ** 2 +_ = a / b +_ = a // b +_ = a % b +_ = a | b +_ = a & b +_ = a ^ b +_ = ~a +_ = a << 2 +_ = a >> 2 +_ = a == b +_ = a != b +_ = a < b +_ = a > b +_ = a <= b +_ = a >= b +c = 0 +c += 1 +c -= 1 
+c *= 2 +c /= 2 +c //= 2 +c %= 3 +c **= 2 +c <<= 1 +c >>= 1 +c &= 0xFF +c |= 0x01 +c ^= 0x10 + +# Walrus operator +if (n := len("hello")) > 3: + pass + +# Type annotation arrow +def add(x: int, y: int) -> int: + return x + y + +# At operator (matrix multiply) +# result = matrix_a @ matrix_b + +# Colon +data: dict = {"key": "value"} + +# Brackets and delimiters +lst = [1, 2, 3] +tpl = (4, 5, 6) +st = {7, 8, 9} + +# Semicolons +a = 1; b = 2; c = 3 + +# Strings +s1 = "double quoted" +s2 = 'single quoted' +s3 = """triple double +quoted""" +s4 = '''triple single +quoted''' + +# Escape sequences +s5 = "newline\n tab\t backslash\\" + +# self reference +class Point: + def __init__(self, x, y): + self.x = x + self.y = y + +# Dunder names +class Custom: + __slots__ = ("_value",) + def __getitem__(self, key): + return self._value + +# Decorators +@staticmethod +def helper(): + pass + +class Decorated: + @classmethod + def create(cls): + return cls() + + @property + def value(self): + return 42 + +# Concatenated strings +msg = ("hello " "world") diff --git a/tests/syntax/samples/r-report.md b/tests/syntax/samples/r-report.md new file mode 100644 index 0000000000..7c19991173 --- /dev/null +++ b/tests/syntax/samples/r-report.md @@ -0,0 +1,87 @@ +R syntax highlighting: TS vs Legacy comparison report +===================================================== + +Sample file: `tests/syntax/samples/r.r` +Legacy reference: `misc/syntax/r.syntax` +TS query: `misc/syntax-ts/queries-override/r-highlights.scm` +TS colors: `misc/syntax-ts/colors.ini` `[r]` + +Aligned with legacy +------------------- + +- Comments (`# ...`): `brown` via `@comment` - MATCH +- Assignment operator `<-`: `brightred` via `@function.special` - MATCH +- Global assignment `<<-`: `brightred` via `@function.special` - MATCH +- Right assignment `->`: `brightred` via `@function.special` - MATCH +- Right global assignment `->>`: `brightred` via `@function.special` - MATCH +- `function` keyword: `yellow` via `@keyword` - 
MATCH +- Control flow `if`, `for`, `while`: `brightmagenta` via `@keyword.control` - + MATCH +- Arithmetic operators (`+`, `-`, `*`, `/`, `^`): `yellow` via `@operator.word` + - MATCH +- Comparison operators (`==`, `!=`, `<`, `>`, `<=`, `>=`): `yellow` via + `@operator.word` - MATCH +- Logical operators (`&`, `&&`, `|`, `||`, `!`): `yellow` via `@operator.word` - + MATCH +- Strings (`"hello"`, `"alpha"`, etc.): `brightgreen` via `@string.special` - + MATCH +- Colon operator (`:`): `yellow` via `@operator.word` - MATCH +- Dollar sign (`$`): `yellow` via `@operator.word` - MATCH +- Tilde (`~`): `yellow` via `@operator.word` - MATCH +- Pipe operator (`|>`): `yellow` via `@operator.word` - MATCH +- Namespace operators (`::`, `:::`): `yellow` via `@operator.word` - MATCH +- Function calls (`library`, `c`, `seq`, `rep`, `data.frame`, `paste`, `mean`, + `sum`, `sqrt`, `plot`, `hist`, `boxplot`, `apply`, `sapply`, `tapply`, + `lapply`, `lm`, `summary`, `factor`, `levels`, `list`, `names`, `str`, + `read.table`, `write.table`, `grep`, `gsub`, `abs`, `log`, `round`, `max`, + `min`, `range`, `length`, `cor`, `t.test`, `matrix`, `rnorm`, etc.): `yellow` + via `@keyword` on `call > function: (identifier)` - MATCH + +Intentional improvements over legacy +------------------------------------- + +- TS colors `repeat`, `in`, `return`, `next`, `break` as `brightmagenta` via + `@keyword.control`. Legacy leaves `repeat` unrecognized (default), colors + `return` as `yellow`, and does not specifically highlight `next`/`break`. TS + provides consistent control flow coloring. +- TS colors special constants `TRUE`, `FALSE`, `NA`, `NULL`, `Inf`, `NaN` as + `lightgray` via `@constant`. Legacy does not recognize these as special + tokens, leaving them uncolored. TS makes them visually distinct. +- TS colors `=` (used for assignment in function arguments) as `brightred` via + `@function.special`, consistent with `<-`. Legacy colors `=` as `red` + (slightly different shade).
TS unifies all assignment operators. +- TS colors `<=` and `>=` as complete `yellow` tokens. Legacy splits them -- `<` + or `>` as `yellow` then `=` as `red` -- causing inconsistent coloring. +- TS colors commas as `brightcyan` via `@delimiter`, providing subtle + punctuation visibility. Legacy leaves commas uncolored. +- TS colors namespace LHS identifiers (`stats`, `base`) as `yellow` via `@type`. + Legacy colors the `::` and everything after as a single yellow token but does + not distinguish the namespace part. +- TS colors property names after `$` (e.g. `$name`, `$age`, `$score`) as + `brightcyan` via `@property`. Legacy leaves these as default color. +- TS colors named parameters in function calls (e.g. `by`, `times`, `nrow`, + `ncol`, `sep`, `data`, `header`, `breaks`, `digits`, `mu`) as `brightcyan` via + `@property`. Legacy only colors some of these (like `nrow`, `ncol`) if they + happen to match known function names. +- TS recognizes `toupper`, `require`, `filter`, `select` as function calls and + colors them `yellow`. Legacy misses `toupper`, `require`, and dplyr verbs + since they are not in its hardcoded function list. + +Differences from legacy +----------------------- + +- Legacy colors parentheses `(` `)` as `brightcyan` and brackets `[` `]` as + `brightblue`, and braces `{` `}` as `white`. TS does not explicitly color + brackets/braces/parentheses (they remain default color). This is a minor loss + of visual structure. + +Known shortcomings +------------------ + +- TS does not color brackets `[` `]`, braces `{` `}`, or parentheses `(` `)` + with distinct colors. Legacy uses three different colors for these three + bracket types, which aids readability. +- Neither TS nor legacy highlights numeric literals with a distinct color -- + numbers remain in default foreground. +- TS does not distinguish single-quoted strings from double-quoted strings. Both + get `brightgreen` via `@string.special`, which matches legacy behavior. 
diff --git a/tests/syntax/samples/r.r b/tests/syntax/samples/r.r new file mode 100644 index 0000000000..96c2446c9c --- /dev/null +++ b/tests/syntax/samples/r.r @@ -0,0 +1,158 @@ +# R language sample file exercising all TS captures + +# Library imports +library(ggplot2) +library(dplyr) +require(stats) + +# Variable assignment with different operators +x <- 10 +y <<- 20 +30 -> z +40 ->> w +name = "hello" + +# Function definition +add_numbers <- function(a, b) { + result <- a + b + return(result) +} + +# Function with default parameters +greet <- function(name = "World", excited = FALSE) { + if (excited) { + msg <- paste("Hello,", name, "!") + } else { + msg <- paste("Hello,", name) + } + msg +} + +# Vectors and sequences +nums <- c(1, 2, 3, 4, 5) +chars <- c("alpha", "beta", "gamma") +seq_vals <- seq(1, 100, by = 5) +rep_vals <- rep(0, times = 10) + +# Data frame creation +df <- data.frame( + name = c("Alice", "Bob", "Charlie"), + age = c(30, 25, 35), + score = c(92.5, 88.0, 95.3) +) + +# Accessing elements +first_name <- df$name[1] +ages <- df[, "age"] +subset_df <- df[df$age > 25, ] + +# If/else conditional +check_value <- function(x) { + if (x > 100) { + "high" + } else if (x > 50) { + "medium" + } else { + "low" + } +} + +# For loop +total <- 0 +for (i in 1:10) { + total <- total + i +} + +# While loop +count <- 0 +while (count < 5) { + count <- count + 1 +} + +# Repeat with break and next +repeat { + count <- count - 1 + if (count == 3) next + if (count <= 0) break +} + +# Apply family +mat <- matrix(1:12, nrow = 3, ncol = 4) +row_sums <- apply(mat, 1, sum) +col_means <- apply(mat, 2, mean) +squared <- sapply(nums, function(x) x^2) +named_list <- list(a = 1:3, b = 4:6) +result <- lapply(named_list, length) +tapply(df$score, df$name, mean) + +# Statistical functions +avg <- mean(nums) +std_dev <- sqrt(sum((nums - avg)^2) / length(nums)) +correlation <- cor(df$age, df$score) +t_result <- t.test(nums, mu = 3) + +# String operations +upper <- toupper("hello") 
+pasted <- paste("x", "y", sep = "-") +found <- grep("^A", df$name) +replaced <- gsub("o", "0", "hello world") + +# Comparison and logical operators +a <- TRUE +b <- FALSE +and_result <- a & b +or_result <- a | b +not_result <- !a +double_and <- a && b +double_or <- a || b +eq <- x == y +neq <- x != y +lt <- x < y +gt <- x > y +lte <- x <= y +gte <- x >= y + +# Special values +na_val <- NA +null_val <- NULL +inf_val <- Inf +nan_val <- NaN + +# Namespace access +stats::median(nums) +base::print("hello") +base:::internal_fn + +# Pipe operator +df |> filter(age > 25) |> select(name, score) + +# Tilde formula +model <- lm(score ~ age, data = df) +summary(model) + +# Math operations +abs_val <- abs(-5) +log_val <- log(100) +sqrt_val <- sqrt(16) +round_val <- round(3.14159, digits = 2) +max_val <- max(nums) +min_val <- min(nums) +range_vals <- range(nums) + +# Plotting functions +plot(df$age, df$score, main = "Age vs Score") +hist(rnorm(1000), breaks = 30) +boxplot(score ~ name, data = df) + +# Read and write +read.table("data.txt", header = TRUE) +write.table(df, "output.csv", sep = ",") + +# Factor and levels +categories <- factor(c("low", "med", "high", "low")) +lvls <- levels(categories) + +# List operations +my_list <- list(x = 1, y = "two", z = TRUE) +names(my_list) +str(my_list) diff --git a/tests/syntax/samples/ruby-report.md b/tests/syntax/samples/ruby-report.md new file mode 100644 index 0000000000..dcdb0f3339 --- /dev/null +++ b/tests/syntax/samples/ruby-report.md @@ -0,0 +1,84 @@ +Ruby syntax highlighting: TS vs Legacy comparison report +======================================================== + +Sample file: `tests/syntax/samples/ruby.rb` +Legacy reference: `misc/syntax/ruby.syntax` +TS query: `misc/syntax-ts/queries-override/ruby-highlights.scm` +TS colors: `misc/syntax-ts/colors.ini` `[ruby]` + +Aligned with legacy +------------------- + +- Keywords (`if`, `elsif`, `else`, `unless`, `while`, `until`, `for`, `in`, + `case`, `when`, `do`, `end`, `begin`, 
`rescue`, `ensure`, `retry`, `break`, + `next`, `redo`, `return`, `def`, `class`, `module`, `alias`) -> `magenta` - + MATCH. +- `self` and `super` -> `magenta` - MATCH. +- `nil`, `true`, `false` -> `brightred` via `function.special` - MATCH. +- Built-in methods (`require`, `require_relative`, `include`, `attr_reader`, + `attr_writer`, `attr_accessor`, `public`, `private`, `protected`, `raise`) -> + `magenta` - MATCH. +- Arithmetic operators (`+`, `-`, `*`, `/`, `%`, `**`) -> `yellow` - MATCH. +- Comparison operators (`==`, `!=`, `===`, `<=>`, `<`, `>`, `<=`, `>=`) -> + `yellow` - MATCH. +- Logical operators (`&&`, `||`, `!`, `&`, `|`, `^`, `~`) -> `yellow` - MATCH. +- Bitshift operators (`<<`, `>>`) -> `yellow` - MATCH. +- Range operators (`..`, `...`) -> `yellow` - MATCH. +- Assignment operators (`=`, `+=`, `-=`, `*=`, `/=`, `||=`, `&&=`) -> `yellow` - + MATCH. +- Pattern matching operators (`=~`, `!~`) -> `yellow` - MATCH. +- Hash rocket (`=>`) -> `yellow` - MATCH. +- Brackets/parens (`(`, `)`, `[`, `]`, `{`, `}`) -> `brightcyan` - MATCH. +- Delimiters (`,`, `:`, `::`) -> `brightcyan` - MATCH. +- Double-quoted strings -> `green` - MATCH. +- Single-quoted strings -> `green` (TS) vs `brightgreen` (legacy) - MINOR + MISMATCH. +- Symbols (`:symbol`, hash keys) -> `white` - MATCH. +- Global variables (`$global_var`) -> `brightgreen` - MATCH. +- Instance variables (`@instance_var`) -> `white` - MATCH. +- Comments (`#`) -> `brown` - MATCH. +- `not`, `and`, `or` keyword operators -> `magenta` - MATCH. +- `yield` -> `magenta` - MATCH. +- `undef` -> `magenta` - MATCH. +- `BEGIN`/`END` -> `magenta` (TS) vs `red` (legacy) - MISMATCH. + +Intentional improvements over legacy +------------------------------------- + +- TS properly highlights `BEGIN`/`END` as `magenta` keywords, consistent with + other keywords, while legacy uses `red`. 
+- TS highlights escape sequences inside strings (e.g., `\t`, `\n`) as + `brightgreen` via `string.special`, giving better visibility into string + content. +- TS correctly colors regex content as `brightgreen` via `string.special`, while + legacy does not distinguish regex internals. +- TS highlights `extend` as `magenta` via the `#match?` identifier rule, while + legacy does not recognize it. +- TS correctly highlights `puts` calls contextually, while legacy colors them as + `yellow` (function) -- both approaches are reasonable. +- TS highlights heredoc body content as `green` string, matching string + semantics. +- TS highlights `%w[...]` word arrays as `green` string content. +- TS recognizes `:"dynamic symbol"` as a symbol construct. +- TS highlights hash key symbols (e.g., `a:` in `{a: 1}`) as `white` via + `hash_key_symbol`. + +Known shortcomings +------------------ + +- TS does not distinguish single-quoted strings (`brightgreen` in legacy) from + double-quoted strings (`green`) -- both render as `green`. +- TS renders shebang line as `brown` (comment) while legacy uses + `brightcyan on black`. +- TS does not highlight `puts` as `yellow` function -- appears uncolored in some + positions where legacy colors it consistently. +- Legacy colors many class/module methods as `yellow` (e.g., `new`, `close`, + `open`, `print`, `puts`, `eval`, `sort`, `push`, `pop`) that TS does not + explicitly list. +- TS does not highlight system/special variables (`$DEBUG`, `$VERBOSE`, `STDIN`, + `STDOUT`, `STDERR`, etc.) with `red` as legacy does. +- TS does not highlight math module constants (`Math::PI`, `Math::E`) or numeric + class methods (`abs`, `modulo`) with color as legacy does. +- Block delimiter `|` for block parameters shows as `yellow` (operator) in both + TS and legacy, but in TS it comes from the operator list while legacy gets it + from the `|` operator keyword. 
diff --git a/tests/syntax/samples/ruby.rb b/tests/syntax/samples/ruby.rb new file mode 100644 index 0000000000..6e7611bd77 --- /dev/null +++ b/tests/syntax/samples/ruby.rb @@ -0,0 +1,193 @@ +#!/usr/bin/ruby +# Sample file demonstrating Ruby syntax highlighting features. +# Exercises all captures from the TS query override. + +# Keywords -> magenta +require 'json' +require_relative 'helper' + +module MyModule + class MyClass + attr_reader :name + attr_writer :age + attr_accessor :email + + def initialize(name, age) + @name = name + @age = age + self + end + + def greet + return "Hello, #{@name}" + end + + public + def public_method + yield if block_given? + end + + private + def private_method + super + end + + protected + def protected_method + nil + end + end +end + +# Control flow +if true + puts "true" +elsif false + puts "false" +else + puts "other" +end + +unless nil + puts "not nil" +end + +# Loops +while true + break +end + +until false + break +end + +for i in 0..10 + next if i == 5 + redo if i == 3 +end + +case 42 +when 1 + puts "one" +when 42 + puts "forty-two" +end + +# Exception handling +begin + raise "error" +rescue => e + retry if false +ensure + puts "cleanup" +end + +# Boolean and nil constants -> brightred +x = true +y = false +z = nil + +# Operators -> yellow +a = 1 + 2 +b = 5 - 3 +c = 4 * 2 +d = 10 / 2 +e = 7 % 3 +f = 2 ** 8 + +# Comparison operators +eq = (1 == 2) +neq = (1 != 2) +teq = (1 === 2) +cmp = (1 <=> 2) +lt = (1 < 2) +gt = (1 > 2) +lte = (1 <= 2) +gte = (1 >= 2) + +# Logical operators +g = true && false +h = true || false +i = !true +j = true & false +k = true | false +l = true ^ false +m = ~0 + +# Bitshift operators +n = 1 << 4 +o = 16 >> 2 + +# Range operators +range1 = 1..10 +range2 = 1...10 + +# Assignment operators +p = 0 +p += 1 +p -= 1 +p *= 2 +p /= 2 +p ||= 42 +p &&= 0 + +# Pattern matching +result = ("hello" =~ /hel/) +nomatch = ("hello" !~ /xyz/) + +# Hash rocket +hash = {a: 1, "b" => 2} + +# Brackets/parens/delimiters -> 
brightcyan +arr = [1, 2, 3] +hash2 = {key: "value"} +method_call(arg1, arg2) + +# Strings -> green +str1 = "double quoted" +str2 = 'single quoted' +str3 = %w[one two three] +heredoc = <<~HEREDOC + This is a heredoc + with multiple lines +HEREDOC + +# Escape sequences -> brightgreen +esc = "tab\there\nnewline" + +# Regex -> brightgreen +regex = /pattern/i + +# Symbols -> white +sym1 = :symbol +sym2 = :"dynamic symbol" +hash3 = {key: "value"} + +# Global variables -> brightgreen +$global_var = "global" + +# Instance variables -> white +@instance_var = "instance" + +# do..end blocks +[1, 2, 3].each do |item| + puts item +end + +# alias, undef, not, and, or +alias new_method greet +undef_method :old if false +result2 = not false +result3 = true and false +result4 = true or false + +# BEGIN/END +BEGIN { puts "start" } +END { puts "finish" } + +# include, extend +include Comparable if false +extend Enumerable if false + +# Comment +# This is a line comment diff --git a/tests/syntax/samples/rust-report.md b/tests/syntax/samples/rust-report.md new file mode 100644 index 0000000000..87603c0569 --- /dev/null +++ b/tests/syntax/samples/rust-report.md @@ -0,0 +1,79 @@ +Rust syntax highlighting: TS vs Legacy comparison report +========================================================= + +Sample file: `rust.rs` +Legacy reference: `misc/syntax/rust.syntax` +TS query: `misc/syntax-ts/queries-override/rust-highlights.scm` +TS colors: `misc/syntax-ts/colors.ini` `[rust]` + +Aligned with legacy +------------------- + +- Language keywords (`as`, `async`, `await`, `break`, `const`, `continue`, + `dyn`, `else`, `enum`, `extern`, `fn`, `for`, `if`, `impl`, `in`, `let`, + `loop`, `match`, `mod`, `move`, `pub`, `ref`, `return`, `static`, `struct`, + `trait`, `type`, `unsafe`, `use`, `where`, `while`): `yellow` - MATCH +- `crate`, `super`, `mut` as keywords: `yellow` - MATCH +- `self`: `brightgreen` - MATCH +- Boolean literals (`true`, `false`): `brightgreen` - MATCH +- Enum variants `Some`, 
`None`, `Ok`, `Err`: `brightgreen` - MATCH +- Primitive types (`bool`, `char`, `i8`...`i64`, `u8`...`u64`, `f32`, `f64`, + `isize`, `usize`, `str`): `brightcyan` - MATCH +- Type identifiers (`String`, `Vec`, `Option`, `Result`, `HashMap`, `Point`, + `Shape`, `Drawable`, etc.): `brightcyan` - MATCH +- Macros (`println!`, `eprintln!`, `format!`, `vec!`, `assert!`, `assert_eq!`, + `dbg!`, `todo!`, `write!`): `brightmagenta` - MATCH +- `macro_rules!` definition name: `brightmagenta` - MATCH +- Strings (double-quoted `"..."`): `green` - MATCH +- Raw strings (`r#"..."#`): `green` - MATCH +- Char literals (`'A'`, `'\n'`): `brightgreen` - MATCH +- Number literals (decimal, hex, octal, binary, float, scientific, + underscore-separated): `brightgreen` - MATCH +- Comments (line `//` and block `/* */`): `brown` - MATCH +- Attributes (`#[derive(...)]`, `#[cfg(test)]`, `#[test]`): `white` via + `tag.special` capture on `#` - MATCH +- Labels (`'outer`): `cyan` - MATCH + +Intentional improvements over legacy +------------------------------------- + +- All type identifiers (user-defined types like `Point`, `Shape`, `Drawable`, + `Tagged`, `Formatter`, `Error`, `Box`, `Clone`) are colored as `brightcyan` + via the `type_identifier` capture. Legacy only colors a hardcoded list of + common types (`String`, `Vec`, `Option`, `Result`, plus primitives). TS gives + consistent type coloring for all types. +- `Self` (capitalized) as a type: TS colors it `brightcyan` via + `type_identifier`. Legacy colors it as `yellow` (keyword). TS is more + semantically correct since `Self` refers to a type. +- `async`/`await`/`dyn`/`yield` keywords: TS includes these in the keyword list + (`yellow`). Legacy does not list `async`, `await`, or `dyn` (they would appear + uncolored). `yield` is colored `red` by legacy (reserved keyword) vs `yellow` + by TS. +- Attribute content after `#`: TS only colors the `#` as `white` and lets the + rest be default. 
Legacy colors the entire `#[...]` or `#![...]` context as + `white`. TS is more granular. +- `macro_rules!` body: TS colors `$x` metavariables via the grammar (the + upstream grammar handles these). Legacy colors `$+` patterns as `brightblue` + via a keyword match. TS does not replicate the `brightblue` for metavariables + but the macro name coloring is consistent. + +Known shortcomings +------------------ + +- Reserved keywords (`abstract`, `become`, `box`, `do`, `final`, `macro`, + `override`, `priv`, `sizeof`, `typeof`, `unsized`, `virtual`): legacy colors + these as `red` to flag them. TS does not distinguish reserved-for-future + keywords; they appear uncolored. This is a minor loss of feedback for code + using reserved words. +- `yield` keyword: legacy colors it `red` (reserved). TS colors it `yellow` + (regular keyword). Since `yield` is now a real keyword in nightly Rust, the TS + approach is more forward-looking. +- Lifetime annotations (`'_` in `Formatter<'_>`): TS does not have a specific + capture for lifetimes. Legacy does not color them either. Both leave lifetimes + as default text (except when they appear as labels). +- Attribute brackets `[...]` after `#`: TS only colors the `#` as `white`. The + rest of the attribute (including `[`, `]`, and content like `derive`, `cfg`) + appears as default. Legacy colors the entire `#[...]` block as `white`. This + means attribute content is less visible in TS. +- Range operator `..`: neither TS nor legacy explicitly colors the `..` range + operator. It appears as default text in both. 
diff --git a/tests/syntax/samples/rust.rs b/tests/syntax/samples/rust.rs new file mode 100644 index 0000000000..602792e287 --- /dev/null +++ b/tests/syntax/samples/rust.rs @@ -0,0 +1,229 @@ +// Comment: demonstrate all Rust syntax highlighting features +// This file exercises every TS capture from rust-highlights.scm + +use std::collections::HashMap; +use std::fmt; + +// Keywords: struct, impl, pub, fn, let, mut, return, if, else +pub struct Point { + pub x: f64, + pub y: f64, +} + +impl Point { + pub fn new(x: f64, y: f64) -> Self { + Point { x, y } + } + + pub fn distance(&self, other: &Point) -> f64 { + let dx = self.x - other.x; + let dy = self.y - other.y; + (dx * dx + dy * dy).sqrt() + } +} + +// Trait +pub trait Drawable { + fn draw(&self); +} + +impl Drawable for Point { + fn draw(&self) { + println!("Drawing at ({}, {})", self.x, self.y); + } +} + +impl fmt::Display for Point { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "({}, {})", self.x, self.y) + } +} + +// Enum with variants +pub enum Shape { + Circle(f64), + Rectangle(f64, f64), +} + +// Match expression +fn area(s: &Shape) -> f64 { + match s { + Shape::Circle(r) => std::f64::consts::PI * r * r, + Shape::Rectangle(w, h) => w * h, + } +} + +// Keywords: for, in, while, loop, break, continue +fn loops() { + for i in 0..10 { + if i == 5 { + continue; + } + if i == 8 { + break; + } + } + + let mut n = 0; + while n < 10 { + n += 1; + } + + loop { + break; + } +} + +// Async/await +async fn fetch_data() -> Result> { + let data = async { String::from("hello") }.await; + Ok(data) +} + +// Keywords: const, static, type, where, extern, unsafe, mod, move +const MAX_SIZE: usize = 100; +static GLOBAL: &str = "global"; + +type Pair = (T, T); + +mod inner { + pub fn helper() {} +} + +extern "C" { + fn abs(input: i32) -> i32; +} + +fn use_move() { + let data = vec![1, 2, 3]; + let closure = move || { + println!("{:?}", data); + }; + closure(); +} + +fn generic(val: T) -> T +where + T: 
Clone, +{ + val.clone() +} + +// Keywords: ref, as, dyn, yield +fn references() { + let x = 42; + let ref r = x; + let _ = r as &i32; + let _: Box = Box::new(Point::new(0.0, 0.0)); +} + +// self, crate, super +fn module_refs() { + let _ = crate::inner::helper; + // super::something() would refer to parent module +} + +// Boolean literals: true, false +fn booleans() -> bool { + let a = true; + let b = false; + a && !b +} + +// Macros +fn macros() { + println!("formatted: {}", 42); + eprintln!("error message"); + format!("template {}", "value"); + vec![1, 2, 3]; + assert!(true); + assert_eq!(1, 1); + dbg!(42); + todo!("not yet"); +} + +macro_rules! my_macro { + ($x:expr) => { + $x + 1 + }; +} + +// Types: type_identifier and primitive_type +fn type_examples() { + let _a: bool = true; + let _b: char = 'z'; + let _c: i8 = -1; + let _d: i16 = -2; + let _e: i32 = -3; + let _f: i64 = -4; + let _g: u8 = 1; + let _h: u16 = 2; + let _i: u32 = 3; + let _j: u64 = 4; + let _k: f32 = 1.0; + let _l: f64 = 2.0; + let _m: isize = -5; + let _n: usize = 5; + let _o: str = *"hello"; + let _p: String = String::from("world"); + let _q: Vec = Vec::new(); + let _r: Option = Some(42); + let _s: Result = Ok(0); + let _t: HashMap = HashMap::new(); +} + +// Some/None/Ok/Err +fn option_result() { + let x: Option = Some(10); + let y: Option = None; + let a: Result = Ok(1); + let b: Result = Err("fail"); + _ = (x, y, a, b); +} + +// Number literals +fn numbers() { + let _dec = 42; + let _hex = 0xFF; + let _oct = 0o77; + let _bin = 0b1010; + let _flt = 3.14; + let _sci = 1e10; + let _sep = 1_000_000; +} + +// Strings +fn strings() { + let _s1 = "hello world"; + let _s2 = r#"raw string"#; + let _c1 = 'A'; + let _c2 = '\n'; +} + +// Attributes +#[derive(Debug, Clone)] +struct Tagged { + value: i32, +} + +#[cfg(test)] +mod tests { + #[test] + fn it_works() { + assert_eq!(2 + 2, 4); + } +} + +// Labels +fn labeled_loop() { + 'outer: for i in 0..10 { + for j in 0..10 { + if i + j > 15 { + break 
'outer; + } + } + } +} + +/* Block comment + spanning multiple lines */ diff --git a/tests/syntax/samples/slang-report.md b/tests/syntax/samples/slang-report.md new file mode 100644 index 0000000000..bfc426e25e --- /dev/null +++ b/tests/syntax/samples/slang-report.md @@ -0,0 +1,62 @@ +S-Lang syntax highlighting: TS vs Legacy comparison report +=========================================================== + +Sample file: `tests/syntax/samples/slang.sl` +Legacy reference: `misc/syntax/slang.syntax` +TS query: `misc/syntax-ts/queries-override/slang-highlights.scm` +TS colors: `misc/syntax-ts/colors.ini` `[slang]` + +Aligned with legacy +------------------- + +- Keywords (`variable`, `define`, `if`, `else`, `while`, `foreach`, `forever`, + `for`, `break`, `continue`, `return`, `switch`, `case`, `do`, `goto`, + `struct`, `typedef`, `static`, `sizeof`, `using`, `delete`, `private`, + `protected`, `public`, `namespace`) -> `white` - MATCH. +- Logical operators (`and`, `or`, `xor`, `not`) -> `white` - MATCH. +- Error blocks (`EXIT_BLOCK`, `ERROR_BLOCK`) -> `white` - MATCH. +- S-Lang types (`Char_Type`, `Integer_Type`, `String_Type`, `Double_Type`, + `Float_Type`, `Array_Type`, `Assoc_Type`) -> `yellow` - MATCH. +- Double-quoted strings -> `green` - MATCH. +- Escape sequences inside strings (`\t`, `\n`, `\\`, `\"`, `\101`) -> + `brightgreen` - MATCH. +- Format specifiers inside strings (`%s`, `%d`) -> `brightgreen` - MATCH. +- Comments (`%` to end of line) -> `brown` - MATCH. +- Preprocessor lines (`#include`, `#define`) -> `brightred` - MATCH. +- Include file in angle brackets (``) -> `red` - MATCH. +- Arithmetic operators (`+`, `-`, `*`, `/`, `=`, `==`, `!=`, `>`, `<`, `>=`) -> + `white` - MATCH. +- Semicolons -> `white` - MATCH. +- Braces (`{`, `}`) -> `brightcyan` - MATCH. +- Parentheses (`(`, `)`) -> `brightcyan` - MATCH. +- Brackets (`[`, `]`) -> `brightcyan` - MATCH. +- Commas -> `brightcyan` - MATCH. +- Colons -> `brightcyan` - MATCH. 
+- Identifiers and numbers -> `lightgray` (default) - MATCH. + +Intentional improvements over legacy +------------------------------------- + +- The TS output is identical to legacy for this file. Since the slang grammar + inherits from C and the S-Lang language closely resembles C syntax, both + engines produce the same highlighting. The TS grammar uses C/C++ tree-sitter + captures which happen to align perfectly with legacy's keyword-based approach + for this sample. + +Known shortcomings +------------------ + +- The slang grammar has no entry in `misc/syntax-ts/extensions`, meaning TS + file-type detection may not automatically associate `.sl` files with the slang + grammar. Legacy uses `Syntax.in` to map `.sl` files to `slang.syntax`. +- TS inherits from the C grammar rather than having a dedicated S-Lang parser. + S-Lang-specific constructs like `EXIT_BLOCK`, `ERROR_BLOCK`, + `EXECUTE_ERROR_BLOCK`, `orelse`, `andelse`, `loop`, and `typecast` are matched + by legacy as keywords but may not be structurally recognized by the C + tree-sitter parser. +- Legacy highlights `default` in switch/case as default text, while TS also + leaves it uncolored -- neither marks it as a keyword, though in S-Lang + `default` is not actually used (S-Lang uses a different switch syntax). +- The `++` operator after `count` shows as `white` (`++` and `;` merged) in both + engines, but TS could potentially distinguish the increment operator + structurally. 
diff --git a/tests/syntax/samples/slang.sl b/tests/syntax/samples/slang.sl new file mode 100644 index 0000000000..18ae5ca6d1 --- /dev/null +++ b/tests/syntax/samples/slang.sl @@ -0,0 +1,137 @@ +% S-Lang sample file for syntax highlighting comparison +% This exercises TS captures from slang-highlights.scm (inherits: c) + +#include +#define MAX_SIZE 100 + +% Variable definitions and types +variable x = 10; +variable name = "hello world"; +variable flag = 1; + +% S-Lang types +typedef struct { + Char_Type ch; + Integer_Type count; + String_Type label; + Double_Type value; + Float_Type ratio; + Array_Type items; + Assoc_Type table; +} MyStruct; + +% Function definition +define greet(name) +{ + variable msg = sprintf("Hello, %s!\n", name); + return msg; +} + +% Function with multiple parameters +define add(a, b) +{ + return a + b; +} + +% If/else control flow +define check_value(x) +{ + if (x > 0) + { + printf("positive: %d\n", x); + } + else if (x < 0) + { + printf("negative: %d\n", x); + } + else + { + printf("zero\n"); + } +} + +% While loop +define countdown(n) +{ + while (n > 0) + { + printf("%d\n", n); + n = n - 1; + } +} + +% Foreach loop +define show_items(list) +{ + variable item; + foreach item (list) + { + printf("item: %s\n", item); + } +} + +% Forever loop with break +define wait_loop() +{ + variable count = 0; + forever + { + count++; + if (count >= 10) + break; + } +} + +% Switch/case +define describe(code) +{ + switch (code) + { + case 1: return "one"; + case 2: return "two"; + default: return "other"; + } +} + +% String operations with escape sequences +variable s1 = "tab:\there\n"; +variable s2 = "backslash: \\"; +variable s3 = "quote: \"inside\""; +variable s4 = "octal: \101\102"; + +% Array operations +variable arr = Integer_Type[5]; +arr[0] = 42; +arr[1] = arr[0] * 2; + +% Associative array +variable dict = Assoc_Type[String_Type]; + +% Error handling blocks +define risky() +{ + EXIT_BLOCK + { + printf("cleanup\n"); + } + ERROR_BLOCK + { + 
printf("error occurred\n"); + } +} + +% Logical operators +variable result = (x > 0) and (x < 100); +variable other = (x == 0) or (flag != 0); +variable inv = not flag; +variable bits = x xor 0xFF; + +% Operators +variable sum = x + 10; +variable diff = x - 5; +variable prod = x * 3; +variable quot = x / 2; +variable eq = (x == 10); +variable ne = (x != 5); +variable gt = (x > 0); +variable lt = (x < 100); diff --git a/tests/syntax/samples/smalltalk-report.md b/tests/syntax/samples/smalltalk-report.md new file mode 100644 index 0000000000..3d72170f66 --- /dev/null +++ b/tests/syntax/samples/smalltalk-report.md @@ -0,0 +1,77 @@ +Smalltalk syntax highlighting: TS vs Legacy comparison report +============================================================== + +Sample file: `tests/syntax/samples/smalltalk.st` +Legacy reference: `misc/syntax/smalltalk.syntax` +TS query: `misc/syntax-ts/queries-override/smalltalk-highlights.scm` +TS colors: `misc/syntax-ts/colors.ini` `[smalltalk]` + +Aligned with legacy +------------------- + +- Comments (double-quoted `"..."`) -> `brown` - MATCH. +- Strings (single-quoted `'...'`) -> `brightcyan` via `@tag` - MATCH. +- Special variables (`self`, `super`, `nil`) -> `yellow` via `@keyword` - MATCH. +- Boolean literals (`true`, `false`) -> `brightmagenta` via `@keyword.control` - + MATCH. +- Keyword messages (`ifTrue:`, `ifFalse:`, `whileTrue:`, `do:`, `add:`, + `value:`, `on:`, `return:`, `setName:`, `age:`, `with:`) -> `brightmagenta` + via `@keyword.control` - MATCH. +- Binary operators (`>`, `<`, `+`, `<=`, `=`, `,`) -> `cyan` via `@label` - + MATCH. +- Assignment (`:=`) -> `cyan` via `@label` - MATCH. +- Return operator (`^`) -> `brightred` via `@function.special` - DIFFERS: + legacy uses `cyan` for `^` as a generic operator; TS uses `brightred`, which + is more visually distinct. +- Statement separator (`.`) -> `brightred` via `@function.special` - MATCH with + legacy's `brightred` for whole-right `.`.
+- Temporaries delimiters (`|`) -> both engines highlight `|`. Legacy uses + `cyan`, TS uses `brightred` via `@function.special`. +- Brackets (`[`, `]`, `(`, `)`, `{`, `}`) -> `brightcyan` via `@delimiter` in + TS, `cyan` in legacy - close match. +- Cascade separator (`;`) -> `brightcyan` via `@delimiter` in TS, `cyan` in + legacy - close match. +- `thisContext` -> `yellow` via `@keyword` in TS, not highlighted in legacy (no + keyword entry). + +Intentional improvements over legacy +------------------------------------- + +- TS uses `@keyword.control` (`brightmagenta`) for keyword message selectors + like `ifTrue:`, `whileTrue:`, `do:`, `add:`, providing consistent coloring for + all keyword messages, not just the ones explicitly listed in legacy. +- TS highlights `thisContext` as `yellow` (`@keyword`), while legacy does not + recognize it. +- TS highlights block arguments (`:each`, `:a`, `:b`, `:c`, `:ex`) as + `lightgray` via `@constant`, distinguishing them from regular identifiers. +- TS highlights symbols (`#animal`) as `yellow` via `@keyword`, matching + legacy's `#` operator color but extending to the whole symbol token. +- TS highlights character literals (`$A`) as `brightcyan` via `@tag`, giving + them string-like coloring. +- TS structurally understands the difference between unary selectors, binary + selectors, and keyword selectors, providing appropriate colors for each + category. + +Known shortcomings +------------------ + +- TS shows some lines with `RED` (uncolored/error) spans, notably after method + signatures like `Animal >> isOlderThan:` and `Animal >> typeSymbol`. This + suggests the tree-sitter parser has difficulty with Smalltalk's method + definition syntax in some cases, producing error nodes. +- Legacy highlights class names (`Object`, `OrderedCollection`, `Array`, + `Boolean`, etc.) as `brightgreen`, which TS does not replicate. TS shows + `Object` as `yellow` (via `@keyword` for unary selector) and most class names + as uncolored. 
+- Legacy highlights message names like `new`, `add`, `do`, `with`, `from` as + `brightmagenta` even when used as plain unary messages, while TS only colors + keyword messages (with colons). +- The `>>` method definition operator shows as `cyan` (`@label`) in TS, the + same as in legacy; however, TS sometimes shows red error spans around the + method name following `>>`. +- TS does not highlight the `!` method terminator as `brightred` the way legacy + does -- it appears as `RED` (error/default). +- The first keyword in a multi-keyword message (e.g., `setName:` in `setName: + aName age: anAge`) sometimes shows as `lightgray` instead of `brightmagenta`, + while subsequent keywords color correctly. This appears to be a parser + inconsistency. diff --git a/tests/syntax/samples/smalltalk.st b/tests/syntax/samples/smalltalk.st new file mode 100644 index 0000000000..a325230a3d --- /dev/null +++ b/tests/syntax/samples/smalltalk.st @@ -0,0 +1,125 @@ +"A Smalltalk sample file for syntax highlighting comparison" +"Exercises TS captures from smalltalk-highlights.scm" + +"Class definition with instance variables" +Object subclass: #Animal + instanceVariableNames: 'name age sound' + classVariableNames: 'Count' + poolDictionaries: '' + category: 'Examples'. + +"Method definition with temporaries and assignment" +Animal >> initialize + | temp | + temp := 'default'. + name := temp. + age := 0. + sound := 'none'. + Count := Count + 1. + +"Method with return value" +Animal >> name + ^ name. + +"Method with keyword messages" +Animal >> setName: aName age: anAge + name := aName. + age := anAge. + +"Unary message sends" +Animal >> describe + | result | + result := name size. + ^ result printString. + +"Binary operators" +Animal >> isOlderThan: other + ^ age > (other age). + +"Keyword message with multiple parts" +Animal >> feedAt: aTime with: aFood + | msg | + msg := 'Feeding ' , name , ' at ' , aTime printString. + msg := msg , ' with ' , aFood. + ^ msg.
+ +"Cascades with semicolons" +Animal >> setup + self + setName: 'Rex' age: 5; + sound := 'bark'. + +"Block with arguments" +Animal >> doTimes: aBlock + | i | + i := 1. + [i <= age] whileTrue: [ + aBlock value: i. + i := i + 1. + ]. + +"Boolean literals and control flow" +Animal >> isValid + | valid | + valid := true. + (name = nil) ifTrue: [valid := false]. + (age < 0) ifTrue: [valid := false]. + ^ valid. + +"Using self and super" +Animal >> copy + | clone | + clone := super copy. + clone setName: name age: age. + ^ clone. + +"Collection operations" +Animal >> allNames: aCollection + | names | + names := OrderedCollection new. + aCollection do: [:each | + names add: each name. + ]. + ^ names. + +"Symbols and characters" +Animal >> typeSymbol + ^ #animal. + +Animal >> firstChar + ^ $A. + +"Numeric literals" +Animal >> compute + | x y | + x := 42. + y := 3.14. + ^ x + y. + +"Nested blocks" +Animal >> deepBlock + [:a | + [:b | + [:c | a + b + c] + ] + ]. + +"Class method with new" +Animal class >> createNamed: aName + | inst | + inst := self new. + inst setName: aName age: 1. + ^ inst. + +"Error handling keywords" +Animal >> safeDo: aBlock + | result | + result := [aBlock value] + on: Error + do: [:ex | ex return: nil]. + ^ result. + +"Using thisContext" +Animal >> currentMethod + ^ thisContext method. +! diff --git a/tests/syntax/samples/sql-report.md b/tests/syntax/samples/sql-report.md new file mode 100644 index 0000000000..1f625032e0 --- /dev/null +++ b/tests/syntax/samples/sql-report.md @@ -0,0 +1,55 @@ +SQL syntax highlighting: TS vs Legacy comparison report +========================================================= + +Sample file: `tests/syntax/samples/sql.sql` +Legacy reference: `misc/syntax/sql.syntax` +TS query: `misc/syntax-ts/queries-override/sql-highlights.scm` +TS colors: `misc/syntax-ts/colors.ini` `[sql]` + +Aligned with legacy +------------------- + +- Single-line comments (`-- ...`): `brown` (TS `comment`) - MATCH. +- Block comments (`/* ...
*/`): `brown` (TS `comment`) - MATCH. +- Strings (`'...'`): `green` (TS `string`) - MATCH. +- Operators (`=`, `<`, `>`, `+`, `-`, `*`, `/`, `%`, `>=`, `<=`, `<>`, `!=`): + `brightcyan` (TS `operator`) - MATCH. +- Delimiters (`;`, `,`, `(`, `)`): `brightcyan` (TS `delimiter`) - MATCH. +- Data types (`INTEGER`, `VARCHAR`, `DECIMAL`, `TEXT`, `BLOB`, `DATE`, + `TIMESTAMP`, `BOOLEAN`, `INT`, `NUMERIC`): `yellow` (TS `type`) - MATCH. + +Intentional improvements over legacy +------------------------------------- + +- TS query is minimal and focused: it highlights types, comments, strings, + operators, and delimiters, letting the grammar handle keyword recognition + structurally rather than via a long keyword list. +- TS correctly handles multi-line block comments as a single unit; legacy uses + context-based matching which can occasionally misfire with nested comment + markers. + +Known shortcomings +------------------ + +- TS does not highlight SQL keywords (`SELECT`, `INSERT`, `UPDATE`, `DELETE`, + `CREATE`, `FROM`, `WHERE`, `JOIN`, `ORDER`, `GROUP`, `HAVING`, `UNION`, + `BEGIN`, `COMMIT`, etc.) in any color; legacy colors all of these as `yellow`. + This is the most significant gap -- SQL keywords appear as default/uncolored + in TS. +- Several lines show unexpected `RED` coloring in TS output (e.g., around + `WHERE department_id IN (`, `FROM employees WHERE EXISTS (`), likely from the + TS grammar producing error nodes for certain subquery patterns; legacy handles + these correctly. +- TS does not highlight `LIKE` as a keyword; legacy colors it `yellow`. +- TS does not distinguish aggregate functions (`COUNT`, `AVG`, `MAX`, `MIN`, + `SUM`) or window functions (`RANK`, `OVER`); legacy colors these as `yellow` + keywords. 
+- TS does not color `AS`, `AND`, `OR`, `NOT`, `IN`, `EXISTS`, `CASE`, `WHEN`, + `THEN`, `ELSE`, `END`, `SET`, `VALUES`, `INTO`, `ON`, `BY`, `DESC`, `ASC`, + `LIMIT`, `HAVING`, `DISTINCT`, `UNION`, `ALTER`, `DROP`, `INDEX`, `IF`, + `DEFAULT`, `PRIMARY`, `KEY`, `UNIQUE`, `NULL`, `TRUE`, `FALSE` -- all of which + legacy highlights as `yellow`. +- The `.` (dot) in table-qualified names (e.g., `e.salary`) shows as `white` in + legacy; TS does not specifically handle dot notation. +- Legacy has MySQL-specific `#` comment support and backtick-quoted identifier + support; TS relies solely on the grammar's comment/string handling. diff --git a/tests/syntax/samples/sql.sql b/tests/syntax/samples/sql.sql new file mode 100644 index 0000000000..5aee483445 --- /dev/null +++ b/tests/syntax/samples/sql.sql @@ -0,0 +1,128 @@ +-- Single line comment +/* Multi-line + block comment */ + +-- Data types in CREATE TABLE +CREATE TABLE employees ( + id INTEGER PRIMARY KEY, + name VARCHAR(100) NOT NULL, + email VARCHAR(255) UNIQUE, + salary DECIMAL(10, 2) DEFAULT 0.00, + department_id INT, + hire_date DATE, + is_active BOOLEAN DEFAULT TRUE, + bio TEXT, + data BLOB +); + +CREATE TABLE departments ( + id INTEGER PRIMARY KEY, + name VARCHAR(50) NOT NULL, + budget NUMERIC(15, 2), + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP +); + +-- INSERT statements +INSERT INTO departments (id, name, budget) +VALUES (1, 'Engineering', 500000.00); + +INSERT INTO departments (id, name, budget) +VALUES (2, 'Marketing', 300000.00), + (3, 'Sales', 250000.00); + +INSERT INTO employees (id, name, email, salary, department_id, hire_date) +VALUES (1, 'Alice Johnson', 'alice@example.com', 95000.00, 1, '2020-01-15'); + +-- SELECT with various clauses +SELECT + e.id, + e.name, + e.salary, + d.name AS department_name +FROM employees e +INNER JOIN departments d ON e.department_id = d.id +WHERE e.salary > 50000 + AND e.is_active = TRUE +ORDER BY e.salary DESC +LIMIT 10; + +-- Aggregate functions and GROUP BY +SELECT + 
d.name, + COUNT(*) AS employee_count, + AVG(e.salary) AS avg_salary, + MAX(e.salary) AS max_salary, + MIN(e.salary) AS min_salary, + SUM(e.salary) AS total_salary +FROM employees e +LEFT JOIN departments d ON e.department_id = d.id +GROUP BY d.name +HAVING COUNT(*) > 1; + +-- UPDATE statement +UPDATE employees +SET salary = salary * 1.10, + is_active = TRUE +WHERE department_id IN ( + SELECT id FROM departments WHERE name = 'Engineering' +); + +-- DELETE statement +DELETE FROM employees +WHERE hire_date < '2015-01-01' + AND is_active = FALSE; + +-- Subquery and EXISTS +SELECT name +FROM employees +WHERE EXISTS ( + SELECT 1 + FROM departments + WHERE departments.id = employees.department_id + AND departments.budget > 400000 +); + +-- CASE expression +SELECT + name, + salary, + CASE + WHEN salary >= 100000 THEN 'Senior' + WHEN salary >= 60000 THEN 'Mid' + ELSE 'Junior' + END AS level +FROM employees; + +-- UNION +SELECT name, 'employee' AS type FROM employees +UNION +SELECT name, 'department' AS type FROM departments; + +-- Window functions +SELECT + name, + salary, + RANK() OVER (ORDER BY salary DESC) AS salary_rank +FROM employees; + +-- String and comparison operators +SELECT * +FROM employees +WHERE name LIKE '%son' + AND salary != 0 + AND salary <> 0 + AND salary >= 1000 + AND salary <= 200000 + AND (salary + 1000) / 12 > 5000; + +-- ALTER and DROP +ALTER TABLE employees ADD COLUMN notes TEXT; +DROP TABLE IF EXISTS old_employees; + +-- CREATE INDEX +CREATE INDEX idx_emp_dept ON employees (department_id); + +-- Transactions +BEGIN; +UPDATE employees SET salary = salary + 5000 WHERE id = 1; +COMMIT; diff --git a/tests/syntax/samples/strace-report.md b/tests/syntax/samples/strace-report.md new file mode 100644 index 0000000000..5a4d50f879 --- /dev/null +++ b/tests/syntax/samples/strace-report.md @@ -0,0 +1,71 @@ +strace syntax highlighting: TS vs Legacy comparison report +=========================================================== + +Sample file: 
`tests/syntax/samples/strace.strace` +Legacy reference: `misc/syntax/strace.syntax` +TS query: `misc/syntax-ts/queries-override/strace-highlights.scm` +TS colors: `misc/syntax-ts/colors.ini` `[strace]` + +Aligned with legacy +------------------- + +- Syscall names (`execve`, `brk`, `access`, `open`, `close`, `read`, `write`, + `fstat64`, `mmap2`, `old_mmap`, `munmap`, `uname`, `stat64`, `fcntl64`, + `select`, `ioctl`, `rt_sigaction`, `rt_sigprocmask`, `alarm`, `statfs`, + `wait4`, `kill`, `exit_group`, `geteuid32`, `getegid32`, `getuid32`, + `getgid32`, `gettimeofday`, `setresuid32`, `setresgid32`, `chmod`, `chown32`, + `writev`, `readv`) -> TS uses `brightred` (`@function.special`) uniformly. + Legacy uses per-syscall coloring (see differences below). +- Strings (`"/usr/bin/ls"`, `"hello world\n"`, etc.) -> `green` via `@string` in + TS. Legacy does not highlight strings (they are part of the syscall argument + pattern match). +- Comments (`/* 42 vars */`) -> `brown` via `@comment` in TS. Legacy does not + recognize comments. + +Intentional improvements over legacy +------------------------------------- + +- TS highlights every component of strace output structurally: syscall names, + arguments, strings, integers, pointers, return values, error names, error + descriptions, PIDs, and punctuation each get distinct colors. Legacy only + matches entire `syscall(...)` patterns as a single color. +- TS highlights return values after `=` as `yellow` via `@keyword`, making them + visually distinct from arguments. +- TS highlights PIDs (e.g., `12345` before a syscall) as `brightcyan` via + `@tag`, distinguishing process-prefixed lines. +- TS highlights pointers (`0x7ffd8a2e3f10`, `0x55a3c8e41000`) as `lightgray` via + `@constant`, and integers as `lightgray` as well, providing consistent numeric + coloring. 
+- TS highlights error names (`ENOENT`) as `brightred` via `@function.special` + and error descriptions (`No such file or directory`) as `brown` via + `@comment`, making error information visually distinct. +- TS highlights signal information (`SIGCHLD`, `SIGTERM`) as `brightred` via + `@signal`/`@function.special`. +- TS highlights macro/flag names (like `WIFEXITED`, `WEXITSTATUS`) as `yellow` + via `@keyword`. +- TS highlights punctuation (`(`, `)`, `,`, `=`) as `brightcyan` via + `@delimiter`, improving structural readability. + +Known shortcomings +------------------ + +- Legacy uses per-category syscall coloring: file I/O (`open`, `close`) -> + `cyan`, read/write (`read`, `write`, `select`) -> `magenta`, memory + (`old_mmap`, `mmap2`, `munmap`) -> `red`, process (`execve`) -> `brightgreen`, + system info (`uname`) -> `brightblue`, permissions (`geteuid32`, `chmod`, + `chown32`) -> `yellow`, signals (`rt_sigaction`, `kill`, `exit_group`) -> + `brightred`. TS colors all syscalls uniformly as `brightred`, losing this + per-category distinction. +- Legacy matches syscall-with-arguments as a single pattern like `open(*)`, so + the entire line up to the closing paren gets the syscall's color. TS only + colors the syscall name itself, leaving arguments to be colored by their own + types. +- TS shows some structural artifacts in the signal line (`--- SIGCHLD {...} + ---`) where the entire signal block is colored as `brightred`, which is + correct but differs from legacy's uncolored treatment of that line. +- The `ioctl` line shows some `RED` (error/uncolored) spans in TS for + struct-like arguments (`{B38400 opost isig...}`) that the parser does not + fully understand. +- Legacy does not highlight `fstat64` closing paren correctly when the argument + contains `}` before `)` -- the pattern `fstat64(*)` stops at the first `)` + inside the struct. TS handles this correctly via structural parsing. 
diff --git a/tests/syntax/samples/strace.strace b/tests/syntax/samples/strace.strace new file mode 100644 index 0000000000..5de6fa652d --- /dev/null +++ b/tests/syntax/samples/strace.strace @@ -0,0 +1,58 @@ +execve("/usr/bin/ls", ["ls", "-la"], 0x7ffd8a2e3f10 /* 42 vars */) = 0 +brk(NULL) = 0x55a3c8e41000 +access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or directory) +open("/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3 +fstat64(3, {st_mode=S_IFREG|0644, st_size=89417, ...}) = 0 +mmap2(NULL, 89417, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f8a4c3e1000 +close(3) = 0 +open("/lib/x86_64-linux-gnu/libc.so.6", O_RDONLY|O_CLOEXEC) = 3 +read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0"..., 832) = 832 +fstat64(3, {st_mode=S_IFREG|0755, st_size=2030544, ...}) = 0 +old_mmap(NULL, 4131552, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f8a4bdf0000 +mmap2(0x7f8a4bfe7000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1f7000) = 0x7f8a4bfe7000 +close(3) = 0 +uname({sysname="Linux", nodename="host", ...}) = 0 +stat64("/usr/bin/ls", {st_mode=S_IFREG|0755, st_size=133792, ...}) = 0 +open("/usr/share/locale/locale.alias", O_RDONLY|O_CLOEXEC) = 3 +read(3, "# Locale name alias data base.\n#"..., 4096) = 2998 +close(3) = 0 +open("/usr/share/locale/en_US.UTF-8/LC_MESSAGES/coreutils.mo", O_RDONLY) = -1 ENOENT (No such file or directory) +open("/usr/share/locale/en_US/LC_MESSAGES/coreutils.mo", O_RDONLY) = -1 ENOENT (No such file or directory) +open("/usr/share/locale/en.UTF-8/LC_MESSAGES/coreutils.mo", O_RDONLY) = -1 ENOENT (No such file or directory) +open(".", O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC) = 3 +fcntl64(3, F_GETFD) = 0x1 (flags FD_CLOEXEC) +geteuid32() = 1000 +getegid32() = 1000 +getuid32() = 1000 +getgid32() = 1000 +gettimeofday({tv_sec=1617192000, tv_usec=123456}, NULL) = 0 +fstat64(1, {st_mode=S_IFCHR|0620, st_rdev=makedev(136, 0), ...}) = 0 +write(1, "total 48\n", 9) = 9 +write(1, "drwxr-xr-x 2 user user 4096"..., 52) = 52 +writev(1, 
[{iov_base="file1.txt", iov_len=9}, {iov_base="\n", iov_len=1}], 2) = 10 +readv(0, [{iov_base="input", iov_len=5}], 1) = 5 +select(1, [0], NULL, NULL, {tv_sec=5, tv_usec=0}) = 1 (in [0]) +ioctl(1, TCGETS, {B38400 opost isig icanon echo ...}) = 0 +rt_sigaction(SIGTERM, {sa_handler=SIG_DFL, sa_mask=[], sa_flags=SA_RESTORER, sa_restorer=0x7f8a4be54a00}, NULL, 8) = 0 +rt_sigprocmask(SIG_BLOCK, [CHLD], [], 8) = 0 +alarm(0) = 0 +statfs("/usr/bin/ls", {f_type=EXT2_SUPER_MAGIC, f_bsize=4096, ...}) = 0 +munmap(0x7f8a4c3e1000, 89417) = 0 +close(1) = 0 +close(2) = 0 +--- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_EXITED, si_pid=12345, si_uid=1000, si_status=0, si_utime=0, si_stime=0} --- +wait4(-1, [{WIFEXITED(s) && WEXITSTATUS(s) == 0}], WNOHANG, NULL) = 12345 +kill(12345, SIGTERM) = 0 +12345 open("/tmp/test.txt", O_RDONLY) = 4 +12345 read(4, "hello world\n", 4096) = 12 +12345 close(4) = 0 +12345 write(1, "hello world\n", 12) = 12 +12345 exit_group(0) = ? ++++ exited with 0 +++ +setresuid32(1000, 1000, 1000) = 0 +setresgid32(1000, 1000, 1000) = 0 +chmod("/tmp/test.txt", 0644) = 0 +chown32("/tmp/test.txt", 1000, 1000) = 0 +brk(0x55a3c8e62000) = 0x55a3c8e62000 +exit_group(0) = ? ++++ exited with 0 +++ diff --git a/tests/syntax/samples/swift-report.md b/tests/syntax/samples/swift-report.md new file mode 100644 index 0000000000..fff0d5875c --- /dev/null +++ b/tests/syntax/samples/swift-report.md @@ -0,0 +1,65 @@ +Swift syntax highlighting: TS vs Legacy comparison +=================================================== + +Sample file: `tests/syntax/samples/swift.swift` +Legacy reference: `misc/syntax/swift.syntax` +TS query: `misc/syntax-ts/queries-override/swift-highlights.scm` +TS colors: `misc/syntax-ts/colors.ini` (no `[swift]`) + +NOTE: The TS Swift grammar has an ABI version mismatch (version 10 is too old + for tree-sitter 0.25). The query file is empty: `";; Swift -- empty query + (grammar ABI version 10 is too old for tree-sitter 0.25)"`. 
As a result, TS + produces NO syntax highlighting at all -- the entire file is rendered in + default foreground. + +Aligned with legacy +------------------- + +No alignment is possible since TS produces zero highlighting output. + +Legacy highlights the following (all lost in TS): + +- Keywords (`class`, `struct`, `enum`, `protocol`, `func`, `var`, `let`, + `import`, `init`, `deinit`, `static`, `return`, `if`, `else`, `for`, `in`, + `while`, `repeat`, `switch`, `case`, `default`, `guard`, `defer`, `do`, + `catch`, `throw`, `throws`, `try`, `as`, `is`, `where`, `self`, `Self`, + `super`, `true`, `false`, `nil`, `typealias`, `subscript`, `extension`, + `public`, `private`, `fileprivate`, `internal`, `open`, `mutating`, `get`, + `set`, `override`, `weak`, `inout`, `rethrows`, `associatedtype`, `operator`, + `continue`, `fallthrough`, `break`): `yellow` +- Type names (`String`, `Int`, `Double`, `Float`, `Bool`, `Any`, `Character`, + `UInt`, etc.): `yellow` +- Comments (`// ...`): `red` (legacy uses `comments` defined as `red`) +- Block comments (`/* ... */`): `red` +- Strings (`"..."`, including interpolation): `green` (legacy uses `string` + color) +- Escape sequences (`\t`, `\n`, `\\`, `\(`...`)`): `brightgreen` +- Unicode escapes (`\u{...}`): `brightgreen` +- Operators (`>`, `<`, `+`, `-`, `*`, `/`, `%`, `=`, `!=`, `==`, `|`, `&`, `^`, + `~`, `!`, `_`): `brightcyan` +- Brackets (`{`, `}`, `(`, `)`, `[`, `]`): `brightcyan` +- Punctuation (`.`, `,`, `:`, `?`): `brightcyan` +- Range operators (`...`, `..<`): `brightcyan` +- Semicolons (`;`): `brightmagenta` +- Attributes (`@available`, `@discardableResult`, `@escaping`, etc.): `yellow` +- Compiler directives (`#if`, `#elseif`, `#else`, `#endif`): `brightred` + +Differences from legacy +----------------------- + +- TS produces no output at all. Every single highlight from legacy is lost. This + is a complete regression for Swift files. 
+ +Known shortcomings +------------------ + +- The tree-sitter Swift grammar's ABI version (10) is incompatible with the + tree-sitter runtime (0.25). Until the grammar is updated to a compatible ABI + version, TS highlighting for Swift is completely nonfunctional. +- The query file is intentionally empty with a comment explaining the version + mismatch. +- Legacy provides comprehensive Swift highlighting covering keywords, types, + operators, strings, escapes, attributes, and compiler directives. All of this + is unavailable via TS. +- To fix this, the upstream tree-sitter-swift grammar needs to be rebuilt with + ABI version 13 or later (tree-sitter 0.25 accepts ABI versions 13-15). diff --git a/tests/syntax/samples/swift.swift b/tests/syntax/samples/swift.swift new file mode 100644 index 0000000000..a50b176ec9 --- /dev/null +++ b/tests/syntax/samples/swift.swift @@ -0,0 +1,222 @@ +// Swift sample file for syntax highlighting comparison +// NOTE: TS grammar has ABI version mismatch (empty query) + +import Foundation +import UIKit + +// Class definition +class Animal { + var name: String + var age: Int + let species: String + + init(name: String, age: Int, species: String) { + self.name = name + self.age = age + self.species = species + } + + deinit { + print("Deallocating \(name)") + } + + func describe() -> String { + return "\(name) is a \(species), age \(age)" + } +} + +// Struct definition +struct Point { + var x: Double + var y: Double + + mutating func moveBy(dx: Double, dy: Double) { + x += dx + y += dy + } + + static func origin() -> Point { + return Point(x: 0.0, y: 0.0) + } +} + +// Enum with associated values +enum Result { + case success(T) + case failure(Error) + + var isSuccess: Bool { + switch self { + case .success: + return true + case .failure: + return false + } + } +} + +// Protocol definition +protocol Drawable { + var color: String { get set } + func draw() + func resize(factor: Double) -> Self +} + +// Protocol extension +extension Drawable { +
func description() -> String { + return "Drawable with color \(color)" + } +} + +// Class with inheritance and protocol conformance +class Circle: Animal, Drawable { + var color: String = "red" + var radius: Double + + init(radius: Double) { + self.radius = radius + super.init(name: "circle", age: 0, species: "shape") + } + + func draw() { + print("Drawing circle with radius \(radius)") + } + + func resize(factor: Double) -> Self { + return self + } +} + +// Guard statement +func processValue(_ value: Int?) { + guard let unwrapped = value else { + print("Value is nil") + return + } + print("Value is \(unwrapped)") +} + +// Switch with pattern matching +func categorize(_ value: Any) -> String { + switch value { + case let x as Int where x > 0: + return "positive integer" + case is String: + return "string" + case let (x, y) as (Int, Int): + return "tuple: \(x), \(y)" + default: + return "unknown" + } +} + +// Closures +let numbers = [1, 2, 3, 4, 5] +let doubled = numbers.map { $0 * 2 } +let filtered = numbers.filter { value in + value > 2 +} + +let sortedNames = ["Charlie", "Alice", "Bob"].sorted { + $0 < $1 +} + +// Optionals and chaining +var optionalString: String? = "Hello" +let length = optionalString?.count ?? 0 +let forced = optionalString! +let defaulted = optionalString ?? 
"default" + +// Error handling +enum AppError: Error { + case networkFailure + case invalidInput(String) +} + +func riskyOperation() throws -> String { + throw AppError.invalidInput("bad data") +} + +do { + let result = try riskyOperation() + print(result) +} catch AppError.networkFailure { + print("Network error") +} catch { + print("Other error: \(error)") +} + +// Defer statement +func readFile() { + let file = open("test.txt") + defer { + close(file) + } + print("Reading file...") +} + +// For-in and while loops +for i in 0..<10 { + print(i) +} + +for name in ["Alice", "Bob"] { + print("Hello, \(name)") +} + +var counter = 10 +while counter > 0 { + counter -= 1 +} + +repeat { + counter += 1 +} while counter < 5 + +// Attributes +@available(iOS 13.0, *) +@discardableResult +func modernFunction() -> Bool { + return true +} + +// Static and class members +class MyClass { + static let shared = MyClass() + private var data: [String: Any] = [:] + internal var count: Int = 0 + public var isReady: Bool = false + fileprivate var secret: String = "hidden" + + subscript(key: String) -> Any? { + get { return data[key] } + set { data[key] = newValue } + } +} + +// Typealias and constants +typealias StringDict = [String: String] +let pi: Double = 3.14159 +let greeting = "Hello, World!" +let escaped = "tab:\there\nnewline" +let unicode = "\u{1F600}" + +// Compiler directives +#if DEBUG + print("Debug mode") +#elseif TESTING + print("Test mode") +#else + print("Release mode") +#endif + +// Nil and boolean literals +let nothing: Int? = nil +let yes: Bool = true +let no: Bool = false + +// Ternary and range operators +let status = counter > 0 ? 
"positive" : "zero" +let range = 1...10 +let halfOpen = 0..<5 diff --git a/tests/syntax/samples/tcl-report.md b/tests/syntax/samples/tcl-report.md new file mode 100644 index 0000000000..fb29d7061e --- /dev/null +++ b/tests/syntax/samples/tcl-report.md @@ -0,0 +1,72 @@ +Tcl syntax highlighting: TS vs Legacy comparison report +======================================================== + +Sample file: `tcl.tcl` +Legacy reference: `misc/syntax/tcl.syntax` +TS query: `misc/syntax-ts/queries-override/tcl-highlights.scm` +TS colors: `misc/syntax-ts/colors.ini` `[tcl]` + +Aligned with legacy +------------------- + +- Language keywords (`if`, `else`, `elseif`, `while`, `foreach`, `proc`, `set`, + `return`, `break`, `continue`, `puts`, `gets`, `open`, `close`, `expr`, + `eval`, `exec`, `source`, `package`, `require`, `namespace`, `global`, + `variable`, `upvar`, `uplevel`, `array`, `list`, `lindex`, `lappend`, + `llength`, `lrange`, `lsearch`, `lsort`, `lreplace`, `string`, `regexp`, + `regsub`, `incr`, `append`, `format`, `scan`, `info`, `rename`, `trace`, + `after`, `vwait`, `update`, `interp`, `switch`): `yellow` - MATCH +- Comments (`# ...`): `brown` - MATCH +- Double-quoted strings (`"..."`): `green` - MATCH +- Variable substitutions (`$var`, `${var}`, `$::ns::var`): `brightgreen` - MATCH +- Escape sequences inside strings (`\n`, `\t`, `\\`): `brightgreen` - MATCH + +Intentional improvements over legacy +------------------------------------- + +- Braced words (`{...}`): TS colors the entire braced word as `green` (via + `@string` capture on `braced_word`). Legacy colors only the `{` and `}` + delimiters as `brightcyan` and leaves the content as default text. The TS + approach is more semantically correct since braced words in Tcl are literal + values (like strings). +- TS colors the `try`, `catch`, `finally`, `on`, `error` keywords as `yellow` + via explicit keyword captures. 
Legacy does not color `try`, `finally`, or `on` + since they are not in its keyword list (only `catch` and `error` are listed). +- Comments are uniformly `brown` in TS, including the `#` marker. Legacy also + colors comments as `brown` via context, but the TS approach captures the + entire comment node uniformly. +- TS now colors `[`, `]`, `{`, `}` brackets as `brightcyan` via `@delimiter` + capture, matching legacy behavior for these delimiters. +- Procedure bodies in braces: TS colors the entire braced body as `green` + (braced_word) with keywords inside still getting `yellow` overlay. Legacy + colors only the `{}` delimiters as `brightcyan` and leaves the body content to + be individually matched. + +Known shortcomings +------------------ + +- Resolved: TS now colors `[`, `]`, `{`, `}` brackets as `brightcyan` via + `@delimiter` capture. This matches legacy behavior. +- TS does not color `(` and `)` parentheses as `brightcyan`. Legacy colors these + as `brightcyan` via explicit keyword rules. The Tcl grammar does not expose + parentheses as anonymous literals. +- Resolved: TS now colors `;` semicolons as `brightmagenta` via + `@delimiter.special` capture. This matches legacy behavior. +- TS does not color operators (`>`, `<`, `=`, `||`, `&&`, `!=`, `==`, `::`) as + `yellow`. Legacy has explicit keyword rules for these. The TS grammar does not + expose operators as named nodes for Tcl. +- TS does not color command-line option flags (e.g., `-exact`, `-all`) as + `cyan`. Legacy colors these via the `wholeright \s-{alpha}+` pattern. The TS + grammar does not have a specific node type for option flags. +- TS braced-word coloring (`green`) applies broadly to all `{...}` blocks + including code blocks (proc bodies, if bodies, while bodies). This means + structural braces that delimit code get `green` background coloring where + legacy would color them as `brightcyan`. Keywords inside braced code blocks + still override to `yellow`.
+- The `expr` command arguments in braces are colored as `yellow` by TS (via the + `expr_cmd` keyword capture) rather than `green` for the braced content. Legacy + colors the braces as `brightcyan` and the content as default. This is a + deliberate TS choice to highlight `expr` expressions. +- Legacy colors `$` inside `"${name}"` separately from the variable name. TS + captures the entire `${name}` substitution as `brightgreen` uniformly, which + is cleaner. diff --git a/tests/syntax/samples/tcl.tcl b/tests/syntax/samples/tcl.tcl new file mode 100644 index 0000000000..6feb90de84 --- /dev/null +++ b/tests/syntax/samples/tcl.tcl @@ -0,0 +1,137 @@ +#!/usr/bin/tclsh +# Tcl syntax highlighting sample file +# Exercises all TS captures: keyword, comment, string, +# string.special (variables, escapes), braced_word + +package require Tcl 8.6 + +# Namespace and variable declarations +namespace eval ::myapp { + variable version "1.0" + variable count 0 +} + +# Procedure definition with multiple args +proc greet {name {greeting "Hello"}} { + puts "$greeting, $name!" 
+ return "$greeting $name" +} + +# Set and expr usage +set x 42 +set y [expr {$x * 2 + 10}] +set name "world" +set result [expr {($x + $y) / 3}] + +# String operations +set msg "Line one\nLine two\ttabbed" +set upper [string toupper $name] +set len [string length $name] +set sub [string range $name 0 2] + +# List operations +set colors {red green blue yellow} +set first [lindex $colors 0] +lappend colors "purple" +set sorted [lsort $colors] +set found [lsearch $colors "blue"] +set count [llength $colors] +set replaced [lreplace $colors 0 0 "orange"] + +# If/elseif/else control flow +if {$x > 100} { + puts "Large" +} elseif {$x > 50} { + puts "Medium" +} else { + puts "Small: $x" +} + +# While loop +set i 0 +while {$i < 5} { + puts "Iteration $i" + incr i +} + +# Foreach loop +foreach color $colors { + puts "Color: $color" +} + +# Regexp matching +set text "Error: code 404" +if {[regexp {(\w+):\s+code\s+(\d+)} $text match word code]} { + puts "Word=$word Code=$code" +} +regsub -all {[aeiou]} $name "*" masked + +# Try/catch/finally error handling +try { + set fd [open "/tmp/test.txt" w] + puts $fd "test data" + close $fd +} on error {msg opts} { + puts "Error: $msg" +} finally { + puts "Cleanup done" +} + +# Eval and exec +eval {set dynamic 99} +set output [exec ls /tmp] + +# Global and upvar +proc modify_global {} { + global count + upvar 1 x local_x + incr count + set local_x [expr {$local_x + 1}] +} + +# Array operations +array set config { + host "localhost" + port 8080 + debug 1 +} +set keys [array names config] + +# Format and scan +set formatted [format "Value: %05d (%.2f)" $x 3.14] +scan "42 hello" "%d %s" num word + +# Info, rename, trace +set procs [info procs] +rename greet old_greet +trace add variable x write {apply {{n1 n2 op} { + puts "Variable $n1 changed" +}}} + +# Source, after, vwait, update +after 1000 {puts "Delayed message"} +update idletasks + +# Uplevel for meta-programming +proc debug_eval {script} { + uplevel 1 $script +} + +# Variable 
substitution in different contexts +set path "/home/${name}/docs" +set ref $::myapp::version +set arr_val $config(host) + +# Braced word (no substitution) +set pattern {[A-Za-z]+\d+} + +# Switch statement +switch -exact -- $x { + 42 {puts "The answer"} + default {puts "Unknown"} +} + +# Escaped characters in strings +set special "Tab:\there\nNewline\\Backslash" + +puts "All tests completed: $result" diff --git a/tests/syntax/samples/toml-report.md b/tests/syntax/samples/toml-report.md new file mode 100644 index 0000000000..0e8717073a --- /dev/null +++ b/tests/syntax/samples/toml-report.md @@ -0,0 +1,74 @@ +TOML syntax highlighting: TS vs Legacy comparison report +======================================================== + +Sample file: `tests/syntax/samples/toml.toml` +Legacy reference: `misc/syntax/toml.syntax` +TS query: `misc/syntax-ts/queries-override/toml-highlights.scm` +TS colors: `misc/syntax-ts/colors.ini` `[toml]` + +Aligned with legacy +------------------- + +- Comments (`# ...`): `brown` via `@comment` - MATCH +- Table header brackets (`[`, `]`): `yellow` via `@keyword` - MATCH +- Array-of-tables brackets (`[[`, `]]`): `yellow` via `@keyword` - MATCH +- Double-quoted strings (`"TOML Example"`, `"localhost"`): `brightgreen` via + `@string.special` - MATCH +- Boolean `true`: `brightcyan` via `@tag` - MATCH +- Boolean `false`: `brightcyan` via `@tag` - MATCH +- Simple integers (`42`, `8080`, `5432`): `brightcyan` via `@tag` - MATCH +- Float values (`9.99`, `0.05`, `1.5`, `3.14159`): `brightcyan` via `@tag` - + MATCH +- Equals sign (`=`): `brightcyan` via `@operator` - MATCH (legacy colors `=` as + part of the assignment context) + +Intentional improvements over legacy +------------------------------------- + +- TS highlights bare keys (variable names) as `lightgray` via `@variable`, + clearly distinguishing them from other elements. Legacy colors keys as `white` + (default context color), which is visually similar but less intentional. 
+- TS properly handles dotted keys (`physical.color`, `server.advanced`) with the + dot separator colored `brightcyan` via `@delimiter` and each key segment as + `lightgray`. Legacy does not distinguish dots from the key text. +- TS handles single-quoted literal strings (`'No \escapes here'`) as + `brightgreen` via `@string.special`. Legacy did not color single-quoted + strings on key-value lines. +- TS correctly handles multiline basic strings (`"""..."""`) and multiline + literal strings (`'''...'''`) as complete `brightgreen` tokens. Legacy + mishandled these, splitting the triple quotes incorrectly and leaving content + partially uncolored or as `white` default. +- TS handles hex (`0xDEADBEEF`), octal (`0o755`), and binary (`0b11010110`) + integer literals as complete `brightcyan` tokens. Legacy only recognized + decimal integers, splitting these prefixed forms. +- TS handles special float values (`inf`, `-inf`, `nan`) as `brightcyan` via + `@tag`. Legacy left these as default/uncolored. +- TS handles scientific notation floats (`5e+22`, `1e-7`) as complete + `brightcyan` tokens. Legacy split these at the `e` character. +- TS properly colors date/time values (`2024-01-15`, `14:30:00`, + `2024-01-15T14:30:00Z`, `2024-01-15T14:30:00-05:00`) as `brightgreen` via + `@string.special`. Legacy colored the numeric parts as `brightcyan` with + separators (`-`, `:`, `T`, `Z`) uncolored, resulting in fragmented + highlighting. +- TS handles quoted keys (`"google.com"`) as `lightgray` via `@variable`. Legacy + treated these as regular strings. +- TS handles negative integers (`-17`) and negative floats (`-0.001`) as + complete tokens. Legacy split the minus sign from the number. +- TS handles inline table contents (`{x = 1, y = 2}`) with proper key/value + coloring. Legacy treated the entire line after `=` uniformly. +- TS colors table header names (`server`, `database`, `products`) as `lightgray` + via `@variable` inside the brackets. 
Legacy colors them as `brown` (the table + header context color). +- TS colors array brackets (`[`, `]`) in value arrays as `yellow` via + `@keyword`, matching the table bracket style. Legacy did not color value-level + array brackets. + +Known shortcomings +------------------ + +- Legacy colored table header names (text between `[` and `]`) as `brown`, + creating a visual grouping effect. TS colors them as `lightgray` (same as bare + keys), which is more uniform but loses the distinct section-header feel. +- The comma separator inside inline tables and arrays is colored `brightcyan` by + TS via `@delimiter`, which differs from legacy where commas inside array + values were part of the default context. This is a minor cosmetic difference. diff --git a/tests/syntax/samples/toml.toml b/tests/syntax/samples/toml.toml new file mode 100644 index 0000000000..f464086879 --- /dev/null +++ b/tests/syntax/samples/toml.toml @@ -0,0 +1,126 @@ +# TOML Syntax Highlighting Sample +# Demonstrates all TS captures for TOML grammar + +# Simple key-value pairs +title = "TOML Example" +description = 'Single-quoted string' +empty = "" + +# Integer values +integer = 42 +negative = -17 +zero = 0 +hex = 0xDEADBEEF +oct = 0o755 +bin = 0b11010110 + +# Float values +pi = 3.14159 +negative_float = -0.001 +exponent = 5e+22 +neg_exponent = 1e-7 +infinity = inf +neg_infinity = -inf +not_a_number = nan + +# Boolean values +enabled = true +disabled = false + +# Date and time values +date = 2024-01-15 +time = 14:30:00 +datetime = 2024-01-15T14:30:00 +datetime_tz = 2024-01-15T14:30:00Z +datetime_offset = 2024-01-15T14:30:00-05:00 + +# String types +basic = "Hello, world!" 
+literal = 'No \escapes here' +multiline_basic = """ + This is a + multiline string +""" +multiline_literal = ''' + This is a + literal multiline string +''' +escaped = "Tab:\tNewline:\nQuote:\"" + +# Dotted keys +physical.color = "orange" +physical.shape = "round" +site."google.com" = true + +# Standard tables +[server] +host = "localhost" +port = 8080 +ssl = true + +[server.advanced] +timeout = 30 +max_connections = 100 +debug = false + +[database] +driver = "postgresql" +host = "db.example.com" +port = 5432 +name = "myapp" +pool_size = 10 + +[database.credentials] +username = "admin" +password = "s3cret" + +# Inline tables (parsed as regular key-value) +point = {x = 1, y = 2} +animal = {type.name = "pug"} + +# Array of tables +[[products]] +name = "Hammer" +sku = 738594937 +color = "red" +price = 9.99 + +[[products]] +name = "Nail" +sku = 284758393 +color = "grey" +price = 0.05 + +[[products.variants]] +size = "large" +weight = 1.5 + +[[products.variants]] +size = "small" +weight = 0.75 + +# Arrays +ports = [8001, 8001, 8002] +hosts = ["alpha", "beta", "gamma"] +mixed = ["string", 42, true, 3.14] +nested = [[1, 2], [3, 4]] +empty_array = [] + +# Multiline arrays +colors = [ + "red", + "green", + "blue", +] + +# More dotted key examples +[fruit] +apple.color = "red" +apple.taste.sweet = true +orange.color = "orange" + +[logging] +level = "info" +file = "/var/log/app.log" +max_size = 104857600 +rotate = true diff --git a/tests/syntax/samples/turtle-report.md b/tests/syntax/samples/turtle-report.md new file mode 100644 index 0000000000..3cc9fac5fd --- /dev/null +++ b/tests/syntax/samples/turtle-report.md @@ -0,0 +1,73 @@ +Turtle syntax highlighting: TS vs Legacy comparison report +=========================================================== + +Sample file: `tests/syntax/samples/turtle.ttl` +Legacy reference: `misc/syntax/turtle.syntax` +TS query: `misc/syntax-ts/queries-override/turtle-highlights.scm` +TS colors: `misc/syntax-ts/colors.ini` `[turtle]` + +Aligned 
with legacy +------------------- + +- Directives (`@prefix`, `@base`) -> `magenta` via `@keyword.directive` - MATCH. +- Keyword `a` (rdf:type shorthand) -> `yellow` via `@keyword` - MATCH. +- Boolean literals (`true`, `false`) -> `yellow` via `@keyword` - MATCH. +- IRI references (`<...>`) -> `brightred` via `@function.special` - MATCH + with legacy's `< >` context. +- Datatype annotation (`^^`) -> `brightmagenta` via `@keyword.control` - MATCH. +- Language tags (`@en`, `@fr`, `@de`) -> `brightmagenta` via `@keyword.control` + - MATCH. +- Strings (double-quoted `"..."`) -> `green` via `@string` - MATCH. +- Multi-line strings (`"""..."""`) -> `green` via `@string` - MATCH. +- Comments (`#` lines) -> `brown` via `@comment` - MATCH. +- Punctuation (`.`, `,`, `;`) -> `white` via `@keyword.other` - MATCH. +- Collection parens (`(`, `)`) -> `brightmagenta` via `@keyword.control` - + MATCH. +- Escape sequences (`\t`, `\n`, `\r`, `\\`) -> `brightgreen` via + `@string.special` - MATCH. + +Intentional improvements over legacy +------------------------------------- + +- TS highlights prefixed names (`ex:Alice`, `foaf:Person`, `rdf:subject`, + `dc:title`, etc.) as `cyan` via `@label`, providing clear visual distinction + for qualified names. Legacy uses a complex character-class pattern + (`wholeleft`) that matches prefix patterns as `cyan` but may miss some cases. +- TS highlights namespace declarations (`rdf:`, `rdfs:`, `xsd:`, `foaf:`, `ex:`, + `dc:`) as `cyan` via `@label` in prefix declarations, matching the prefixed + name coloring. +- TS highlights blank node labels (`_:node1`, `_:node2`) as `cyan` via `@label`. + Legacy matches `_:` as `cyan` via `wholeleft` but may not extend the color to + the full label. +- TS highlights blank node brackets (`[`, `]`) as `cyan` via `@label`, visually + grouping them with blank node labels.
+- TS highlights SPARQL-style directives (`PREFIX`, `BASE`) as `magenta` via + `@keyword.directive`, which legacy does not recognize (legacy only handles + `@prefix` and `@base`). +- TS correctly handles the empty prefix (`:localName`) as `cyan`, while legacy + requires at least one letter before the colon. +- TS structurally distinguishes subjects, predicates, and objects, providing + consistent coloring even for complex nested structures with blank nodes and + collections. + +Known shortcomings +------------------ + +- Legacy produces no highlighting at all for this sample file. The legacy + `turtle.syntax` uses `context linestart #` for comments which requires `#` at + the exact start of the line, and the file's first line starts with `##` which + should match but apparently does not trigger any highlighting. Additionally, + legacy's `context default lightgray` with `spellcheck` and keyword rules + appear to not produce any ANSI color output from `mc-syntax-dump`, suggesting + a compatibility issue with the legacy engine for this syntax file. +- TS does not highlight the `\u00E9` Unicode escape in `"caf\u00E9"` as + `brightgreen` -- it remains `green` (string color). The TS query has an + `(echar)` capture for escape sequences, but `\u` escapes may use a different + node type (`UCHAR`) not covered by the query. +- TS colors the period (`.`) as `white` via `@keyword.other`, while legacy + intended it as `white` on `brightmagenta` background (the `keyword . white + brightmagenta` rule). TS does not support background colors in its + highlighting model. +- The multi-line string closing `."""` followed by ` .` shows the space and + period after the string with correct coloring in TS, but the closing quotes + are part of the green string span rather than being distinct delimiters. 
diff --git a/tests/syntax/samples/turtle.ttl b/tests/syntax/samples/turtle.ttl new file mode 100644 index 0000000000..afc2fe3494 --- /dev/null +++ b/tests/syntax/samples/turtle.ttl @@ -0,0 +1,114 @@ +## RDF Turtle sample for syntax highlighting comparison +# Exercises TS captures from turtle-highlights.scm + +@prefix rdf: . +@prefix rdfs: . +@prefix xsd: . +@prefix foaf: . +@prefix ex: . +@prefix dc: . + +@base . + +# SPARQL-style PREFIX and BASE declarations +PREFIX schema: +BASE + +# Simple triple with prefixed names +ex:Alice a foaf:Person . +ex:Bob a foaf:Person . + +# Multiple predicates with semicolons +ex:Alice + foaf:name "Alice Smith" ; + foaf:age "30"^^xsd:integer ; + foaf:mbox ; + foaf:knows ex:Bob . + +# Language-tagged strings +ex:Book1 + dc:title "The Great Book"@en ; + dc:title "Le Grand Livre"@fr ; + dc:title "Das groe Buch"@de . + +# Datatype annotations +ex:Measurement1 + ex:value "42.5"^^xsd:decimal ; + ex:unit "kg"^^xsd:string ; + ex:timestamp "2024-01-15T10:30:00"^^xsd:dateTime . + +# Boolean literals +ex:Config + ex:enabled true ; + ex:debug false . + +# Blank nodes with bracket syntax +ex:Alice foaf:knows [ + a foaf:Person ; + foaf:name "Charlie" ; + foaf:age "25"^^xsd:integer +] . + +# Blank node labels +_:node1 a foaf:Organization . +_:node1 foaf:name "ACME Corp" . +_:node2 foaf:member _:node1 . + +# Collections (lists) +ex:Favorites ex:colors ( + "red" + "green" + "blue" +) . + +# Nested collections +ex:Matrix ex:row1 ( + "1"^^xsd:integer + "2"^^xsd:integer + "3"^^xsd:integer +) . + +# Multi-line strings +ex:Document dc:description """This is a +multi-line string literal +that spans several lines.""" . + +# Escape sequences in strings +ex:Escaped + ex:tab "before\tafter" ; + ex:newline "line1\nline2" ; + ex:carriage "text\rmore" ; + ex:unicode "caf\u00E9" ; + ex:backslash "path\\to\\file" . + +# Full IRI references + a . + + "Full IRI" . 
+ +# Numeric values +ex:Stats + ex:count "100"^^xsd:integer ; + ex:ratio "3.14"^^xsd:decimal ; + ex:score "9.8e2"^^xsd:double . + +# Commas for object lists +ex:Alice foaf:knows ex:Bob , ex:Charlie , ex:Dave . + +# Reification-style triples +ex:Statement1 + rdf:subject ex:Alice ; + rdf:predicate foaf:knows ; + rdf:object ex:Bob . + +# Nested blank nodes +ex:Complex a [ + rdfs:subClassOf [ + a rdfs:Class ; + rdfs:label "Nested" + ] +] . + +# Empty prefix +:localName a foaf:Thing . +:another rdfs:label "local" . diff --git a/tests/syntax/samples/typescript-report.md b/tests/syntax/samples/typescript-report.md new file mode 100644 index 0000000000..ab39891cab --- /dev/null +++ b/tests/syntax/samples/typescript-report.md @@ -0,0 +1,92 @@ +TypeScript syntax highlighting: TS vs Legacy comparison +======================================================= + +Sample file: `typescript.ts` +Legacy reference: `misc/syntax/ts.syntax` +TS query: `misc/syntax-ts/queries-override/typescript-highlights.scm` +TS colors: `misc/syntax-ts/colors.ini` `[typescript]` + +Aligned with legacy +------------------- + +- Language keywords (`import`, `from`, `interface`, `readonly`, `type`, + `extends`, `enum`, `abstract`, `class`, `private`, `protected`, `public`, + `static`, `constructor`, `get`, `set`, `new`, `return`, `const`, `let`, `var`, + `async`, `await`, `function`, `if`, `else`, `for`, `of`, `break`, `continue`, + `switch`, `case`, `default`, `try`, `catch`, `finally`, `throw`, `delete`, + `typeof`, `instanceof`, `in`, `yield`, `declare`, `namespace`, `export`, + `implements`, `keyof`, `infer`, `as`, `override`, `satisfies`): `yellow` - + MATCH +- `this` and `super`: `yellow` - MATCH +- Boolean literals (`true`, `false`): `brightgreen` - MATCH +- Predefined types (`string`, `number`, `boolean`, `any`, `unknown`, `void`): + `cyan` - MATCH +- `null` and `undefined`: `cyan` - MATCH +- Comments (line `//` and block `/* */`): `brown` - MATCH +- Double-quoted strings (`"..."`): `green` - 
MATCH +- Template strings (`` `...` ``): `green` - MATCH +- Numbers (`42`, `3.14`, `0xff`): `brightgreen` - MATCH +- Arithmetic/comparison operators (`+`, `-`, `*`, `/`, `%`, `**`, `=`, `==`, + `===`, `!=`, `!==`, `>`, `<`, `>=`, `<=`, `&&`, `||`, `!`, `&`, `|`, `^`, `~`, + `<<`, `>>`, `>>>`): `yellow` - MATCH +- Arrow operator (`=>`): `brightcyan` - MATCH +- Semicolons (`;`): `brightmagenta` - MATCH +- Brackets and delimiters (`(`, `)`, `[`, `]`, `{`, `}`, `,`, `:`): `brightcyan` + - MATCH +- Template string interpolation (`${...}`): `yellow` - MATCH + +Intentional improvements over legacy +------------------------------------- + +- TS captures `null` and `undefined` as `cyan` via the `@label` capture, + matching the legacy `cyan` color for basic types. Legacy lists these under + "Basic Types" as `cyan` keywords. Both produce the same visual result. +- TS captures predefined types (`string`, `number`, `boolean`, `any`, `unknown`, + `void`, `never`, `object`, `symbol`, `bigint`) as `cyan` via the + `predefined_type` node. Legacy lists these individually as `cyan` keywords. TS + is more robust since it uses the grammar's type system rather than string + matching. +- Regular expressions: TS colors regex patterns as `brightgreen` via + `@string.special`. Legacy colors the regex delimiters and content using + operator/bracket rules which produces a mix of `yellow` and `brightcyan`. The + TS approach provides cleaner, more consistent regex highlighting. +- The `.` dot operator: TS captures it as `yellow` via `@operator.word`. Legacy + also colors `.` as `yellow`. Both match. +- `void` as a keyword (e.g., `void 0`): TS colors it as `cyan` (predefined_type) + when used as a type annotation, but legacy colors it as `cyan` everywhere. + Both produce `cyan`. +- Binary (`0b1010`), octal (`0o77`), and underscore-separated (`1_000_000`) + number literals: legacy does not color these as `brightgreen` (its regex + patterns only match decimal and hex).
TS colors all number node types as + `brightgreen`. + +Known shortcomings +------------------ + +- TS does not color `Date`, `Error`, `Promise`, `Array`, `Map`, `Set`, + `console`, `JSON`, `Math`, `Object`, `RegExp`, `Number`, `Boolean`, `String`, + `Function`, `Buffer`, `process`, `setTimeout`, `setInterval`, `require`, + `fetch`, and other built-in objects/functions as `yellow`. Legacy has an + extensive hardcoded list of 100+ built-in objects and common functions. TS + intentionally omits these since they are not language-level keywords. +- TS does not color `NaN`, `Infinity`, `__dirname`, `__filename` as `yellow`. + Legacy lists these as special constants/globals. +- The `?` ternary operator is colored as `brightcyan` (delimiter) in both TS and + legacy, which is consistent. +- Access modifiers `private`, `protected`, `public`: both TS and legacy color + these as `yellow`, so both match. Legacy lists them as keywords. It was + initially suspected that the TS query does not include `"private"`, + `"protected"`, or `"public"` in its keyword list, which would leave them in + the default color in TS while legacy colors them `yellow`. Checking the + rendered output disproved this: the modifiers appear as `yellow` in both + legacy and TS. This item is kept only to record that the comparison was + verified against the output. +- The `type` keyword in type alias declarations: the standalone `type` keyword + at the start of a type alias declaration (e.g., `type KeyOf`) appears as + default text in the TS output, even though the TS query does include + `"type"` in its keyword list. Legacy does not list `type` as a keyword, so + it is rendered as default text there as well. Neither engine colors `type` + in the output, so the two are consistent. +- The `satisfies` and `override` keywords: TS includes them in the keyword list. + Legacy does not list them (they were added in newer TypeScript versions). TS + improvement.
diff --git a/tests/syntax/samples/typescript.ts b/tests/syntax/samples/typescript.ts new file mode 100644 index 0000000000..dab21efa44 --- /dev/null +++ b/tests/syntax/samples/typescript.ts @@ -0,0 +1,148 @@ +// TypeScript syntax highlighting sample file +// Exercises all TS captures: keyword, comment, string, +// string.special, number.builtin, label, operator, operator.word, +// delimiter, delimiter.special + +import { EventEmitter } from "events"; + +// Interface with generics and readonly +interface Repository { + readonly id: number; + name: string; + findById(id: number): T | undefined; +} + +// Type alias with keyof and infer +type KeyOf = keyof T; +type Unwrap = T extends Promise ? U : T; +type Assertion = string | number; + +// Enum +enum Status { + Active = 0, + Inactive = 1, + Pending = "PENDING", +} + +// Abstract class with access modifiers +abstract class BaseEntity { + private _id: number; + protected createdAt: Date; + public static count: number = 0; + + constructor(id: number) { + this._id = id; + this.createdAt = new Date(); + BaseEntity.count++; + } + + abstract validate(): boolean; + get entityId(): number { return this._id; } + set entityId(value: number) { this._id = value; } +} + +// Class with extends/implements +class UserRepo extends BaseEntity + implements Repository { + readonly id: number; + name: string = "users"; + constructor(id: number) { super(id); this.id = id; } + validate(): boolean { return this.id > 0; } + findById(id: number): User | undefined { + return undefined; + } +} + +// Interface for User +interface User { + name: string; + age: number; + email?: string; + [key: string]: unknown; +} + +// Predefined types and null/undefined +let val: string = "hello"; +let num: number = 42; +let flag: boolean = true; +let nothing: null = null; +let missing: undefined = undefined; +let any_val: any = "test"; +let unknown_val: unknown = 123; +let voidFn: void = undefined; + +// Async/await with arrow functions +const fetchData = async 
(url: string): Promise => { + const response = await fetch(url); + return response as unknown as string; +}; + +// Generics and type assertions +function identity(arg: T): T { + return arg; +} +const result = identity(42); +const typed = result as number; + +// Template strings +const greeting = `Hello, ${val}!`; +const multi = `Line 1 +Line 2: ${num + 1}`; + +// Regular expression +const pattern = /^[a-z]+\d*$/gi; + +// Operators (arithmetic, comparison, logical, bitwise) +let a = 10, b = 20; +let sum = a + b - a * b / a % b ** 2; +let cmp = a === b || a !== b && a >= b; +let bw = (a & b) | (a ^ b) | ~a; +let sh = (a << 2) >> 2 >>> 1; +let logic = !flag && (a > b || a < b); +a++; b--; a += 5; + +// Control flow +if (a > 0) { console.log("positive"); } +else { console.log("non-positive"); } + +for (const item of [1, 2, 3]) { + if (item === 2) break; + if (item === 0) continue; +} + +// Switch, try/catch, keywords +switch (Status.Active) { + case Status.Active: break; + default: break; +} +try { throw new Error("oops"); } +catch (e) { console.log(e); } +finally { console.log("done"); } + +// Delete, typeof, instanceof, void, in, yield +const obj = { x: 1 }; delete obj.x; +const t = typeof val; +const isDate = new Date() instanceof Date; +void 0; +const hasX = "x" in obj; +function* gen(): Generator { yield 1; yield 2; } + +// Declare, namespace, satisfies, override +declare const API_URL: string; +namespace App { export const version = "1.0"; } +const cfg = { port: 80 } satisfies Record; +class Child extends BaseEntity { + override validate(): boolean { return true; } +} + +// Numbers and booleans +const int = 42, float = 3.14, hex = 0xff; +const bin = 0b1010, oct = 0o77, big = 1_000_000; +const yes = true, no = false; + +// Semicolons, brackets, commas, colons +const arr: number[] = [1, 2, 3]; +const map: { [k: string]: number } = { a: 1 }; + +/* Block comment with multiple lines + spanning across several lines */ diff --git a/tests/syntax/samples/verilog-report.md 
b/tests/syntax/samples/verilog-report.md new file mode 100644 index 0000000000..e973adac3b --- /dev/null +++ b/tests/syntax/samples/verilog-report.md @@ -0,0 +1,72 @@ +Verilog syntax highlighting: TS vs Legacy comparison +===================================================== + +Sample file: `verilog.v` +Legacy reference: `misc/syntax/verilog.syntax` +TS query: `misc/syntax-ts/queries-override/verilog-highlights.scm` +TS colors: `misc/syntax-ts/colors.ini` `[verilog]` + +Aligned with legacy +------------------- + +- Core keywords (`module`, `endmodule`, `input`, `output`, `inout`, `wire`, + `reg`, `integer`, `real`, `parameter`, `localparam`, `assign`, `always`, + `initial`, `begin`, `end`, `if`, `else`, `case`, `casex`, `casez`, `endcase`, + `for`, `while`, `repeat`, `forever`, `generate`, `endgenerate`, `function`, + `endfunction`, `task`, `endtask`, `posedge`, `negedge`, `specify`, + `endspecify`, `default`, `signed`, `unsigned`, `genvar`, `defparam`, + `disable`, `event`, `force`, `release`, `wait`): `yellow` - MATCH +- Gate primitives (`and`, `or`, `not`, `nand`, `nor`, `xor`, `xnor`, `buf`): + `yellow` - MATCH +- Net types (`supply0`, `supply1`, `tri`, `wand`, `wor`): `yellow` - MATCH +- Comments (line `//` and block `/* */`): `brown` - MATCH +- Strings (`"..."`): `green` - MATCH +- Arithmetic/comparison operators (`=`, `==`, `!=`, `<`, `>`, `<=`, `>=`, `+`, + `-`, `*`, `/`, `%`, `&&`, `||`, `!`, `<<`, `>>`, `?`, `:`): `yellow` - MATCH +- Bitwise operators (`&`, `|`, `^`, `~`): `brightmagenta` - MATCH +- Delimiters (`.`, `,`): `brightcyan` - MATCH +- Semicolons (`;`): `brightmagenta` - MATCH +- Brackets and parentheses (`(`, `)`, `[`, `]`, `{`, `}`): both legacy and TS + color these as `brightcyan` - MATCH + +Intentional improvements over legacy +------------------------------------- + +- Comments are uniformly `brown` in TS. 
Legacy colors the `//` marker characters + as `yellow`/`green` (the `/` characters get operator coloring) while the + comment text is `brown`. TS provides cleaner single-color comments. +- The `/* */` block comment markers: legacy colors `/*` and `*/` as `yellow` + (the `*` and `/` operator keywords) with the text as `brown`. TS colors the + entire block comment uniformly as `brown`. +- TS captures `===` and `!==` (Verilog case equality operators) as `yellow` via + the keyword list. Legacy also colors `==` and `!=` as `yellow` but does not + explicitly list `===` and `!==`. TS is more complete. +- Compiler directives (`` `timescale ``, `` `define ``, etc.): legacy colors + these as `brightred`. TS also shows `` `timescale `` as `brightred` in the + output, matching legacy. The TS grammar handles these via preprocessor node + types. + +Known shortcomings +------------------ + +- System tasks and functions (`$display`, `$monitor`, `$time`, `$finish`, + `$readmemh`, etc.): legacy has an extensive list of ~200 system + tasks/functions colored `yellow`. The tree-sitter verilog grammar does not + classify them as keywords, so TS coverage of the full list is unverified. + In the sample output, however, `$display`, `$time`, `$finish`, and `$monitor` + DO appear as `yellow` in both legacy and TS (`$readmemh` is not exercised). +- TS does not color the `#` delay notation (e.g., `#10`, `#5`, `#1000`) as any + special color. Legacy also does not color these specially (the `#` is not in + the keyword list). Both leave delay values as default text. +- The `->` event trigger operator: legacy colors `-` and `>` separately as + `yellow`. TS colors `->` as `yellow` in the output. Both produce yellow. +- The `?` in casez bit patterns (e.g., `8'b????_???1`): legacy colors the `?` + characters as `brightcyan` (via the `?` keyword rule). TS also shows these as + `brightcyan`. Both match. +- TS does not separately color the `@` sensitivity list operator.
Legacy also + does not color `@`. Both leave it as default text. +- TS does not color `=>` (specify path operator) as a distinct color from `=` + and `>`. Both legacy and TS show `=>` as `yellow` (via arithmetic operator + rules). MATCH. +- The `{` and `}` in concatenation expressions: both legacy and TS color these + as `brightcyan`. MATCH. diff --git a/tests/syntax/samples/verilog.v b/tests/syntax/samples/verilog.v new file mode 100644 index 0000000000..078fb02550 --- /dev/null +++ b/tests/syntax/samples/verilog.v @@ -0,0 +1,152 @@ +// Verilog syntax highlighting sample file +// Exercises all TS captures: keyword, comment, string, +// delimiter, delimiter.special + +`timescale 1ns / 1ps + +// Module with parameters +module counter #( + parameter WIDTH = 8, + parameter MAX_COUNT = 255 +) ( + input wire clk, + input wire rst_n, + input wire enable, + output reg [WIDTH-1:0] count, + output wire overflow, + inout wire [7:0] data_bus +); + +// Internal declarations +wire [WIDTH-1:0] next_count; +reg [WIDTH-1:0] prev_count; +integer i; +real delay_val; +localparam HALF = MAX_COUNT / 2; + +// Continuous assignment with operators +assign next_count = count + 1; +assign overflow = (count == MAX_COUNT) ? 
1'b1 : 1'b0; + +// Always block with posedge/negedge +always @(posedge clk or negedge rst_n) begin + if (!rst_n) begin + count <= {WIDTH{1'b0}}; + prev_count <= {WIDTH{1'b0}}; + end else if (enable) begin + prev_count <= count; + if (count == MAX_COUNT) + count <= {WIDTH{1'b0}}; + else + count <= next_count; + end +end + +// Combinational always block with case +always @(*) begin + case (count[1:0]) + 2'b00: $display("Phase 0"); + 2'b01: $display("Phase 1"); + default: $display("Other"); + endcase +end + +endmodule + +// ALU module with function, operators +module alu ( + input wire [7:0] a, b, + input wire [2:0] op, + output reg [7:0] result +); + +function [8:0] add_fn; + input [7:0] x, y; + begin add_fn = x + y; end +endfunction + +always @(*) begin + case (op) + 3'd0: result = a + b; + 3'd1: result = a - b; + 3'd2: result = a * b; + 3'd3: result = a / b; + 3'd4: result = a % b; + 3'd5: result = (a == b) ? 8'd1 : 8'd0; + 3'd6: result = (a != b) ? 8'd1 : 8'd0; + default: result = 8'd0; + endcase +end + +// Bitwise and logical operators +wire [7:0] bw = (a & b) | (a ^ b) | ~a; +wire lg = (a > 0) && (b > 0) || !(a == b); +wire [7:0] sh = (a << 2) >> 2; + +endmodule + +// Task definition +module test_bench; + +reg clk; +reg rst_n; +wire [7:0] cnt; + +counter #(.WIDTH(8), .MAX_COUNT(255)) uut ( + .clk(clk), .rst_n(rst_n), .enable(1'b1), + .count(cnt), .overflow(), .data_bus() +); + +task toggle_reset; + begin rst_n = 1'b0; #10 rst_n = 1'b1; end +endtask + +initial begin + clk = 1'b0; + rst_n = 1'b0; + toggle_reset; + $display("Simulation started at %0t", $time); + #1000 $finish; +end + +// Clock generation with forever +always begin + #5 clk = ~clk; +end + +// Generate block +genvar gi; +generate + for (gi = 0; gi < 4; gi = gi + 1) begin : gen_blk + wire [7:0] tap = cnt >> gi; + end +endgenerate + +// String usage and specify block +initial begin + $display("Test: %s", "hello world"); + $monitor("cnt=%d time=%0t", cnt, $time); +end +/* Block comment spanning multiple 
lines */ +specify (clk => count) = (2, 3); endspecify + +// Net types and gate primitives +wire signed [7:0] s_wire; +tri tri_net; wand wand_net; wor wor_net; +supply0 gnd; supply1 vdd; +and g1 (out1, in1, in2); or g2 (out2, in1, in2); +not g3 (out3, in1); nand g4 (out4, in1, in2); +nor g5 (out5, in1, in2); xor g6 (out6, in1, in2); +xnor g7 (out7, in1, in2); buf g8 (out8, in1); + +// Force, release, disable, wait, event, defparam +event done_event; +initial begin + force s_wire = 8'h42; + #10 release s_wire; + wait (cnt > 100); disable gen_blk; + -> done_event; +end +defparam uut.WIDTH = 16; + +endmodule diff --git a/tests/syntax/samples/vhdl-report.md b/tests/syntax/samples/vhdl-report.md new file mode 100644 index 0000000000..9e6244bc7d --- /dev/null +++ b/tests/syntax/samples/vhdl-report.md @@ -0,0 +1,92 @@ +VHDL syntax highlighting: TS vs Legacy comparison report +========================================================= + +Sample file: `vhdl.vhd` +Legacy reference: `misc/syntax/vhdl.syntax` +TS query: `misc/syntax-ts/queries-override/vhdl-highlights.scm` +TS colors: `misc/syntax-ts/colors.ini` `[vhdl]` + +Aligned with legacy +------------------- + +- Core keywords (`library`, `use`, `entity`, `is`, `port`, `architecture`, `of`, + `begin`, `end`, `signal`, `variable`, `constant`, `array`, `range`, `to`, + `downto`, `process`, `if`, `then`, `else`, `elsif`, `case`, `when`, `others`, + `for`, `while`, `loop`, `generate`, `component`, `generic`, `map`, `wait`, + `until`, `assert`, `return`, `function`, `procedure`, `package`, `body`, + `attribute`, `alias`, `record`, `with`, `select`, `after`, `block`, `open`, + `all`): `yellow` - MATCH +- Word operators (`not`, `and`, `or`, `nand`, `nor`, `xor`, `xnor`): `green` - + MATCH +- Port direction keywords (`in`, `out`, `inout`, `buffer`): `white` - MATCH +- Type/subtype declarations (`type`, `subtype`): `brightcyan` - MATCH +- Comments (`-- ...`): `magenta` - MATCH +- String literals (`"..."`): `green` - MATCH +- 
Character literals (`'0'`, `'1'`, `'Z'`): `brightgreen` - MATCH +- Symbol operators (`:=`, `<=`, `=>`, `=`, `/=`, `<`, `>`, `+`, `-`, `*`, `/`, + `&`, `**`): `brightgreen` - MATCH +- Delimiters (`.`, `;`, `,`, `:`): `brightgreen` - MATCH +- Parentheses (`(`, `)`): `brightgreen` - MATCH +- Boolean literals (`true`, `false`): `brightred` - MATCH +- Time units (`ns`): `brightred` - MATCH +- Report/severity keywords (`report`, `severity`, `note`, `warning`, `error`, + `failure`): `red` - MATCH +- Predefined types (`integer`, `natural`, `positive`, `string`, `character`, + `boolean`, `real`, `bit`, `bit_vector`, `time`, `std_logic`, + `std_logic_vector`, `severity_level`): `cyan` - MATCH + +Intentional improvements over legacy +------------------------------------- + +- Entity, architecture, and component names: TS colors these identifiers as + `cyan` via the `@label` capture (e.g., `counter` in `entity counter is`). + Legacy does not color entity/architecture names -- they appear as default + text. +- Function and procedure names: TS colors `max_of` and `reset_counter` in their + declarations as `brightcyan` via the `@function` capture. Legacy does not + distinguish function/procedure names from other identifiers. +- Label names (e.g., `clk_proc`, `state_proc`, `gen_block`, `ctrl_block`, + `stim_proc`, `add_inst`): TS colors these as `cyan` via the `@label` capture. + Legacy does not color label identifiers -- only the `:` after the label gets + `brightgreen`. +- Type declaration names (e.g., `state_type`, `byte_t`, `mem_array`): TS colors + these as `cyan` via the `@label` capture on `full_type_declaration` and + `subtype_declaration` name nodes. Legacy leaves these as default text. +- Architecture body name (`rtl`): TS colors this as `cyan` via `@label`. Legacy + leaves it as default text. + +Known shortcomings +------------------ + +- `report`, `severity`, `note`, `warning`, `error`, `failure` in assert + statements:
the override query has no explicit captures for these + report-related keywords. Legacy colors `report` and `severity` as `red` and + severity levels (`note`, `warning`, `error`, `failure`) also as `red`. + Despite the missing captures, the TS output shows `report`, `severity`, + `failure`, `note`, `warning`, `error` as `red`, matching legacy (see + "Aligned with legacy" above). These are likely captured by the base grammar + queries rather than the override, so this is a query-maintenance note, not + a visible difference. MATCH in output. +- TS does not color some shift/rotate operators (`sll`, `srl`, `sla`, `sra`, + `rol`, `ror`) or math operators (`rem`, `mod`, `abs`, `new`) as `green`. + Legacy colors these as `green` via explicit keyword rules. These operators are + not exercised in the sample file but would be missing in TS if they are not in + the base grammar queries. +- The `|` (bar/pipe) character: legacy colors it as `brightgreen` via explicit + keyword. TS may or may not capture it depending on the context. Not exercised + in the sample. +- Bit string literal prefixes (`x`, `b`, `o` before quoted strings): TS colors + only the quoted portion as `green` via string literal capture. The prefix + letter appears as default text. Legacy behavior is the same since the prefix + is not a keyword. Both show `x"DEAD"` with `x` as default and `"DEAD"` as + `green`. +- The `'` tick character in attribute references (e.g., `sig'event`, + `sig'range`): legacy colors `'` as `brightgreen`. TS does not capture isolated + tick characters. Not exercised in the sample. +- Legacy has `guarded`, `postponded` (sic), `unaffected`, `disconnect` as + `yellow` keywords. The TS query does not include these. They are uncommon VHDL + keywords. +- Function calls like `rising_edge`, `to_unsigned`: TS does not color these as + `brightcyan` since they are not matched by the `function_call` capture (the + grammar may use a different node type for library function calls).
Legacy also + does not color them. Both show default text. diff --git a/tests/syntax/samples/vhdl.vhd b/tests/syntax/samples/vhdl.vhd new file mode 100644 index 0000000000..9cd7949c92 --- /dev/null +++ b/tests/syntax/samples/vhdl.vhd @@ -0,0 +1,152 @@ +-- VHDL syntax highlighting sample file +-- Exercises all TS captures: keyword, string, keyword.other, +-- keyword.directive, tag, label, function, string.special + +library ieee; +use ieee.std_logic_1164.all; +use ieee.numeric_std.all; + +-- Entity with generic and port +entity counter is + generic ( + WIDTH : natural := 8; + MAX_VAL : natural := 255 + ); + port ( + clk : in std_logic; + rst : in std_logic; + enable : in std_logic; + count : out std_logic_vector(WIDTH-1 downto 0); + data : inout std_logic_vector(7 downto 0); + status : buffer std_logic); +end entity counter; + +-- Architecture +architecture rtl of counter is + signal cnt_reg : unsigned(WIDTH-1 downto 0); + signal next_cnt : unsigned(WIDTH-1 downto 0); + variable temp : integer := 0; + constant ZERO : unsigned(WIDTH-1 downto 0) + := (others => '0'); + constant ONE : unsigned(WIDTH-1 downto 0) + := to_unsigned(1, WIDTH); + + -- Type and subtype declarations + type state_type is (IDLE, RUNNING, DONE); + subtype byte_t is std_logic_vector(7 downto 0); + type mem_array is array (0 to 255) of byte_t; + signal current_state : state_type; + signal mem : mem_array; + + -- Alias and attribute + alias cnt_msb : std_logic is cnt_reg(WIDTH-1); + attribute syn_keep : boolean; + attribute syn_keep of cnt_reg : signal is true; + + -- Component declaration + component adder is + generic (N : natural := 8); + port (a : in unsigned(N-1 downto 0); + b : in unsigned(N-1 downto 0); + s : out unsigned(N-1 downto 0)); + end component adder; + + -- Function and procedure declarations + function max_of(a : integer; b : integer) + return integer is + begin + if a > b then return a; + else return b; end if; + end function max_of; + + procedure reset_counter( + signal cnt : out 
unsigned(WIDTH-1 downto 0) + ) is begin cnt <= ZERO; + end procedure reset_counter; + +begin + + -- Concurrent signal assignment with operators + next_cnt <= cnt_reg + ONE; + count <= std_logic_vector(cnt_reg); + status <= '1'; + + -- Process with sensitivity list + clk_proc : process (clk, rst) + begin + if rst = '1' then + cnt_reg <= ZERO; + current_state <= IDLE; + elsif rising_edge(clk) then + if enable = '1' then + cnt_reg <= next_cnt; + end if; + end if; + end process clk_proc; + + -- State machine with case/when + state_proc : process (clk) + begin + if rising_edge(clk) then + case current_state is + when IDLE => current_state <= RUNNING; + when RUNNING => current_state <= DONE; + when DONE => current_state <= IDLE; + when others => current_state <= IDLE; + end case; + end if; + end process state_proc; + + -- Word operators: not, and, or, nand, nor, xor, xnor + data(0) <= not rst; data(1) <= clk and enable; + data(2) <= clk or rst; data(3) <= clk nand enable; + data(4) <= clk nor rst; data(5) <= clk xor enable; + data(6) <= clk xnor rst; + + -- Generate statement with label + gen_block : for i in 0 to WIDTH-1 generate + data(i) <= cnt_reg(i); + end generate gen_block; + + -- Component instantiation with port map + add_inst : adder + generic map (N => WIDTH) + port map (a => cnt_reg, b => ONE, s => open); + + -- With/select concurrent assignment + with current_state select data <= + x"00" when IDLE, x"01" when RUNNING, + x"FF" when DONE, x"AA" when others; + + -- Assert statement + assert MAX_VAL > 0 report "positive" severity failure; + + -- Wait statement + stim_proc : process + begin + wait until rising_edge(clk); + wait for 10 ns; wait; + end process stim_proc; + + -- Character/bit string literals, strings, concat + data(7) <= 'Z'; mem(0) <= x"DEAD"; + mem(1) <= b"10101010"; mem(2) <= o"377"; + assert false report "Complete" severity note; + data <= cnt_reg(3 downto 0) & "0000"; + + -- Arithmetic: + - * / ** and comparison: = /= < > + temp := 
max_of(WIDTH, 16); + temp := temp + 1 - 1; temp := temp * 2 / 2; + temp := 2 ** WIDTH; + assert temp = 256 report "unexpected" severity warning; + assert temp /= 0 report "zero" severity error; + + -- Block statement + ctrl_block : block + begin data <= (others => '0'); + end block ctrl_block; + + -- Additional: package body; configuration; impure; + -- file f : text open; shared variable sv := 0; + +end architecture rtl; diff --git a/tests/syntax/samples/xml-report.md b/tests/syntax/samples/xml-report.md new file mode 100644 index 0000000000..4b232162b1 --- /dev/null +++ b/tests/syntax/samples/xml-report.md @@ -0,0 +1,69 @@ +XML syntax highlighting: TS vs Legacy comparison report +======================================================= + +Sample file: `tests/syntax/samples/xml.xml` +Legacy reference: `misc/syntax/xml.syntax` +TS query: `misc/syntax-ts/queries-override/xml-highlights.scm` +TS colors: `misc/syntax-ts/colors.ini` `[xml]` + +Aligned with legacy +------------------- + +- Tag names (`catalog`, `book`, `title`, `author`, etc.): `white` via + `@tag.special` - MATCH +- Angle brackets and delimiters (`<`, `>`, ``): `white` via + `@tag.special` - MATCH +- Attribute equals sign (`=`): `yellow` via `@keyword` - MATCH +- Attribute values (`"bk101"`, `"USD"`, `"Fiction"`): `brightcyan` via + `@delimiter` - MATCH +- Comments (``): `brightgreen` via `@comment.special` - MATCH +- `` prolog: `yellow` via `@keyword` on `XMLDecl` - MATCH (legacy uses + `white` on `red` background for the prolog, but the tag-level TS match is + close) +- `` declaration: `yellow` via `@keyword` - MATCH +- Processing instructions (``, ``): `lightgray` via + `@constant` - MATCH +- Entity references (`&`, `&publisher;`): `white` via `@tag.special` - MATCH + (legacy also uses `white` for `&*;`) +- Character references (`©`, `€`): `white` via `@tag.special` - + MATCH +- Self-closing elements (``, `
`): `white` tags with `white` + delimiters - MATCH +- Namespaced tag names (`dc:creator`, `dc:description`): `white` via + `@tag.special` - MATCH +- Text content: `lightgray` via `@variable` - MATCH (legacy leaves text as + default) + +Intentional improvements over legacy +------------------------------------- + +- TS and legacy produce identical output for this XML sample. The outputs match + character-for-character in terms of color assignments. This indicates + excellent alignment. +- TS handles arbitrary tag names and attribute names uniformly, while legacy + relies on the generic `< >` context pattern. Both happen to produce the same + colors for XML. +- TS properly identifies `PEReference` (`%*;`) entities via dedicated capture, + while legacy handles them via keyword pattern in the DOCTYPE context. + +Known shortcomings +------------------ + +- Legacy uses `white` text on `red` background for `` prolog and + `brightred` for `xmlns=` namespace declarations. TS colors the `` + processing instruction delimiters as `white` via `@tag.special` and `xmlns` as + a regular attribute (yellow `=`, brightcyan value), losing the special + red-background visual distinction. +- Legacy colors `DOCTYPE`, `PUBLIC`, `SYSTEM`, `ENTITY`, `ELEMENT`, `ATTLIST` + keywords distinctly within ` + + + + + + + + + + +]> + + + + + + + + XML Developer's Guide + Author One + Gambardella, Matthew + 2024 + 44.95 + An in-depth look at creating applications with XML. + + + + Midnight Rain + Ralls, Kim + 2023 + 5.95 + A former architect battles corporate zombies & evil. + + + + + &publisher; + ©right; All rights reserved. + + + + + Copyright: © + Euro: € + Snowman: ☃ + + + + + + + + + + +
+ + + The Great Novel + Smith, Jane + + +
+
+ + + + +
+ + + + This is mixed content with inline elements + and an entity: & plus a char ref: < + + + + + + + + + + + + + + + A catalog of books available for purchase. + + + + + + + + Deep content + + + + + +
diff --git a/tests/syntax/samples/yaml-report.md b/tests/syntax/samples/yaml-report.md new file mode 100644 index 0000000000..7ec8c2654f --- /dev/null +++ b/tests/syntax/samples/yaml-report.md @@ -0,0 +1,66 @@ +YAML syntax highlighting: TS vs Legacy comparison report +========================================================= + +Sample file: `yaml.yaml` +Legacy reference: `misc/syntax/yaml.syntax` +TS query: `misc/syntax-ts/queries-override/yaml-highlights.scm` +TS colors: `misc/syntax-ts/colors.ini` `[yaml]` + +Aligned with legacy +------------------- + +- Mapping keys: `yellow` - MATCH (legacy uses yellow for `key:` pattern, TS uses + `@property.key`) +- Comments (`#`): `brown` - MATCH +- Double-quoted strings: `green` - MATCH +- Single-quoted strings: `green` - MATCH +- Booleans (`true`, `false`): `brightmagenta` - MATCH (legacy uses + `brightmagenta` for whole-word `true`/`false`) +- `null`: `brightmagenta` - MATCH +- Document start marker `---`: `brightcyan` - MATCH +- Escape sequences in strings (`\n`, `\t`, `\u2603`, `\x41`): `brightgreen` - + MATCH +- Block scalars (`|` and `>` content): `brown` - MATCH (both legacy and TS + render block scalar content in brown) +- Flow delimiters (`{`, `}`, `[`, `]`, `,`): `brightcyan` - MATCH + +Intentional improvements over legacy +------------------------------------- + +- Tilde `~` as null is recognized and highlighted as `brightmagenta` by TS. + Legacy did not have a rule for `~` as null. +- Numbers (integers and floats like `5432`, `3.14159`, `1.5e+10`) are + highlighted in `lightgray` by TS. Legacy left numbers as plain default text. +- Special float values (`.inf`, `.nan`) are highlighted in `lightgray` by TS. + Legacy had no rule for these. +- Anchors (`&defaults`) and aliases (`*defaults`) are highlighted in `cyan` by + TS (via `@label`). Legacy left these as default text. +- Tags (`!!str`, `!!binary`, `!custom`) are highlighted in `yellow` by TS (via + `@type`). Legacy had no tag highlighting. 
+- Colons, dashes, `>`, `|`, and `?` as operators are highlighted in `brightcyan` + by TS. Legacy highlighted `-` in flow context but not consistently as + operators. +- Document end marker `...` is highlighted in `brightcyan` by TS. Legacy did not + highlight this marker. +- Flow pair keys (inside `{name: John}`) are properly highlighted as `yellow` by + TS. Legacy had difficulty with inline flow mapping keys. +- The `?` explicit key indicator is highlighted in `brightcyan` by TS. Legacy + did not handle this syntax. + +Known shortcomings +------------------ + +- Timestamps (`2024-01-15`, `2024-01-15T10:30:00Z`) are not highlighted by TS + despite having a `timestamp_scalar` capture mapped to `@string.special`. The + tree-sitter YAML parser may not recognize all timestamp formats, or timestamps + are being parsed as plain scalars. +- The `<<` merge key in alias references (`<<: *defaults`) is highlighted as a + regular mapping key in `yellow`. There is no special treatment for merge keys. +- Unquoted string values (plain scalars like `localhost`, `mydb`) render as + default text in both TS and legacy. This is intentional to keep plain text + readable but means no distinction between unquoted strings and other plain + text. +- The legacy `yaml.syntax` used complex regex to detect keys (requiring + alphanumeric characters followed by colon-space), which sometimes missed keys + or grabbed too much. TS parsing of keys is structurally correct but shows the + colon separately in `brightcyan` rather than as part of the key. 
diff --git a/tests/syntax/samples/yaml.yaml b/tests/syntax/samples/yaml.yaml new file mode 100644 index 0000000000..fdd304264b --- /dev/null +++ b/tests/syntax/samples/yaml.yaml @@ -0,0 +1,102 @@ +# Sample YAML file demonstrating syntax highlighting features +# Exercises all tree-sitter captures from yaml-highlights.scm + +--- + +# Block mapping with various key types +simple_key: plain value +quoted_key: "double quoted value" +single_quoted: 'single quoted value' + +# Nested mappings +database: + host: localhost + port: 5432 + name: mydb + +# Booleans and null +enabled: true +disabled: false +empty: null +also_empty: ~ + +# Numbers (integer and float) +count: 42 +negative: -7 +hex_value: 0x1A +octal: 0o17 +pi: 3.14159 +scientific: 1.5e+10 +infinity: .inf +not_a_number: .nan + +# Timestamps +created: 2024-01-15 +updated: 2024-01-15T10:30:00Z + +# Block scalars (literal and folded) +literal_block: | + This is a literal block scalar. + Line breaks are preserved. + Each line stays as-is. + +folded_block: > + This is a folded block scalar. + Lines are joined together. + Blank lines start new paragraphs. + +# Sequences +fruits: + - apple + - banana + - cherry + +# Flow style +flow_mapping: {name: John, age: 30} +flow_sequence: [1, 2, 3, 4, 5] + +# Anchors and aliases +defaults: &defaults + adapter: postgres + host: localhost + +development: + <<: *defaults + database: dev_db + +production: + <<: *defaults + database: prod_db + +# Tags +tagged_value: !!str 123 +binary_data: !!binary | + R0lGODlhAQABAIAAAP///wAAACwAAAAAAQABAAACAkQBADs= + +custom_tag: !custom + field: value + +# Escape sequences in strings +escaped: "line1\nline2\ttab" +unicode: "snowman: \u2603" +hex_escape: "\x41\x42\x43" + +# Complex keys (flow pairs) +? complex key +: complex value + +# Document end marker +... + +--- +# Second document +second: document + +# Operator characters used in YAML +list_item: + - first + - second +colon_usage: value +question_mark: + ? 
explicit_key + : explicit_value From 0300b645c8bf0846c68ab8221439451c1dc65423 Mon Sep 17 00:00:00 2001 From: Jiri Tyr Date: Thu, 2 Apr 2026 12:22:44 +0100 Subject: [PATCH 11/16] Fixing HCL and Terraform syntax; updating grammars bundle version Signed-off-by: Jiri Tyr --- misc/syntax-ts/colors.ini | 12 ++ misc/syntax-ts/display-names | 3 +- misc/syntax-ts/extensions | 3 +- .../queries-override/hcl-highlights.scm | 13 +- .../queries-override/terraform-highlights.scm | 95 +++++++++ misc/syntax-ts/symbols | 1 + scripts/ts-grammars-download.sh | 2 +- tests/syntax/samples/hcl-report.md | 63 ++++++ tests/syntax/samples/hcl.hcl | 130 ++++++++++++ tests/syntax/samples/terraform.tf | 189 ++++++++++++++++++ tests/syntax/samples/terraform.tf-report.md | 84 ++++++++ 11 files changed, 588 insertions(+), 7 deletions(-) create mode 100644 misc/syntax-ts/queries-override/terraform-highlights.scm create mode 100644 tests/syntax/samples/hcl-report.md create mode 100644 tests/syntax/samples/hcl.hcl create mode 100644 tests/syntax/samples/terraform.tf create mode 100644 tests/syntax/samples/terraform.tf-report.md diff --git a/misc/syntax-ts/colors.ini b/misc/syntax-ts/colors.ini index 9945debd90..c16a1eeb4d 100644 --- a/misc/syntax-ts/colors.ini +++ b/misc/syntax-ts/colors.ini @@ -307,6 +307,18 @@ comment = brown; constant = lightgray; delimiter = brightcyan; keyword = yellow; +keyword.directive = brightmagenta; +number = lightgray; +operator = brightcyan; +string = green; +type = yellow; + +[terraform] +comment = brown; +constant = lightgray; +delimiter = brightcyan; +keyword = yellow; +keyword.directive = brightmagenta; number = lightgray; operator = brightcyan; string = green; diff --git a/misc/syntax-ts/display-names b/misc/syntax-ts/display-names index 5be470973b..87f84ee75c 100644 --- a/misc/syntax-ts/display-names +++ b/misc/syntax-ts/display-names @@ -56,7 +56,8 @@ hack Hack hare Hare haskell Haskell Program haxe Haxe -hcl Terraform/HCL +hcl HCL +terraform Terraform heex HEEx 
hjson HJSON hlsl HLSL Shader diff --git a/misc/syntax-ts/extensions b/misc/syntax-ts/extensions index 4e8932cb63..4a1a7eaa60 100644 --- a/misc/syntax-ts/extensions +++ b/misc/syntax-ts/extensions @@ -53,7 +53,8 @@ hack .hack hare .hare haskell .hs .lhs haxe .hx -hcl .tf .tfvars .hcl +hcl .hcl +terraform .tf .tfvars heex .heex hjson .hjson hlsl .hlsl diff --git a/misc/syntax-ts/queries-override/hcl-highlights.scm b/misc/syntax-ts/queries-override/hcl-highlights.scm index 589938c6f5..b269246d56 100644 --- a/misc/syntax-ts/queries-override/hcl-highlights.scm +++ b/misc/syntax-ts/queries-override/hcl-highlights.scm @@ -1,5 +1,6 @@ -;; Tree-sitter highlight queries for HCL (Terraform) language -;; Adapted from helix-editor queries for the tree-sitter-hcl grammar +;; Tree-sitter highlight queries for generic HCL files +;; For Terraform-specific highlighting (.tf/.tfvars), see terraform-highlights.scm +;; HCL is a generic language where block names are arbitrary identifiers. [ "if" @@ -57,8 +58,12 @@ ((identifier) @type (#match? @type "^(bool|string|number|object|tuple|list|map|set|any)$")) -((identifier) @keyword - (#match? @keyword "^(var|local|path|module|root|cwd|resource|variable|data|locals|terraform|provider|output)$")) +;; Top-level block names -> brightmagenta (keyword.directive) +(config_file + (body + (block + (identifier) @keyword.directive))) + (comment) @comment (null_lit) @constant diff --git a/misc/syntax-ts/queries-override/terraform-highlights.scm b/misc/syntax-ts/queries-override/terraform-highlights.scm new file mode 100644 index 0000000000..5a629a3809 --- /dev/null +++ b/misc/syntax-ts/queries-override/terraform-highlights.scm @@ -0,0 +1,95 @@ +;; Tree-sitter highlight queries for Terraform (.tf/.tfvars) files +;; Uses the HCL grammar with Terraform-specific variable reference prefixes. +;; Block name coloring follows the same generic principle as HCL. + +[ + "if" + "else" + "endif" + "for" + "endfor" + "in" +] @keyword + +[ + (ellipsis) + "?" 
+ "=>" +] @operator + +[ + "!" + "*" + "/" + "%" + "+" + "-" + ">" + ">=" + "<" + "<=" + "==" + "!=" + "&&" + "||" +] @operator + +[ + "." + ".*" + "," + "[*]" +] @delimiter + +[ + "{" + "}" + "[" + "]" + "(" + ")" +] @delimiter + +[ + ":" + "=" +] @operator + +((identifier) @type + (#match? @type "^(bool|string|number|object|tuple|list|map|set|any)$")) + +;; Top-level block names -> brightmagenta (keyword.directive) +(config_file + (body + (block + (identifier) @keyword.directive))) + + +;; Terraform variable reference prefixes +(variable_expr + (identifier) @keyword + (#match? @keyword "^(var|local|data|module|path|terraform|count|each|self)$")) + +(comment) @comment +(null_lit) @constant +(numeric_lit) @number +(bool_lit) @constant + +[ + (template_interpolation_start) + (template_interpolation_end) + (template_directive_start) + (template_directive_end) + (strip_marker) +] @operator + +[ + (heredoc_identifier) + (heredoc_start) +] @string + +[ + (quoted_template_start) + (quoted_template_end) + (template_literal) +] @string diff --git a/misc/syntax-ts/symbols b/misc/syntax-ts/symbols index 6d4a512173..28e8324f88 100644 --- a/misc/syntax-ts/symbols +++ b/misc/syntax-ts/symbols @@ -10,5 +10,6 @@ dtd xml html_blade blade lua nlua robots robots_txt +terraform hcl tsx typescript vbnet tree_sitter_vb_dotnet diff --git a/scripts/ts-grammars-download.sh b/scripts/ts-grammars-download.sh index 1ec0decc36..16b3b1e1c3 100755 --- a/scripts/ts-grammars-download.sh +++ b/scripts/ts-grammars-download.sh @@ -2,7 +2,7 @@ set -euo pipefail -TS_GRAMMARS_VERSION='2026.04.01' +TS_GRAMMARS_VERSION='2026.04.02' REPO_ROOT="$(cd "$(dirname "$0")/.." 
&& pwd)" BASE_URL='https://github.com/jtyr/tree-sitter-grammars/releases/download' diff --git a/tests/syntax/samples/hcl-report.md b/tests/syntax/samples/hcl-report.md new file mode 100644 index 0000000000..d37444cfed --- /dev/null +++ b/tests/syntax/samples/hcl-report.md @@ -0,0 +1,63 @@ +HCL syntax highlighting: TS report +==================================== + +Sample file: `hcl.hcl` +TS query: `misc/syntax-ts/queries-override/hcl-highlights.scm` +TS colors: `misc/syntax-ts/colors.ini` `[hcl]` +Grammar: `hcl` (language: `hcl`) +Legacy reference: none (legacy produces no highlighting for `.hcl` +files) + +Note: There is no legacy syntax highlighting for generic HCL files +in MC. This report documents the TS highlighting choices only. + +Color assignments +----------------- + +- Top-level block names (`service`, `job`, `locals`, `output`, + `variable`, etc.): `brightmagenta` (keyword.directive) — any + identifier that starts a top-level block gets this color. HCL is + a generic language with no reserved block names. +- Nested block names (`group`, `task`, `config`, `resources`, etc.): + DEFAULT — inner blocks are not colored to avoid visual noise. +- Control flow keywords (`if`, `else`, `endif`, `for`, `endfor`, + `in`): `yellow`. +- Comments (`#`, `//`, `/* */`): `brown`. +- Strings (quoted, heredocs): `green`. +- Numbers: `lightgray`. +- Booleans (`true`, `false`): `lightgray` (constant). +- Null (`null`): `lightgray` (constant). +- Type references (`string`, `number`, `bool`, `object`, `tuple`, + `list`, `map`, `set`, `any`): `yellow` (type). +- Operators (`=`, `+`, `-`, `*`, `/`, `%`, `!`, `==`, `!=`, `<`, + `>`, `<=`, `>=`, `&&`, `||`, `?`, `=>`, `:`): `brightcyan`. +- Delimiters (`{`, `}`, `[`, `]`, `(`, `)`, `,`, `.`, `.*`, + `[*]`): `brightcyan`. +- Template interpolation (`${`, `}`): `brightcyan` (operator). +- Heredoc identifiers: `green`. +- Splat expressions (`[*]`, `.*`): `brightcyan` (delimiter). 
+ +Design decisions +---------------- + +- HCL is a generic configuration language with no reserved block + names. All top-level block identifiers are colored uniformly as + `brightmagenta` regardless of their name. This distinguishes + structural blocks from attribute keys without hardcoding any + specific keyword list. +- Nested block names are left as DEFAULT to avoid confusion with + variable references and attribute keys, which also appear as + plain identifiers. +- No variable reference prefixes are colored (unlike Terraform's + `var.`, `local.`, etc.) since HCL itself has no such convention. + Applications that embed HCL may define their own prefixes. + +Known shortcomings +------------------ + +- Legacy MC has no highlighting for `.hcl` files at all, so there + is nothing to compare against. +- Function calls in expressions (e.g. `join()`, `upper()`) are not + colored. The HCL grammar parses them as `function_call` nodes but + the query does not capture them to keep the highlighting minimal + for a generic language. diff --git a/tests/syntax/samples/hcl.hcl b/tests/syntax/samples/hcl.hcl new file mode 100644 index 0000000000..4ba30fda48 --- /dev/null +++ b/tests/syntax/samples/hcl.hcl @@ -0,0 +1,130 @@ +# Generic HCL sample: demonstrate all syntax features + +# Simple block +service "web" { + port = 8080 + address = "0.0.0.0" +} + +# Nested blocks +job "example" { + group "cache" { + task "redis" { + driver = "docker" + + config { + image = "redis:7" + ports = ["redis"] + } + + resources { + cpu = 500 + memory = 256 + } + } + } +} + +# Variables and expressions +locals { + environment = "production" + region = "us-east-1" + enabled = true + count = 3 + ratio = 0.75 + nothing = null +} + +# Conditional expression +output "message" { + value = local.enabled ? 
"yes" : "no" +} + +# For expression +output "names" { + value = [for item in local.items : item.name] +} + +output "filtered" { + value = {for k, v in local.map : k => v if v != ""} +} + +# String interpolation +output "greeting" { + value = "Hello, ${local.environment}!" +} + +# Heredoc +output "script" { + value = <<-EOT + #!/bin/bash + echo "Hello" + echo "World" + EOT +} + +# Operators +locals { + sum = 1 + 2 + diff = 10 - 3 + product = 4 * 5 + quotient = 10 / 3 + remainder = 10 % 3 + negative = -1 + not_true = !true + and_result = true && false + or_result = true || false + eq = 1 == 1 + neq = 1 != 2 + gt = 3 > 2 + gte = 3 >= 3 + lt = 1 < 2 + lte = 1 <= 1 +} + +# Type references +variable "config" { + type = object({ + name = string + count = number + enabled = bool + tags = map(string) + ports = list(number) + addrs = set(string) + data = tuple([string, number]) + extra = any + }) +} + +# Splat and index expressions +output "all_ids" { + value = resource.example[*].id +} + +output "first" { + value = resource.example[0].id +} + +# Collection values +locals { + list_val = [1, 2, 3] + map_val = {key1 = "val1", key2 = "val2"} + empty_map = {} +} + +# Block with multiple labels +provisioner "remote-exec" "setup" { + command = "echo hello" +} + +# Comments +# Line comment + +// Another line comment + +/* Block comment */ + +/* + Multi-line + block comment +*/ diff --git a/tests/syntax/samples/terraform.tf b/tests/syntax/samples/terraform.tf new file mode 100644 index 0000000000..91568feaf8 --- /dev/null +++ b/tests/syntax/samples/terraform.tf @@ -0,0 +1,189 @@ +# Terraform sample: demonstrate all syntax features + +terraform { + required_version = ">= 1.0" + + required_providers { + aws = { + source = "hashicorp/aws" + version = "~> 5.0" + } + azurerm = { + source = "hashicorp/azurerm" + version = "~> 3.0" + } + } + + backend "s3" { + bucket = "my-terraform-state" + key = "prod/terraform.tfstate" + region = "us-east-1" + } +} + +# Provider configuration +provider 
"aws" { + region = var.region +} + +# Variables +variable "region" { + description = "AWS region" + type = string + default = "us-east-1" +} + +variable "instance_config" { + type = object({ + ami = string + instance_type = string + count = number + tags = map(string) + }) +} + +# Locals +locals { + environment = "production" + common_tags = { + Environment = local.environment + ManagedBy = "terraform" + Project = var.project_name + } +} + +# Data source +data "aws_ami" "ubuntu" { + most_recent = true + + filter { + name = "name" + values = ["ubuntu/images/hvm-ssd/ubuntu-*"] + } + + owners = ["099720109477"] +} + +# Resource with lifecycle and provisioner +resource "aws_instance" "web" { + count = var.instance_config.count + ami = data.aws_ami.ubuntu.id + instance_type = var.instance_config.instance_type + subnet_id = module.vpc.subnet_ids[count.index] + + tags = merge(local.common_tags, { + Name = "web-${count.index}" + Role = "webserver" + }) + + lifecycle { + create_before_destroy = true + prevent_destroy = false + ignore_changes = [tags["UpdatedAt"]] + } + + provisioner "remote-exec" { + inline = [ + "sudo apt-get update", + "sudo apt-get install -y nginx", + ] + } + + depends_on = [aws_security_group.web] +} + +# Module call +module "vpc" { + source = "./modules/vpc" + + cidr_block = "10.0.0.0/16" + azs = ["us-east-1a", "us-east-1b"] + tags = local.common_tags +} + +# Output +output "instance_ids" { + description = "IDs of the created instances" + value = aws_instance.web[*].id +} + +output "first_ip" { + value = aws_instance.web[0].public_ip +} + +# Moved block +moved { + from = aws_instance.old_web + to = aws_instance.web +} + +# Removed block +removed { + from = aws_instance.deprecated + + lifecycle { + destroy = false + } +} + +# For expressions +locals { + instance_map = { + for idx, inst in aws_instance.web : + inst.tags["Name"] => inst.id + } + + filtered = [ + for inst in aws_instance.web : + inst.id if inst.tags["Role"] == "webserver" + ] +} + +# 
Conditional +resource "aws_eip" "web" { + count = var.instance_config.count > 0 ? var.instance_config.count : 0 + instance = each.value.id + domain = "vpc" +} + +# Dynamic block +resource "aws_security_group" "web" { + name = "web-sg" + + dynamic "ingress" { + for_each = var.ingress_rules + content { + from_port = ingress.value.from + to_port = ingress.value.to + protocol = ingress.value.protocol + cidr_blocks = ingress.value.cidrs + } + } +} + +# String interpolation and heredoc +resource "aws_iam_policy" "example" { + name = "example-${local.environment}" + + policy = <<-EOT + { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": "s3:GetObject", + "Resource": "arn:aws:s3:::${var.bucket_name}/*" + } + ] + } + EOT +} + +# Terraform functions in expressions +locals { + joined = join(",", var.list) + upper_env = upper(local.environment) + encoded = base64encode("hello") + file_data = file("${path.module}/data.json") + tmpl = templatefile("${path.module}/init.tpl", {name = var.name}) +} diff --git a/tests/syntax/samples/terraform.tf-report.md b/tests/syntax/samples/terraform.tf-report.md new file mode 100644 index 0000000000..12ef61aac1 --- /dev/null +++ b/tests/syntax/samples/terraform.tf-report.md @@ -0,0 +1,84 @@ +Terraform syntax highlighting: TS report +========================================= + +Sample file: `terraform.tf` +TS query: `misc/syntax-ts/queries-override/terraform-highlights.scm` +TS colors: `misc/syntax-ts/colors.ini` `[terraform]` +Grammar: `terraform` (alias for `hcl` parser via symbols file) +Legacy reference: none (legacy produces no highlighting for `.tf` +files) + +Note: There is no legacy syntax highlighting for Terraform files in +MC. The `terraform` grammar is an alias for the `hcl` parser with +Terraform-specific query overrides. This report documents the TS +highlighting choices only. 
+ +Color assignments +----------------- + +- Top-level block names (`terraform`, `resource`, `data`, `variable`, + `locals`, `module`, `provider`, `output`, `moved`, `removed`): + `brightmagenta` (keyword.directive) — only blocks at the root + level of the file get this color. +- Nested block names (`backend`, `required_providers`, `lifecycle`, + `provisioner`, `dynamic`, `content`, `filter`, `ingress`): + DEFAULT — inner blocks are not colored to reduce visual noise and + avoid confusion with attribute keys. +- Variable reference prefixes (`var`, `local`, `data`, `module`, + `path`, `terraform`, `count`, `each`, `self`): `yellow` — these + are the standard Terraform reference objects used in expressions + like `var.region`, `local.tags`, `data.aws_ami.id`, + `count.index`, `each.value`, `self.id`, `path.module`. +- Control flow keywords (`if`, `else`, `endif`, `for`, `endfor`, + `in`): `yellow`. +- Comments (`#`, `//`, `/* */`): `brown`. +- Strings (quoted, heredocs): `green`. +- Numbers: `lightgray`. +- Booleans (`true`, `false`): `lightgray` (constant). +- Null (`null`): `lightgray` (constant). +- Type references (`string`, `number`, `bool`, `object`, `tuple`, + `list`, `map`, `set`, `any`): `yellow` (type). +- Operators (`=`, `+`, `-`, `*`, `/`, `%`, `!`, `==`, `!=`, `<`, + `>`, `<=`, `>=`, `&&`, `||`, `?`, `=>`, `:`): `brightcyan`. +- Delimiters (`{`, `}`, `[`, `]`, `(`, `)`, `,`, `.`, `.*`, + `[*]`): `brightcyan`. +- Template interpolation (`${`, `}`): `brightcyan` (operator). +- Heredoc identifiers: `green`. + +Design decisions +---------------- + +- Terraform uses a separate grammar name (`terraform`) mapped to the + same `hcl` parser via the `symbols` config file. This allows + Terraform-specific keyword lists and color choices without + affecting generic `.hcl` files. +- Top-level vs nested block distinction is achieved using the + `config_file > body > block > identifier` pattern, which matches + only blocks at the root level of the file. 
+- Variable reference prefixes are matched in `variable_expr` context + only, preventing false coloring of map keys and attribute values + that happen to share the same name (e.g. `count = 3` in a + variable type definition vs `count.index` in an expression). +- Resource type references (e.g. `azurerm_user_assigned_identity` in + `azurerm_user_assigned_identity.aks.id`) are left as DEFAULT. + While these are meaningful references, they share the same + `variable_expr` node type as map keys, making it impossible to + distinguish them structurally. + +Known shortcomings +------------------ + +- Legacy MC has no highlighting for `.tf` files at all, so there is + nothing to compare against. +- Resource type references in expressions (e.g. + `aws_instance.web[0].id`) are not colored. The first identifier + is a `variable_expr` which is the same node type used for map + keys, so coloring all `variable_expr` would cause false positives. +- Terraform built-in functions (`join`, `upper`, `merge`, `file`, + `templatefile`, etc.) are not colored. Adding them would require + a `#any-of?` predicate on `function_call` nodes, similar to the + gotmpl Sprig function approach. +- Nested block names like `required_providers` inside `terraform {}` + are DEFAULT. The query does not constrain nested blocks by the + name of their parent block, so all nested blocks are treated + uniformly.
From f9bfe97f90edd26a8c1484197b9cec9a65c53d27 Mon Sep 17 00:00:00 2001 From: Jiri Tyr Date: Thu, 2 Apr 2026 14:38:10 +0100 Subject: [PATCH 12/16] Reverting LICENSE symlink and removing block file fix Signed-off-by: Jiri Tyr --- INSTALL | 381 +--------------------------------------------- src/editor/edit.c | 9 -- 2 files changed, 1 insertion(+), 389 deletions(-) mode change 100644 => 120000 INSTALL diff --git a/INSTALL b/INSTALL deleted file mode 100644 index d43221a2ca..0000000000 --- a/INSTALL +++ /dev/null @@ -1,380 +0,0 @@ -Installation Instructions -************************* - -Basic Installation -================== - - The following shell commands: - - test -f configure || ./bootstrap - ./configure - make - make install - -should configure, build, and install this package. The first line, -which bootstraps, is intended for developers; when building from -distribution tarballs it does nothing and can be skipped. A package -might name the bootstrapping script differently; if the name is -‘autogen.sh’, for example, the first line should say ‘./autogen.sh’ -instead of ‘./bootstrap’. - - The following more-detailed instructions are generic; see the -‘README’ file for instructions specific to this package. Some packages -provide this ‘INSTALL’ file but do not implement all of the features -documented below. The lack of an optional feature in a given package is -not necessarily a bug. More recommendations for GNU packages can be -found in the GNU Coding Standards. - - Many packages have scripts meant for developers instead of ordinary -builders, as they may use developer tools that are less commonly -installed, or they may access the network, which has privacy -implications. These scripts attempt to bootstrap by building the -‘configure’ script and related files, possibly using developer tools or -the network. 
Because the output of bootstrapping is system-independent, -it is normally run by a package developer so that its output can be put -into the distribution tarball and ordinary builders and users need not -bootstrap. Some packages have commands like ‘./autopull.sh’ and -‘./autogen.sh’ that you can run instead of ‘./bootstrap’, for more -fine-grained control over bootstrapping. - - The ‘configure’ script attempts to guess correct values for various -system-dependent variables used during compilation. It uses those -values to create a ‘Makefile’ in each directory of the package. It may -also create one or more ‘.h’ files containing system-dependent -definitions. Finally, it creates a script ‘config.status’ that you can -run in the future to recreate the current configuration, and a file -‘config.log’ containing output useful for debugging ‘configure’. - - It can also use an optional file (typically called ‘config.cache’ and -enabled with ‘--cache-file=config.cache’ or simply ‘-C’) that saves the -results of its tests to speed up reconfiguring. Caching is disabled by -default to prevent problems with accidental use of stale cache files. - - If you need to do unusual things to compile the package, please try -to figure out how ‘configure’ could check whether to do them, and mail -diffs or instructions to the address given in the ‘README’ so they can -be considered for the next release. If you are using the cache, and at -some point ‘config.cache’ contains results you don’t want to keep, you -may remove or edit it. - - The ‘autoconf’ program generates ‘configure’ from the file -‘configure.ac’. Normally you should edit ‘configure.ac’ instead of -editing ‘configure’ directly. - - The simplest way to compile this package is: - - 1. ‘cd’ to the directory containing the package’s source code. - - 2. 
If this is a developer checkout and file ‘configure’ does not yet - exist, run the bootstrapping script (typically ‘./bootstrap’ or - ‘./autogen.sh’) to bootstrap and create the file. You may need - special developer tools and network access to bootstrap, and the - network access may have privacy implications. - - 3. Type ‘./configure’ to configure the package for your system. This - might take a while. While running, ‘configure’ prints messages - telling which features it is checking for. - - 4. Type ‘make’ to compile the package. - - 5. Optionally, type ‘make check’ to run any self-tests that come with - the package, generally using the just-built uninstalled binaries. - - 6. Type ‘make install’ to install the programs and any data files and - documentation. When installing into a prefix owned by root, it is - recommended that the package be configured and built as a regular - user, and only the ‘make install’ phase executed with root - privileges. - - 7. Optionally, type ‘make installcheck’ to repeat any self-tests, but - this time using the binaries in their final installed location. - This target does not install anything. Running this target as a - regular user, particularly if the prior ‘make install’ required - root privileges, verifies that the installation completed - correctly. - - 8. You can remove the program binaries and object files from the - source code directory by typing ‘make clean’. To also remove the - files that ‘configure’ created (so you can compile the package for - a different kind of computer), type ‘make distclean’. There is - also a ‘make maintainer-clean’ target, but that is intended mainly - for the package’s developers. If you use it, you may have to - bootstrap again. - - 9. If the package follows the GNU Coding Standards, you can type ‘make - uninstall’ to remove the installed files. 
- -Installation Prerequisites -========================== - - Installation requires a POSIX-like environment with a shell and at -least the following standard utilities: - - awk cat cp diff echo expr false ls mkdir mv printf pwd rm rmdir sed - sort test tr - -This package’s installation may need other standard utilities such as -‘grep’, ‘make’, ‘sleep’ and ‘touch’, along with compilers like ‘gcc’. - -Compilers and Options -===================== - - Some systems require unusual options for compilation or linking that -the ‘configure’ script does not know about. Run ‘./configure --help’ -for details on some of the pertinent environment variables. - - You can give ‘configure’ initial values for configuration parameters -by setting variables in the command line or in the environment. Here is -an example: - - ./configure CC=gcc CFLAGS=-g LIBS=-lposix - - See “Defining Variables” for more details. - -Compiling For Multiple Architectures -==================================== - - You can compile the package for more than one kind of computer at the -same time, by placing the object files for each system in their own -directory. To do this, you can use GNU ‘make’. ‘cd’ to the directory -where you want the object files and executables to go and run the -‘configure’ script. ‘configure’ automatically checks for the source -code in the directory that ‘configure’ is in and in ‘..’. This is known -as a “VPATH” build. - - With a non-GNU ‘make’, it is safer to compile the package for one -system at a time in the source code directory. After you have installed -the package for one system, use ‘make distclean’ before reconfiguring -for another system. - - Some platforms, notably macOS, support “fat” or “universal” binaries, -where a single binary can execute on different architectures. On these -platforms you can configure and compile just once, with options specific -to that platform. 
- -Installation Names -================== - - By default, ‘make install’ installs the package’s commands under -‘/usr/local/bin’, include files under ‘/usr/local/include’, etc. You -can specify an installation prefix other than ‘/usr/local’ by giving -‘configure’ the option ‘--prefix=PREFIX’, where PREFIX must be an -absolute file name. - - You can specify separate installation prefixes for -architecture-specific files and architecture-independent files. If you -pass the option ‘--exec-prefix=PREFIX’ to ‘configure’, the package uses -PREFIX as the prefix for installing programs and libraries. -Documentation and other data files still use the regular prefix. - - In addition, if you use an unusual directory layout you can give -options like ‘--bindir=DIR’ to specify different values for particular -kinds of files. Run ‘configure --help’ for a list of the directories -you can set and what kinds of files go in them. In general, the default -for these options is expressed in terms of ‘${prefix}’, so that -specifying just ‘--prefix’ will affect all of the other directory -specifications that were not explicitly provided. - - The most portable way to affect installation locations is to pass the -correct locations to ‘configure’; however, many packages provide one or -both of the following shortcuts of passing variable assignments to the -‘make install’ command line to change installation locations without -having to reconfigure or recompile. - - The first method involves providing an override variable for each -affected directory. For example, ‘make install -prefix=/alternate/directory’ will choose an alternate location for all -directory configuration variables that were expressed in terms of -‘${prefix}’. Any directories that were specified during ‘configure’, -but not in terms of ‘${prefix}’, must each be overridden at install time -for the entire installation to be relocated. 
The approach of makefile -variable overrides for each directory variable is required by the GNU -Coding Standards, and ideally causes no recompilation. However, some -platforms have known limitations with the semantics of shared libraries -that end up requiring recompilation when using this method, particularly -noticeable in packages that use GNU Libtool. - - The second method involves providing the ‘DESTDIR’ variable. For -example, ‘make install DESTDIR=/alternate/directory’ will prepend -‘/alternate/directory’ before all installation names. The approach of -‘DESTDIR’ overrides is not required by the GNU Coding Standards, and -does not work on platforms that have drive letters. On the other hand, -it does better at avoiding recompilation issues, and works well even -when some directory options were not specified in terms of ‘${prefix}’ -at ‘configure’ time. - -Optional Features -================= - - If the package supports it, you can cause programs to be installed -with an extra prefix or suffix on their names by giving ‘configure’ the -option ‘--program-prefix=PREFIX’ or ‘--program-suffix=SUFFIX’. - - Some packages pay attention to ‘--enable-FEATURE’ and -‘--disable-FEATURE’ options to ‘configure’, where FEATURE indicates an -optional part of the package. They may also pay attention to -‘--with-PACKAGE’ and ‘--without-PACKAGE’ options, where PACKAGE is -something like ‘gnu-ld’. ‘./configure --help’ should mention the -‘--enable-...’ and ‘--with-...’ options that the package recognizes. - - Some packages offer the ability to configure how verbose the -execution of ‘make’ will be. For these packages, running ‘./configure ---enable-silent-rules’ sets the default to minimal output, which can be -overridden with ‘make V=1’; while running ‘./configure ---disable-silent-rules’ sets the default to verbose, which can be -overridden with ‘make V=0’. - -Specifying a System Type -======================== - - By default ‘configure’ builds for the current system. 
To create -binaries that can run on a different system type, specify a -‘--host=TYPE’ option along with compiler variables that specify how to -generate object code for TYPE. For example, to create binaries intended -to run on a 64-bit ARM processor: - - ./configure --host=aarch64-linux-gnu \ - CC=aarch64-linux-gnu-gcc \ - CXX=aarch64-linux-gnu-g++ - -If done on a machine that can execute these binaries (e.g., via -‘qemu-aarch64’, ‘$QEMU_LD_PREFIX’, and Linux’s ‘binfmt_misc’ -capability), the build behaves like a native build. Otherwise it is a -cross-build: ‘configure’ will make cross-compilation guesses instead of -running test programs, and ‘make check’ will not work. - - A system type can either be a short name like ‘mingw64’, or a -canonical name like ‘x86_64-pc-linux-gnu’. Canonical names have the -form CPU-COMPANY-SYSTEM where SYSTEM is either OS or KERNEL-OS. To -canonicalize and validate a system type, you can run the command -‘config.sub’, which is often squirreled away in a subdirectory like -‘build-aux’. For example: - - $ build-aux/config.sub arm64-linux - aarch64-unknown-linux-gnu - $ build-aux/config.sub riscv-lnx - Invalid configuration 'riscv-lnx': OS 'lnx' not recognized - -You can look at the ‘config.sub’ file to see which types are recognized. -If the file is absent, this package does not need the system type. - - If ‘configure’ fails with the diagnostic “cannot guess build type”. -‘config.sub’ did not recognize your system’s type. In this case, first -fetch the newest versions of these files from the GNU config package -(https://savannah.gnu.org/projects/config). If that fixes things, -please report it to the maintainers of the package containing -‘configure’. Otherwise, you can try the configure option ‘--build=TYPE’ -where TYPE comes close to your system type; also, please report the -problem to <config-patches@gnu.org>. - - For more details about configuring system types, see the Autoconf -documentation.
- -Sharing Defaults -================ - - If you want to set default values for ‘configure’ scripts to share, -you can create a site shell script called ‘config.site’ that gives -default values for variables like ‘CC’, ‘cache_file’, and ‘prefix’. -‘configure’ looks for ‘PREFIX/share/config.site’ if it exists, then -‘PREFIX/etc/config.site’ if it exists. Or, you can set the -‘CONFIG_SITE’ environment variable to the location of the site script. -A warning: not all ‘configure’ scripts look for a site script. - -Defining Variables -================== - - Variables not defined in a site shell script can be set in the -environment passed to ‘configure’. However, some packages may run -configure again during the build, and the customized values of these -variables may be lost. In order to avoid this problem, you should set -them in the ‘configure’ command line, using ‘VAR=value’. For example: - - ./configure CC=/usr/local2/bin/gcc - -causes the specified ‘gcc’ to be used as the C compiler (unless it is -overridden in the site shell script). - -Unfortunately, this technique does not work for ‘CONFIG_SHELL’ due to an -Autoconf limitation. Until the limitation is lifted, you can use this -workaround: - - CONFIG_SHELL=/bin/bash ./configure CONFIG_SHELL=/bin/bash - -‘configure’ Invocation -====================== - - ‘configure’ recognizes the following options to control how it -operates. - -‘--help’ -‘-h’ - Print a summary of all of the options to ‘configure’, and exit. - -‘--help=short’ -‘--help=recursive’ - Print a summary of the options unique to this package’s - ‘configure’, and exit. The ‘short’ variant lists options used only - in the top level, while the ‘recursive’ variant lists options also - present in any nested packages. - -‘--version’ -‘-V’ - Print the version of Autoconf used to generate the ‘configure’ - script, and exit. - -‘--cache-file=FILE’ - Enable the cache: use and save the results of the tests in FILE, - traditionally ‘config.cache’. 
FILE defaults to ‘/dev/null’ to - disable caching. - -‘--config-cache’ -‘-C’ - Alias for ‘--cache-file=config.cache’. - -‘--srcdir=DIR’ - Look for the package’s source code in directory DIR. Usually - ‘configure’ can determine that directory automatically. - -‘--prefix=DIR’ - Use DIR as the installation prefix. See “Installation Names” for - more details, including other options available for fine-tuning the - installation locations. - -‘--host=TYPE’ - Build binaries for system TYPE. See “Specifying a System Type”. - -‘--enable-FEATURE’ -‘--disable-FEATURE’ - Enable or disable the optional FEATURE. See “Optional Features”. - -‘--with-PACKAGE’ -‘--without-PACKAGE’ - Use or omit PACKAGE when building. See “Optional Features”. - -‘--quiet’ -‘--silent’ -‘-q’ - Do not print messages saying which checks are being made. To - suppress all normal output, redirect it to ‘/dev/null’ (any error - messages will still be shown). - -‘--no-create’ -‘-n’ - Run the configure checks, but stop before creating any output - files. - -‘configure’ also recognizes several environment variables, and accepts -some other, less widely useful, options. Run ‘configure --help’ for -more details. - -Copyright notice -================ - - Copyright © 1994–1996, 1999–2002, 2004–2017, 2020–2025 Free Software -Foundation, Inc. - - Copying and distribution of this file, with or without modification, -are permitted in any medium without royalty provided the copyright -notice and this notice are preserved. This file is offered as-is, -without warranty of any kind. 
diff --git a/INSTALL b/INSTALL new file mode 120000 index 0000000000..1f38889c39 --- /dev/null +++ b/INSTALL @@ -0,0 +1 @@ +doc/INSTALL \ No newline at end of file diff --git a/src/editor/edit.c b/src/editor/edit.c index 192f5957f9..90963f2548 100644 --- a/src/editor/edit.c +++ b/src/editor/edit.c @@ -1784,15 +1784,6 @@ edit_user_menu (WEdit *edit, const char *menu_file, int selected_entry) block_file = mc_config_get_full_path (EDIT_HOME_BLOCK_FILE); block_file_vpath = vfs_path_from_str (block_file); - /* Save the selected block to the block file before running the command. - This makes %b available to macro scripts that process the selection. */ - { - off_t start_mark, end_mark; - - if (eval_marks (edit, &start_mark, &end_mark)) - edit_save_block (edit, block_file, start_mark, end_mark); - } - const gboolean status_before_ok = mc_stat (block_file_vpath, &status_before) == 0; // run menu command. It can or can not create or modify block_file From 08c349f821d2e9d57908648a80c3ff06f10d060b Mon Sep 17 00:00:00 2001 From: Jiri Tyr Date: Sat, 11 Apr 2026 18:11:38 +0100 Subject: [PATCH 13/16] Use statically built objects from grammars repo Signed-off-by: Jiri Tyr --- INSTALL | 381 +++++++++++++++++- Makefile.am | 3 +- doc/TREE-SITTER | 310 +++++++------- m4.include/mc-with-tree-sitter.m4 | 82 ++-- misc/syntax-ts/Makefile.am | 59 +-- misc/syntax-ts/queries/.gitignore | 4 - .../ada-highlights.scm | 0 .../asm-highlights.scm | 0 .../awk-highlights.scm | 0 .../bash-highlights.scm | 0 .../bison-highlights.scm | 0 .../c-highlights.scm | 0 .../c_sharp-highlights.scm | 0 .../caddy-highlights.scm | 0 .../cmake-highlights.scm | 0 .../cobol-highlights.scm | 0 .../commonlisp-highlights.scm | 0 .../cpp-highlights.scm | 0 .../css-highlights.scm | 0 .../cuda-highlights.scm | 0 .../d-highlights.scm | 0 .../diff-highlights.scm | 0 .../dockerfile-highlights.scm | 0 .../dot-highlights.scm | 0 .../erlang-highlights.scm | 0 .../fortran-highlights.scm | 0 .../glsl-highlights.scm | 0
.../go-highlights.scm | 0 .../gotmpl-highlights.scm | 0 .../haskell-highlights.scm | 0 .../hcl-highlights.scm | 0 .../html-highlights.scm | 0 .../idl-highlights.scm | 0 .../ini-highlights.scm | 0 .../java-highlights.scm | 0 .../javascript-highlights.scm | 0 .../json-highlights.scm | 0 .../kotlin-highlights.scm | 0 .../latex-highlights.scm | 0 .../lua-highlights.scm | 0 .../mail-highlights.scm | 0 .../make-highlights.scm | 0 .../markdown-highlights.scm | 0 .../markdown-injections.scm | 0 .../markdown_inline-highlights.scm | 0 .../matlab-highlights.scm | 0 .../meson-highlights.scm | 0 .../muttrc-highlights.scm | 0 .../ocaml-highlights.scm | 0 .../pascal-highlights.scm | 0 .../perl-highlights.scm | 0 .../php-highlights.scm | 0 .../po-highlights.scm | 0 .../properties-highlights.scm | 0 .../proto-highlights.scm | 0 .../puppet-highlights.scm | 0 .../python-highlights.scm | 0 .../qmljs-highlights.scm | 0 .../r-highlights.scm | 0 .../ruby-highlights.scm | 0 .../rust-highlights.scm | 0 .../scala-highlights.scm | 0 .../slang-highlights.scm | 0 .../smalltalk-highlights.scm | 0 .../sql-highlights.scm | 0 .../strace-highlights.scm | 0 .../swift-highlights.scm | 0 .../tcl-highlights.scm | 0 .../terraform-highlights.scm | 0 .../toml-highlights.scm | 0 .../turtle-highlights.scm | 0 .../typescript-highlights.scm | 0 .../verilog-highlights.scm | 0 .../vhdl-highlights.scm | 0 .../xml-highlights.scm | 0 .../yaml-highlights.scm | 0 scripts/ts-grammars-download.sh | 85 ++-- src/Makefile.am | 4 + src/editor/Makefile.am | 2 +- src/editor/syntax_ts.c | 237 +++++------ src/editor/ts-grammars/.gitignore | 5 +- src/editor/ts-grammars/Makefile.am | 139 ++----- src/editor/ts-grammars/lisp | 1 - src/editor/ts-grammars/tree_sitter/alloc.h | 41 -- src/editor/ts-grammars/tree_sitter/array.h | 291 ------------- src/editor/ts-grammars/tree_sitter/parser.h | 310 -------------- .../ts-grammars/tree_sitter/ts_assert.h | 11 - 87 files changed, 772 insertions(+), 1193 deletions(-) mode change 120000 => 
100644 INSTALL delete mode 100644 misc/syntax-ts/queries/.gitignore rename misc/syntax-ts/{queries-override => queries}/ada-highlights.scm (100%) rename misc/syntax-ts/{queries-override => queries}/asm-highlights.scm (100%) rename misc/syntax-ts/{queries-override => queries}/awk-highlights.scm (100%) rename misc/syntax-ts/{queries-override => queries}/bash-highlights.scm (100%) rename misc/syntax-ts/{queries-override => queries}/bison-highlights.scm (100%) rename misc/syntax-ts/{queries-override => queries}/c-highlights.scm (100%) rename misc/syntax-ts/{queries-override => queries}/c_sharp-highlights.scm (100%) rename misc/syntax-ts/{queries-override => queries}/caddy-highlights.scm (100%) rename misc/syntax-ts/{queries-override => queries}/cmake-highlights.scm (100%) rename misc/syntax-ts/{queries-override => queries}/cobol-highlights.scm (100%) rename misc/syntax-ts/{queries-override => queries}/commonlisp-highlights.scm (100%) rename misc/syntax-ts/{queries-override => queries}/cpp-highlights.scm (100%) rename misc/syntax-ts/{queries-override => queries}/css-highlights.scm (100%) rename misc/syntax-ts/{queries-override => queries}/cuda-highlights.scm (100%) rename misc/syntax-ts/{queries-override => queries}/d-highlights.scm (100%) rename misc/syntax-ts/{queries-override => queries}/diff-highlights.scm (100%) rename misc/syntax-ts/{queries-override => queries}/dockerfile-highlights.scm (100%) rename misc/syntax-ts/{queries-override => queries}/dot-highlights.scm (100%) rename misc/syntax-ts/{queries-override => queries}/erlang-highlights.scm (100%) rename misc/syntax-ts/{queries-override => queries}/fortran-highlights.scm (100%) rename misc/syntax-ts/{queries-override => queries}/glsl-highlights.scm (100%) rename misc/syntax-ts/{queries-override => queries}/go-highlights.scm (100%) rename misc/syntax-ts/{queries-override => queries}/gotmpl-highlights.scm (100%) rename misc/syntax-ts/{queries-override => queries}/haskell-highlights.scm (100%) rename 
misc/syntax-ts/{queries-override => queries}/hcl-highlights.scm (100%) rename misc/syntax-ts/{queries-override => queries}/html-highlights.scm (100%) rename misc/syntax-ts/{queries-override => queries}/idl-highlights.scm (100%) rename misc/syntax-ts/{queries-override => queries}/ini-highlights.scm (100%) rename misc/syntax-ts/{queries-override => queries}/java-highlights.scm (100%) rename misc/syntax-ts/{queries-override => queries}/javascript-highlights.scm (100%) rename misc/syntax-ts/{queries-override => queries}/json-highlights.scm (100%) rename misc/syntax-ts/{queries-override => queries}/kotlin-highlights.scm (100%) rename misc/syntax-ts/{queries-override => queries}/latex-highlights.scm (100%) rename misc/syntax-ts/{queries-override => queries}/lua-highlights.scm (100%) rename misc/syntax-ts/{queries-override => queries}/mail-highlights.scm (100%) rename misc/syntax-ts/{queries-override => queries}/make-highlights.scm (100%) rename misc/syntax-ts/{queries-override => queries}/markdown-highlights.scm (100%) rename misc/syntax-ts/{queries-override => queries}/markdown-injections.scm (100%) rename misc/syntax-ts/{queries-override => queries}/markdown_inline-highlights.scm (100%) rename misc/syntax-ts/{queries-override => queries}/matlab-highlights.scm (100%) rename misc/syntax-ts/{queries-override => queries}/meson-highlights.scm (100%) rename misc/syntax-ts/{queries-override => queries}/muttrc-highlights.scm (100%) rename misc/syntax-ts/{queries-override => queries}/ocaml-highlights.scm (100%) rename misc/syntax-ts/{queries-override => queries}/pascal-highlights.scm (100%) rename misc/syntax-ts/{queries-override => queries}/perl-highlights.scm (100%) rename misc/syntax-ts/{queries-override => queries}/php-highlights.scm (100%) rename misc/syntax-ts/{queries-override => queries}/po-highlights.scm (100%) rename misc/syntax-ts/{queries-override => queries}/properties-highlights.scm (100%) rename misc/syntax-ts/{queries-override => queries}/proto-highlights.scm 
(100%) rename misc/syntax-ts/{queries-override => queries}/puppet-highlights.scm (100%) rename misc/syntax-ts/{queries-override => queries}/python-highlights.scm (100%) rename misc/syntax-ts/{queries-override => queries}/qmljs-highlights.scm (100%) rename misc/syntax-ts/{queries-override => queries}/r-highlights.scm (100%) rename misc/syntax-ts/{queries-override => queries}/ruby-highlights.scm (100%) rename misc/syntax-ts/{queries-override => queries}/rust-highlights.scm (100%) rename misc/syntax-ts/{queries-override => queries}/scala-highlights.scm (100%) rename misc/syntax-ts/{queries-override => queries}/slang-highlights.scm (100%) rename misc/syntax-ts/{queries-override => queries}/smalltalk-highlights.scm (100%) rename misc/syntax-ts/{queries-override => queries}/sql-highlights.scm (100%) rename misc/syntax-ts/{queries-override => queries}/strace-highlights.scm (100%) rename misc/syntax-ts/{queries-override => queries}/swift-highlights.scm (100%) rename misc/syntax-ts/{queries-override => queries}/tcl-highlights.scm (100%) rename misc/syntax-ts/{queries-override => queries}/terraform-highlights.scm (100%) rename misc/syntax-ts/{queries-override => queries}/toml-highlights.scm (100%) rename misc/syntax-ts/{queries-override => queries}/turtle-highlights.scm (100%) rename misc/syntax-ts/{queries-override => queries}/typescript-highlights.scm (100%) rename misc/syntax-ts/{queries-override => queries}/verilog-highlights.scm (100%) rename misc/syntax-ts/{queries-override => queries}/vhdl-highlights.scm (100%) rename misc/syntax-ts/{queries-override => queries}/xml-highlights.scm (100%) rename misc/syntax-ts/{queries-override => queries}/yaml-highlights.scm (100%) delete mode 120000 src/editor/ts-grammars/lisp delete mode 100644 src/editor/ts-grammars/tree_sitter/alloc.h delete mode 100644 src/editor/ts-grammars/tree_sitter/array.h delete mode 100644 src/editor/ts-grammars/tree_sitter/parser.h delete mode 100644 src/editor/ts-grammars/tree_sitter/ts_assert.h diff 
--git a/INSTALL b/INSTALL deleted file mode 120000 index 1f38889c39..0000000000 --- a/INSTALL +++ /dev/null @@ -1 +0,0 @@ -doc/INSTALL \ No newline at end of file diff --git a/INSTALL b/INSTALL new file mode 100644 index 0000000000..d43221a2ca --- /dev/null +++ b/INSTALL @@ -0,0 +1,380 @@ +Installation Instructions +************************* + +Basic Installation +================== + + The following shell commands: + + test -f configure || ./bootstrap + ./configure + make + make install + +should configure, build, and install this package. The first line, +which bootstraps, is intended for developers; when building from +distribution tarballs it does nothing and can be skipped. A package +might name the bootstrapping script differently; if the name is +‘autogen.sh’, for example, the first line should say ‘./autogen.sh’ +instead of ‘./bootstrap’. + + The following more-detailed instructions are generic; see the +‘README’ file for instructions specific to this package. Some packages +provide this ‘INSTALL’ file but do not implement all of the features +documented below. The lack of an optional feature in a given package is +not necessarily a bug. More recommendations for GNU packages can be +found in the GNU Coding Standards. + + Many packages have scripts meant for developers instead of ordinary +builders, as they may use developer tools that are less commonly +installed, or they may access the network, which has privacy +implications. These scripts attempt to bootstrap by building the +‘configure’ script and related files, possibly using developer tools or +the network. Because the output of bootstrapping is system-independent, +it is normally run by a package developer so that its output can be put +into the distribution tarball and ordinary builders and users need not +bootstrap. Some packages have commands like ‘./autopull.sh’ and +‘./autogen.sh’ that you can run instead of ‘./bootstrap’, for more +fine-grained control over bootstrapping. 
+ + The ‘configure’ script attempts to guess correct values for various +system-dependent variables used during compilation. It uses those +values to create a ‘Makefile’ in each directory of the package. It may +also create one or more ‘.h’ files containing system-dependent +definitions. Finally, it creates a script ‘config.status’ that you can +run in the future to recreate the current configuration, and a file +‘config.log’ containing output useful for debugging ‘configure’. + + It can also use an optional file (typically called ‘config.cache’ and +enabled with ‘--cache-file=config.cache’ or simply ‘-C’) that saves the +results of its tests to speed up reconfiguring. Caching is disabled by +default to prevent problems with accidental use of stale cache files. + + If you need to do unusual things to compile the package, please try +to figure out how ‘configure’ could check whether to do them, and mail +diffs or instructions to the address given in the ‘README’ so they can +be considered for the next release. If you are using the cache, and at +some point ‘config.cache’ contains results you don’t want to keep, you +may remove or edit it. + + The ‘autoconf’ program generates ‘configure’ from the file +‘configure.ac’. Normally you should edit ‘configure.ac’ instead of +editing ‘configure’ directly. + + The simplest way to compile this package is: + + 1. ‘cd’ to the directory containing the package’s source code. + + 2. If this is a developer checkout and file ‘configure’ does not yet + exist, run the bootstrapping script (typically ‘./bootstrap’ or + ‘./autogen.sh’) to bootstrap and create the file. You may need + special developer tools and network access to bootstrap, and the + network access may have privacy implications. + + 3. Type ‘./configure’ to configure the package for your system. This + might take a while. While running, ‘configure’ prints messages + telling which features it is checking for. + + 4. Type ‘make’ to compile the package. + + 5. 
Optionally, type ‘make check’ to run any self-tests that come with + the package, generally using the just-built uninstalled binaries. + + 6. Type ‘make install’ to install the programs and any data files and + documentation. When installing into a prefix owned by root, it is + recommended that the package be configured and built as a regular + user, and only the ‘make install’ phase executed with root + privileges. + + 7. Optionally, type ‘make installcheck’ to repeat any self-tests, but + this time using the binaries in their final installed location. + This target does not install anything. Running this target as a + regular user, particularly if the prior ‘make install’ required + root privileges, verifies that the installation completed + correctly. + + 8. You can remove the program binaries and object files from the + source code directory by typing ‘make clean’. To also remove the + files that ‘configure’ created (so you can compile the package for + a different kind of computer), type ‘make distclean’. There is + also a ‘make maintainer-clean’ target, but that is intended mainly + for the package’s developers. If you use it, you may have to + bootstrap again. + + 9. If the package follows the GNU Coding Standards, you can type ‘make + uninstall’ to remove the installed files. + +Installation Prerequisites +========================== + + Installation requires a POSIX-like environment with a shell and at +least the following standard utilities: + + awk cat cp diff echo expr false ls mkdir mv printf pwd rm rmdir sed + sort test tr + +This package’s installation may need other standard utilities such as +‘grep’, ‘make’, ‘sleep’ and ‘touch’, along with compilers like ‘gcc’. + +Compilers and Options +===================== + + Some systems require unusual options for compilation or linking that +the ‘configure’ script does not know about. Run ‘./configure --help’ +for details on some of the pertinent environment variables. 
+ + You can give ‘configure’ initial values for configuration parameters +by setting variables in the command line or in the environment. Here is +an example: + + ./configure CC=gcc CFLAGS=-g LIBS=-lposix + + See “Defining Variables” for more details. + +Compiling For Multiple Architectures +==================================== + + You can compile the package for more than one kind of computer at the +same time, by placing the object files for each system in their own +directory. To do this, you can use GNU ‘make’. ‘cd’ to the directory +where you want the object files and executables to go and run the +‘configure’ script. ‘configure’ automatically checks for the source +code in the directory that ‘configure’ is in and in ‘..’. This is known +as a “VPATH” build. + + With a non-GNU ‘make’, it is safer to compile the package for one +system at a time in the source code directory. After you have installed +the package for one system, use ‘make distclean’ before reconfiguring +for another system. + + Some platforms, notably macOS, support “fat” or “universal” binaries, +where a single binary can execute on different architectures. On these +platforms you can configure and compile just once, with options specific +to that platform. + +Installation Names +================== + + By default, ‘make install’ installs the package’s commands under +‘/usr/local/bin’, include files under ‘/usr/local/include’, etc. You +can specify an installation prefix other than ‘/usr/local’ by giving +‘configure’ the option ‘--prefix=PREFIX’, where PREFIX must be an +absolute file name. + + You can specify separate installation prefixes for +architecture-specific files and architecture-independent files. If you +pass the option ‘--exec-prefix=PREFIX’ to ‘configure’, the package uses +PREFIX as the prefix for installing programs and libraries. +Documentation and other data files still use the regular prefix. 
+ + In addition, if you use an unusual directory layout you can give +options like ‘--bindir=DIR’ to specify different values for particular +kinds of files. Run ‘configure --help’ for a list of the directories +you can set and what kinds of files go in them. In general, the default +for these options is expressed in terms of ‘${prefix}’, so that +specifying just ‘--prefix’ will affect all of the other directory +specifications that were not explicitly provided. + + The most portable way to affect installation locations is to pass the +correct locations to ‘configure’; however, many packages provide one or +both of the following shortcuts of passing variable assignments to the +‘make install’ command line to change installation locations without +having to reconfigure or recompile. + + The first method involves providing an override variable for each +affected directory. For example, ‘make install +prefix=/alternate/directory’ will choose an alternate location for all +directory configuration variables that were expressed in terms of +‘${prefix}’. Any directories that were specified during ‘configure’, +but not in terms of ‘${prefix}’, must each be overridden at install time +for the entire installation to be relocated. The approach of makefile +variable overrides for each directory variable is required by the GNU +Coding Standards, and ideally causes no recompilation. However, some +platforms have known limitations with the semantics of shared libraries +that end up requiring recompilation when using this method, particularly +noticeable in packages that use GNU Libtool. + + The second method involves providing the ‘DESTDIR’ variable. For +example, ‘make install DESTDIR=/alternate/directory’ will prepend +‘/alternate/directory’ before all installation names. The approach of +‘DESTDIR’ overrides is not required by the GNU Coding Standards, and +does not work on platforms that have drive letters. 
On the other hand, +it does better at avoiding recompilation issues, and works well even +when some directory options were not specified in terms of ‘${prefix}’ +at ‘configure’ time. + +Optional Features +================= + + If the package supports it, you can cause programs to be installed +with an extra prefix or suffix on their names by giving ‘configure’ the +option ‘--program-prefix=PREFIX’ or ‘--program-suffix=SUFFIX’. + + Some packages pay attention to ‘--enable-FEATURE’ and +‘--disable-FEATURE’ options to ‘configure’, where FEATURE indicates an +optional part of the package. They may also pay attention to +‘--with-PACKAGE’ and ‘--without-PACKAGE’ options, where PACKAGE is +something like ‘gnu-ld’. ‘./configure --help’ should mention the +‘--enable-...’ and ‘--with-...’ options that the package recognizes. + + Some packages offer the ability to configure how verbose the +execution of ‘make’ will be. For these packages, running ‘./configure +--enable-silent-rules’ sets the default to minimal output, which can be +overridden with ‘make V=1’; while running ‘./configure +--disable-silent-rules’ sets the default to verbose, which can be +overridden with ‘make V=0’. + +Specifying a System Type +======================== + + By default ‘configure’ builds for the current system. To create +binaries that can run on a different system type, specify a +‘--host=TYPE’ option along with compiler variables that specify how to +generate object code for TYPE. For example, to create binaries intended +to run on a 64-bit ARM processor: + + ./configure --host=aarch64-linux-gnu \ + CC=aarch64-linux-gnu-gcc \ + CXX=aarch64-linux-gnu-g++ + +If done on a machine that can execute these binaries (e.g., via +‘qemu-aarch64’, ‘$QEMU_LD_PREFIX’, and Linux’s ‘binfmt_misc’ +capability), the build behaves like a native build. Otherwise it is a +cross-build: ‘configure’ will make cross-compilation guesses instead of +running test programs, and ‘make check’ will not work. 
+ + A system type can either be a short name like ‘mingw64’, or a +canonical name like ‘x86_64-pc-linux-gnu’. Canonical names have the +form CPU-COMPANY-SYSTEM where SYSTEM is either OS or KERNEL-OS. To +canonicalize and validate a system type, you can run the command +‘config.sub’, which is often squirreled away in a subdirectory like +‘build-aux’. For example: + + $ build-aux/config.sub arm64-linux + aarch64-unknown-linux-gnu + $ build-aux/config.sub riscv-lnx + Invalid configuration 'riscv-lnx': OS 'lnx' not recognized + +You can look at the ‘config.sub’ file to see which types are recognized. +If the file is absent, this package does not need the system type. + + If ‘configure’ fails with the diagnostic “cannot guess build type”, +‘config.sub’ did not recognize your system’s type. In this case, first +fetch the newest versions of these files from the GNU config package +(https://savannah.gnu.org/projects/config). If that fixes things, +please report it to the maintainers of the package containing +‘configure’. Otherwise, you can try the configure option ‘--build=TYPE’ +where TYPE comes close to your system type; also, please report the +problem to <config-patches@gnu.org>. + + For more details about configuring system types, see the Autoconf +documentation. + +Sharing Defaults +================ + + If you want to set default values for ‘configure’ scripts to share, +you can create a site shell script called ‘config.site’ that gives +default values for variables like ‘CC’, ‘cache_file’, and ‘prefix’. +‘configure’ looks for ‘PREFIX/share/config.site’ if it exists, then +‘PREFIX/etc/config.site’ if it exists. Or, you can set the +‘CONFIG_SITE’ environment variable to the location of the site script. +A warning: not all ‘configure’ scripts look for a site script. + +Defining Variables +================== + + Variables not defined in a site shell script can be set in the +environment passed to ‘configure’.
However, some packages may run +configure again during the build, and the customized values of these +variables may be lost. In order to avoid this problem, you should set +them in the ‘configure’ command line, using ‘VAR=value’. For example: + + ./configure CC=/usr/local2/bin/gcc + +causes the specified ‘gcc’ to be used as the C compiler (unless it is +overridden in the site shell script). + +Unfortunately, this technique does not work for ‘CONFIG_SHELL’ due to an +Autoconf limitation. Until the limitation is lifted, you can use this +workaround: + + CONFIG_SHELL=/bin/bash ./configure CONFIG_SHELL=/bin/bash + +‘configure’ Invocation +====================== + + ‘configure’ recognizes the following options to control how it +operates. + +‘--help’ +‘-h’ + Print a summary of all of the options to ‘configure’, and exit. + +‘--help=short’ +‘--help=recursive’ + Print a summary of the options unique to this package’s + ‘configure’, and exit. The ‘short’ variant lists options used only + in the top level, while the ‘recursive’ variant lists options also + present in any nested packages. + +‘--version’ +‘-V’ + Print the version of Autoconf used to generate the ‘configure’ + script, and exit. + +‘--cache-file=FILE’ + Enable the cache: use and save the results of the tests in FILE, + traditionally ‘config.cache’. FILE defaults to ‘/dev/null’ to + disable caching. + +‘--config-cache’ +‘-C’ + Alias for ‘--cache-file=config.cache’. + +‘--srcdir=DIR’ + Look for the package’s source code in directory DIR. Usually + ‘configure’ can determine that directory automatically. + +‘--prefix=DIR’ + Use DIR as the installation prefix. See “Installation Names” for + more details, including other options available for fine-tuning the + installation locations. + +‘--host=TYPE’ + Build binaries for system TYPE. See “Specifying a System Type”. + +‘--enable-FEATURE’ +‘--disable-FEATURE’ + Enable or disable the optional FEATURE. See “Optional Features”. 
+ +‘--with-PACKAGE’ +‘--without-PACKAGE’ + Use or omit PACKAGE when building. See “Optional Features”. + +‘--quiet’ +‘--silent’ +‘-q’ + Do not print messages saying which checks are being made. To + suppress all normal output, redirect it to ‘/dev/null’ (any error + messages will still be shown). + +‘--no-create’ +‘-n’ + Run the configure checks, but stop before creating any output + files. + +‘configure’ also recognizes several environment variables, and accepts +some other, less widely useful, options. Run ‘configure --help’ for +more details. + +Copyright notice +================ + + Copyright © 1994–1996, 1999–2002, 2004–2017, 2020–2025 Free Software +Foundation, Inc. + + Copying and distribution of this file, with or without modification, +are permitted in any medium without royalty provided the copyright +notice and this notice are preserved. This file is offered as-is, +without warranty of any kind. diff --git a/Makefile.am b/Makefile.am index 378cbf0c00..0a1ee80009 100644 --- a/Makefile.am +++ b/Makefile.am @@ -11,7 +11,8 @@ EXTRA_DIST = dist_noinst_SCRIPTS = \ build-glib2.sh \ - version.sh + version.sh \ + scripts/ts-grammars-download.sh dist_noinst_HEADERS = $(top_srcdir)/mc-version.h diff --git a/doc/TREE-SITTER b/doc/TREE-SITTER index 249e4394b5..00afa00f73 100644 --- a/doc/TREE-SITTER +++ b/doc/TREE-SITTER @@ -6,7 +6,7 @@ Contents * Introduction * Building with tree-sitter support -* Downloading grammar sources +* Downloading grammars * How it works * Language injection * Wrapper grammars @@ -41,9 +41,11 @@ regex-based highlighting from *.syntax files. The two highlighting systems share the same rendering path: both produce a color pair integer per byte, which is consumed by editdraw.c. -154 grammars are supported (153 languages + markdown_inline for -injection), with language injection configured via standard -injections.scm query files. 
+69 grammars have MC-curated query files (68 languages + +markdown_inline for injection), with language injection configured +via standard injections.scm query files. 358 grammar libraries are +available for download from the grammars repository; distros or users +can add query files for additional languages. Building with tree-sitter support @@ -54,46 +56,38 @@ Requirements: - libtree-sitter >= 0.22 (development headers and shared library) - https://github.com/tree-sitter/tree-sitter - gmodule-2.0 (for shared mode, part of GLib) -- A C++ compiler (needed for the sql grammar which has a C++ scanner) -- curl (needed to download grammar sources, unless using a local copy) There are two build modes for grammars: shared (default) and static. -Shared mode (default) -- each grammar is built as a separate .so module -and loaded at runtime via g_module_open(). The mc binary stays small -(~5 MB) regardless of how many grammars are available. Distros can -package each grammar separately: +Shared mode (default) -- each grammar is a pre-built .so module loaded +at runtime via g_module_open(). The mc binary stays small (~5 MB) +regardless of how many grammars are available. Distros can package +each grammar separately: ./configure --with-tree-sitter make -j$(nproc) -Static mode -- all selected grammars are linked directly into the mc -binary. No runtime module loading, but the binary can grow large: +Static mode -- selected grammars are linked into the mc binary from +pre-built .a static libraries. No runtime module loading, but the +binary grows with each grammar included: ./configure --with-tree-sitter --with-tree-sitter-static make -j$(nproc) -To build with only specific grammars (works in both modes): +To build with only specific grammars (static mode only): - ./configure --with-tree-sitter --with-tree-sitter-grammars=c,python,bash - make -j$(nproc) - -Grammar sources are automatically downloaded during configure if they -are not already present. 
For offline builds, set TREE_SITTER_GRAMMARS_DIR -to a directory containing pre-downloaded grammar sources: - - TREE_SITTER_GRAMMARS_DIR=/path/to/grammars ./configure --with-tree-sitter + ./configure --with-tree-sitter --with-tree-sitter-static \ + --with-tree-sitter-grammars=c,python,bash -The --with-tree-sitter-grammars option accepts a comma-separated list -of grammar names. The default is 'all' (154 grammars). Invalid names -cause configure to abort with an error listing all valid grammars. +In static mode, the default is 'all', which auto-discovers all .a +files present in src/editor/ts-grammars/. Grammar .a files must be +downloaded before building (see "Downloading grammars" below). Binary size comparison: - Without tree-sitter: 5.0 MB - Shared mode (any grammar count): 5.1 MB (grammars in separate .so) -- Static mode, 3 grammars: 7.7 MB +- Static mode, 3 grammars: 7.4 MB - Static mode, 10 grammars: 10 MB -- Static mode, all 63 grammars: 109 MB (for build validation only) To build with the legacy highlighting only (default): @@ -106,41 +100,47 @@ used, no tree-sitter headers or libraries are required and the tree-sitter code is excluded via #ifdef HAVE_TREE_SITTER guards. -Downloading grammar sources ----------------------------- +Downloading grammars +-------------------- -Grammar source files (parser.c, scanner.c, etc.) are not stored in the -git repository. Configure automatically downloads missing grammar -sources when --with-tree-sitter is used. Only grammars selected at -configure time are downloaded (all by default, or the subset specified -via --with-tree-sitter-grammars). +Grammar libraries are not stored in the MC git repository. They must +be downloaded before building using the download script. 
-For offline builds, set TREE_SITTER_GRAMMARS_DIR before running -configure: +Pre-built grammars are hosted in the tree-sitter-grammars repository +(https://github.com/jtyr/tree-sitter-grammars) and published as +versioned release tarballs for multiple platforms. - TREE_SITTER_GRAMMARS_DIR=/path/to/grammars ./configure --with-tree-sitter +For shared mode (pre-built .so modules): -To manually download grammar sources: + scripts/ts-grammars-download.sh --shared - scripts/ts-grammars-download.sh --source +This downloads platform-specific .so files into ts-grammars-shared/. +Only grammars that have a corresponding MC-curated query file in +misc/syntax-ts/queries/ will be functional. The rest are available +for distros or users who provide their own query files. -To download pre-built shared modules instead: +For static mode (pre-built .a static libraries): - scripts/ts-grammars-download.sh --shared + scripts/ts-grammars-download.sh --static -The --source mode downloads parser.c/scanner.c files into -src/editor/ts-grammars// for compilation from source. The ---shared mode downloads pre-built .so modules for use in shared mode. +This downloads platform-specific .a files into +src/editor/ts-grammars//.a. All grammars are downloaded; +only those with matching query files will have active highlighting. -The directory must contain subdirectories named after each grammar (e.g. -c/, python/, bash/) with at least parser.c in each. +Both modes can be combined: -The download script reads grammar metadata from an external grammars -repository and pins all grammar versions for reproducible builds. + scripts/ts-grammars-download.sh --shared --static -Grammar source files MUST NOT be modified after download. All -compatibility is handled via the headers in tree_sitter/ and build -flags in Makefile.in. 
+Additional options: + + --latest Use latest release instead of pinned version + --platform=PLATFORM Override platform auto-detection + (x86_64-linux, aarch64-linux, aarch64-macos, + x86_64-macos, x86_64-windows) + +The download script pins grammar versions for reproducible builds. +The pinned version is set in the TS_GRAMMARS_VERSION variable at the +top of the script. How it works @@ -158,7 +158,7 @@ get a color pair. When tree-sitter is active the following happens: * Shared mode: g_module_open() loads the grammar's .so module on demand from the grammar module directory. * Static mode: a compile-time registry maps names to functions. - - A TSParser is created with a 3-second parse timeout as safety net. + - A TSParser is created for the grammar. - The corresponding highlight query file (<grammar>-highlights.scm) is loaded and compiled with ts_query_new(). - If an injections.scm query file exists for this grammar, injection @@ -427,30 +427,24 @@ Source files (src/editor/): buffer modification functions: edit_insert(), edit_insert_ahead(), edit_delete(), edit_backspace(). -Grammar sources (src/editor/ts-grammars/): +Grammar libraries (src/editor/ts-grammars/): - Grammar source files are downloaded at build time and are NOT stored - in the git repository. See "Downloading grammar sources" above. + Pre-built grammar libraries must be downloaded before building and + are NOT stored in the git repository. See "Downloading grammars" above. - <grammar>/parser.c Generated parser for each language. - <grammar>/scanner.c External scanner (optional, many grammars have one). - sql/scanner.cc The sql grammar uses a C++ scanner. + <grammar>/<grammar>.a Pre-built static library for each grammar + (static mode only, downloaded via --static). The following files ARE stored in the repository: - tree_sitter/ Compatibility headers (parser.h, alloc.h, array.h, - ts_assert.h) from tree-sitter v0.25.4. These are - needed because system libtree-sitter only installs - api.h, not the internal headers that grammars - #include. 
ts-grammar-registry.h Static mode only. Lookup table mapping grammar names to tree_sitter_*() functions. Each grammar is guarded by #ifdef HAVE_GRAMMAR_. - Makefile.in Hand-written (not automake). Supports two build - modes: static (libtsgrammars.la) and shared - (per-grammar mc-ts-.la modules). - .gitignore Excludes downloaded grammar directories from git. + Makefile.am Merges per-grammar .a files into a single + libtsgrammars.a for linking into the mc binary. + .gitignore Excludes downloaded grammar directories and build + artifacts from git. Runtime data files (misc/syntax-ts/): @@ -465,21 +459,19 @@ Runtime data files (misc/syntax-ts/): supported host languages (see "Wrapper grammars"). colors.ini Per-grammar color mappings (INI format, sections named [grammar_name]). - queries-override/-highlights.scm - MC-specific highlight query overrides (take - precedence over upstream queries). queries/-highlights.scm - Upstream highlight query files, one per grammar. - queries-override/-injections.scm - MC-specific injection query overrides. + MC-specific highlight query files, one per + supported grammar. Tailored to MC's terminal + color scheme. queries/-injections.scm - Upstream injection query files. + Injection query files for grammars that support + language injection. Shared grammar modules (shared mode only): - mc-ts-.so One loadable module per grammar. Each exports a + .so One loadable module per grammar. Each exports a single tree_sitter_() function. Loaded on - demand by syntax.c via g_module_open(). + demand via g_module_open(). The loader checks two directories in order: 1. ~/.local/lib/mc/ts-grammars/ (user-local, checked first) @@ -490,22 +482,26 @@ Shared grammar modules (shared mode only): Build system files: - configure.ac --with-tree-sitter, --with-tree-sitter-grammars, - and --with-tree-sitter-static flags. 
Validates - grammar names, generates source/object lists, - shared module list, -DHAVE_GRAMMAR_* defines - (static only), GModule check (shared only). + m4.include/mc-with-tree-sitter.m4 + --with-tree-sitter, --with-tree-sitter-grammars, + and --with-tree-sitter-static flags. In static + mode, auto-discovers .a files and generates + -DHAVE_GRAMMAR_* defines. In shared mode, + checks for GModule. src/editor/Makefile.am - Conditional SUBDIRS = ts-grammars. In static - mode links libtsgrammars.la; in shared mode links - only $(GMODULE_LIBS) and defines TS_GRAMMAR_LIBDIR. - src/Makefile.am $(TREE_SITTER_LIBS) added to mc_LDADD. + Conditional SUBDIRS = ts-grammars. In shared + mode links $(GMODULE_LIBS) and defines + TS_GRAMMAR_LIBDIR. + src/Makefile.am $(TREE_SITTER_LIBS) added to mc_LDADD. In + static mode, also links libtsgrammars.a. misc/syntax-ts/Makefile.am Installation rules for config files and queries/*.scm files. scripts/ts-grammars-download.sh - Script to download grammar sources (--source) - or pre-built shared modules (--shared). + Script to download pre-built grammar libraries: + --shared (.so) or --static (.a). Included in + the make dist tarball so users building from + source can download grammars. Test files: @@ -575,15 +571,14 @@ wrappers -- wrapper grammar definitions: colors.ini -- color mappings for capture names (INI format): - [default] + [python] + comment = brown; + function = brightcyan; keyword = yellow; string = green; - - [python] variable.builtin = brightred; - A [default] section provides global defaults. Per-grammar sections - override specific captures. The format is: + Each grammar has its own self-contained section. The format is: = ; Background can be omitted (inherits default). A foreground of "-" @@ -605,15 +600,12 @@ Highlight query files (queries/*.scm) Each grammar has a corresponding highlight query file named -highlights.scm. Query files are searched in this order: -1. 
User override: ~/.local/share/mc/syntax-ts/queries-override/ -2. System override: $(datadir)/mc/syntax-ts/queries-override/ -3. User upstream: ~/.local/share/mc/syntax-ts/queries/ -4. System upstream: $(datadir)/mc/syntax-ts/queries/ +1. User queries: ~/.local/share/mc/syntax-ts/queries/ +2. System queries: $(datadir)/mc/syntax-ts/queries/ -The first file found is used. MC-specific overrides in queries-override/ -take precedence over upstream queries in queries/. This allows tuning -capture names and patterns for MC's color scheme without modifying -upstream query files. +The first file found is used. All query files are MC-specific, +tailored to MC's terminal color scheme. Upstream query files from +grammar repositories are not used directly. Grammars that support language injection also have an injection query file named <name>-injections.scm (same search order). @@ -647,10 +639,10 @@ Color mapping ------------- The colors config file (misc/syntax-ts/colors.ini) maps capture names -to MC foreground colors using INI format. A [default] section provides -global defaults; per-grammar sections (e.g. [python], [bash]) override -specific captures for that grammar. Colors are chosen to match MC's -default syntax highlighting appearance (blue background skin): +to MC foreground colors using INI format. Each grammar has its own +self-contained section (e.g. [python], [bash]) with explicit color +definitions. Colors are chosen to match MC's default syntax +highlighting appearance (blue background skin): Capture Foreground Color Purpose ------- ---------------- ------- @@ -727,12 +719,14 @@ To add tree-sitter highlighting for a new language: For shared mode, no registration is needed -- the grammar is discovered automatically from its .so file. -3. Add the grammar to the build. +3. Ensure the grammar exists in the external grammars repository. - Edit configure.ac (in the tree-sitter section): - - Add <name> to all_ts_grammars. - - If there is a scanner.c, add <name> to ts_scanner_c.
- If there is a scanner.cc, add <name> to ts_scanner_cc. + The grammar must exist in the grammars.yaml registry in the + tree-sitter-grammars repository so that it gets built and included + in release tarballs. Most grammars are already registered. If + the grammar is new, add it to grammars.yaml in that repository. + The configure step auto-discovers available .a files, so no changes + to configure.ac are needed. 4. Create a highlight query file. @@ -788,39 +782,33 @@ To fully remove a grammar: 1. Remove its lines from extensions, filenames, shebangs, and display-names config files. 2. Remove its #ifdef-guarded entries from ts-grammar-registry.h. -3. Remove it from all_ts_grammars (and ts_scanner_c/ts_scanner_cc - if applicable) in configure.ac. -4. Delete misc/syntax-ts/queries/<name>-highlights.scm (and +3. Delete misc/syntax-ts/queries/<name>-highlights.scm (and <name>-injections.scm if present). -5. Remove <name>-highlights.scm from TS_QUERYFILES in +4. Remove <name>-highlights.scm from TS_QUERYFILES in misc/syntax-ts/Makefile.am. Updating a grammar ------------------ -To update a grammar to a newer version: +Grammar updates are handled by the external tree-sitter-grammars +repository, which has CI automation to check for upstream updates +weekly, generate parser.c from grammar.js, build new releases, and +publish tarballs containing both shared (.so) and static (.a) +libraries. -1. Re-download grammar sources: +To update MC's grammars to a newer release: - scripts/ts-grammars-download.sh --source +1. Update the version in scripts/ts-grammars-download.sh + (TS_GRAMMARS_VERSION variable) or use --latest. - This will replace the grammar files in src/editor/ts-grammars/<name>/. +2. Re-download grammars: -2. Check for ABI compatibility. The grammars were generated by - different tree-sitter versions (ranging from pre-0.22 to 0.25.x).
- The compatibility headers in src/editor/ts-grammars/tree_sitter/ - include patches for: - - TSFieldMapSlice typedef (old grammars use this, new use TSMapSlice) - - version/abi_version union in TSLanguage struct - - Variadic REDUCE macro (old grammars use 2 positional args, new use - 4 named args) + scripts/ts-grammars-download.sh --shared # for shared mode + scripts/ts-grammars-download.sh --static # for static mode - If the new grammar uses a newer ABI than 15 (TREE_SITTER_LANGUAGE_VERSION), - you may need to update the system libtree-sitter. - -3. Validate the query file against the new grammar. Node names may have - changed between versions. See "Validating query files" below. +3. Validate the query files against the new grammars. Node names may + have changed between versions. See "Validating query files" below. 4. Rebuild and test: @@ -835,13 +823,12 @@ A query file must only reference node names that exist in the grammar. If any node name is invalid, tree-sitter silently rejects the entire query and the language falls back to legacy highlighting. -To extract the valid node names from a grammar: - - grep -oP '"[^"]*"' src/editor/ts-grammars//parser.c \ - | sort -u - -Specifically, look at the ts_symbol_names[] array in parser.c for the -authoritative list. Also check ts_field_names[] for valid field names. +To extract the valid node names from a grammar, look at the +ts_symbol_names[] array in the grammar's parser.c source. This file +is generated from grammar.js during CI and is not stored in the +tree-sitter-grammars repository. To obtain it locally, run +tree-sitter generate in the grammar's directory. Also check +ts_field_names[] for valid field names. To extract node names used in a query file: @@ -892,38 +879,24 @@ Colors look wrong: A grammar causes high CPU usage or hangs: - A 3-second parse timeout (ts_parser_set_timeout_micros) is set for - all grammars as a safety net. 
If a grammar consistently hits the - timeout, consider disabling it by removing its entry from the - grammars config file. - -Build fails with "grammar sources not found": - - Grammar sources are normally downloaded automatically during - configure. If that failed, download manually: + Some grammars may be slow on very large files. If a grammar + consistently causes problems, consider disabling it by removing its + entry from the config files. The editor will fall back to legacy + highlighting. - scripts/ts-grammars-download.sh --source +Build fails with "grammar static libraries not found": - For offline builds, point to a directory with pre-downloaded sources: + Grammar .a files must be downloaded before building in static mode: - TREE_SITTER_GRAMMARS_DIR=/path/to/grammars ./configure --with-tree-sitter - -Build fails with type errors in grammar code: - - Grammars were generated by different tree-sitter versions. The - compatibility headers in src/editor/ts-grammars/tree_sitter/ handle - most version mismatches. If you encounter new errors after updating - a grammar, you may need to: - - Add new compatibility typedefs to tree_sitter/parser.h. - - Add warning suppression flags to AM_CFLAGS in - src/editor/ts-grammars/Makefile.in. + scripts/ts-grammars-download.sh --static Build fails with undefined reference to tree_sitter_<name>: - In static mode, ensure the grammar is listed in all_ts_grammars in - configure.ac and has an entry in ts-grammar-registry.h. In shared - mode, ensure the .so module exports the expected symbol name (check - with: nm -D mc-ts-<name>.so | grep tree_sitter). + In static mode, ensure the grammar has an entry in + ts-grammar-registry.h and that its .a file is present in + src/editor/ts-grammars/<name>/<name>.a. In shared mode, ensure the + .so module exports the expected symbol name (check with: + nm -D <name>.so | grep tree_sitter). Limitations @@ -933,10 +906,6 @@ Limitations entire query is rejected. There is no partial fallback within a single query file.
-- Languages without a pre-built parser.c (i.e. those requiring - tree-sitter generate) are not supported. Only grammars with - committed parser.c files in their upstream repos are included. - - The color mapping is global. Per-language color overrides are achieved by using more specific capture names in query files (e.g. @keyword.directive for Make), not by configuring colors per language. @@ -949,3 +918,12 @@ Limitations in the source matching a registered grammar name exactly. Common aliases (e.g. "py" for "python", "js" for "javascript") are not resolved automatically. + +- During rapid typing (e.g. holding spacebar or backspace), the + screen may not refresh until the key is released. This is because + MC's renderer is single-threaded: the tree-sitter parse and query + work runs synchronously in the rendering path, competing with input + processing. To mitigate this, injection processing (the most + expensive step) is skipped during rapid edits and refreshed once + input settles. Use Ctrl+T to switch to legacy highlighting for + extended bulk editing if needed. 
diff --git a/m4.include/mc-with-tree-sitter.m4 b/m4.include/mc-with-tree-sitter.m4 index d309c6a9a0..f0cf814568 100644 --- a/m4.include/mc-with-tree-sitter.m4 +++ b/m4.include/mc-with-tree-sitter.m4 @@ -31,7 +31,7 @@ AC_DEFUN([mc_WITH_TREE_SITTER], [ if test x"$with_tree_sitter_static" = xyes; then AC_DEFINE([TREE_SITTER_STATIC], [1], [Define if tree-sitter grammars are linked statically]) - TREE_SITTER_BUILD_TARGET="libtsgrammars.la" + TREE_SITTER_BUILD_TARGET="libtsgrammars.a" TREE_SITTER_BUILD_MODE="static" else AC_DEFINE([TREE_SITTER_SHARED], [1], [Define if tree-sitter grammars are loaded as shared modules]) @@ -47,73 +47,47 @@ AC_DEFUN([mc_WITH_TREE_SITTER], [ AC_SUBST([TREE_SITTER_BUILD_MODE]) AC_SUBST([TREE_SITTER_GRAMMAR_LIBDIR]) - dnl Known grammar directory names (must match download-grammars.sh) - all_ts_grammars="ada asm awk bash bison c caddy cmake cobol cpp c_sharp css cuda d diff dockerfile dot erlang fortran glsl go haskell hcl html idl ini java javascript json kotlin lisp lua make markdown markdown_inline matlab meson muttrc ocaml pascal perl php po properties proto python qmljs r ruby rust scala smalltalk sql strace swift tcl toml turtle typescript verilog vhdl xml yaml" - - dnl Grammars that have scanner.c - ts_scanner_c="awk bash bison caddy cmake cobol cpp c_sharp css cuda d dockerfile erlang fortran haskell hcl html javascript kotlin lua markdown markdown_inline ocaml perl php properties python qmljs r ruby rust scala swift tcl toml typescript xml yaml" - - dnl Grammars that have scanner.cc (C++ scanner) - ts_scanner_cc="sql" - - dnl Parse and validate selected grammars - if test x"$with_tree_sitter_grammars" = xall; then - tree_sitter_grammars="$all_ts_grammars" - else - tree_sitter_grammars=`echo "$with_tree_sitter_grammars" | tr ',' ' '` - for g in $tree_sitter_grammars; do - case " $all_ts_grammars " in - *" $g "*) ;; - *) AC_MSG_ERROR([unknown tree-sitter grammar: $g -Valid grammars: $all_ts_grammars]) ;; - esac - done - fi - - 
TREE_SITTER_GRAMMAR_SOURCES="" - TREE_SITTER_GRAMMAR_OBJECTS="" TREE_SITTER_GRAMMAR_DEFS="" - TREE_SITTER_SHARED_LIBS="" + TREE_SITTER_GRAMMAR_ARCHIVES="" TREE_SITTER_GRAMMARS="" TREE_SITTER_LIBS="-ltree-sitter" TREE_SITTER_CFLAGS="" if test x"$with_tree_sitter_static" = xyes; then - dnl Static mode: build source/object file lists and -D flags - ts_need_cxx=no + dnl Static mode: discover available .a files and build grammar list + ts_grammar_dir="$srcdir/src/editor/ts-grammars" + + if test x"$with_tree_sitter_grammars" = xall; then + dnl Auto-discover all available .a files + tree_sitter_grammars="" + for a in "$ts_grammar_dir"/*/*.a; do + test -f "$a" || continue + g=`basename "\`dirname \"$a\"\`"` + tree_sitter_grammars="$tree_sitter_grammars $g" + done + tree_sitter_grammars=`echo $tree_sitter_grammars` + else + tree_sitter_grammars=`echo "$with_tree_sitter_grammars" | tr ',' ' '` + fi + for g in $tree_sitter_grammars; do - TREE_SITTER_GRAMMAR_SOURCES="$TREE_SITTER_GRAMMAR_SOURCES $g/parser.c" - TREE_SITTER_GRAMMAR_OBJECTS="$TREE_SITTER_GRAMMAR_OBJECTS $g/parser.lo" - case " $ts_scanner_c " in - *" $g "*) - TREE_SITTER_GRAMMAR_SOURCES="$TREE_SITTER_GRAMMAR_SOURCES $g/scanner.c" - TREE_SITTER_GRAMMAR_OBJECTS="$TREE_SITTER_GRAMMAR_OBJECTS $g/scanner.lo" ;; - esac - case " $ts_scanner_cc " in - *" $g "*) - TREE_SITTER_GRAMMAR_SOURCES="$TREE_SITTER_GRAMMAR_SOURCES $g/scanner.cc" - TREE_SITTER_GRAMMAR_OBJECTS="$TREE_SITTER_GRAMMAR_OBJECTS $g/scanner.lo" - ts_need_cxx=yes ;; - esac + if test ! 
-f "$ts_grammar_dir/$g/$g.a"; then + AC_MSG_ERROR([tree-sitter grammar static library not found: $ts_grammar_dir/$g/$g.a]) + fi upper=`echo "$g" | tr 'abcdefghijklmnopqrstuvwxyz' 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'` TREE_SITTER_GRAMMAR_DEFS="$TREE_SITTER_GRAMMAR_DEFS -DHAVE_GRAMMAR_${upper}=1" + TREE_SITTER_GRAMMAR_ARCHIVES="$TREE_SITTER_GRAMMAR_ARCHIVES $g/$g.a" TREE_SITTER_GRAMMARS="$TREE_SITTER_GRAMMARS $g" done dnl Remove leading spaces - TREE_SITTER_GRAMMAR_SOURCES=`echo $TREE_SITTER_GRAMMAR_SOURCES` - TREE_SITTER_GRAMMAR_OBJECTS=`echo $TREE_SITTER_GRAMMAR_OBJECTS` TREE_SITTER_GRAMMAR_DEFS=`echo $TREE_SITTER_GRAMMAR_DEFS` + TREE_SITTER_GRAMMAR_ARCHIVES=`echo $TREE_SITTER_GRAMMAR_ARCHIVES` TREE_SITTER_GRAMMARS=`echo $TREE_SITTER_GRAMMARS` - if test x"$ts_need_cxx" = xyes; then - TREE_SITTER_LIBS="-ltree-sitter -lstdc++" - fi fi dnl Shared mode: no grammar compilation. .so files are loaded at runtime. - AC_SUBST([TREE_SITTER_GRAMMAR_SOURCES]) - AC_SUBST([TREE_SITTER_GRAMMAR_OBJECTS]) AC_SUBST([TREE_SITTER_GRAMMAR_DEFS]) - AC_SUBST([TREE_SITTER_SHARED_LIBS]) + AC_SUBST([TREE_SITTER_GRAMMAR_ARCHIVES]) AC_SUBST([TREE_SITTER_GRAMMARS]) AC_SUBST([TREE_SITTER_LIBS]) AC_SUBST([TREE_SITTER_CFLAGS]) @@ -132,16 +106,16 @@ Valid grammars: $all_ts_grammars]) ;; dnl Check that grammar files exist ts_grammars_found=no if test x"$with_tree_sitter_static" = xyes; then - dnl Static: check for at least one grammar source dir + dnl Static: check for at least one grammar .a file for g in $tree_sitter_grammars; do - if test -f "$srcdir/src/editor/ts-grammars/$g/parser.c"; then + if test -f "$srcdir/src/editor/ts-grammars/$g/$g.a"; then ts_grammars_found=yes break fi done if test x"$ts_grammars_found" = xno; then - AC_MSG_ERROR([tree-sitter grammar sources not found. Run: - ./scripts/ts-grammars-download.sh --source]) + AC_MSG_ERROR([tree-sitter grammar static libraries not found. 
Run: + ./scripts/ts-grammars-download.sh --static]) fi else dnl Shared mode: grammars loaded at runtime, no compile-time check needed. diff --git a/misc/syntax-ts/Makefile.am b/misc/syntax-ts/Makefile.am index 4c984c350a..068f88863f 100644 --- a/misc/syntax-ts/Makefile.am +++ b/misc/syntax-ts/Makefile.am @@ -4,6 +4,7 @@ CONFIGFILES = \ shebangs \ display-names \ symbols \ + wrappers \ colors.ini if USE_INTERNAL_EDIT @@ -11,74 +12,18 @@ syntaxtsdir = $(pkgdatadir)/syntax-ts dist_syntaxts_DATA = $(CONFIGFILES) queriesdir = $(pkgdatadir)/syntax-ts/queries -overridedir = $(pkgdatadir)/syntax-ts/queries-override install-data-local: $(MKDIR_P) $(DESTDIR)$(queriesdir) - $(MKDIR_P) $(DESTDIR)$(overridedir) for f in $(srcdir)/queries/*-highlights.scm $(srcdir)/queries/*-injections.scm; do \ test -f "$$f" && $(INSTALL_DATA) "$$f" $(DESTDIR)$(queriesdir)/; \ done; true - for f in $(srcdir)/queries-override/*-highlights.scm $(srcdir)/queries-override/*-injections.scm; do \ - test -f "$$f" && $(INSTALL_DATA) "$$f" $(DESTDIR)$(overridedir)/; \ - done; true uninstall-local: rm -f $(DESTDIR)$(queriesdir)/*-highlights.scm rm -f $(DESTDIR)$(queriesdir)/*-injections.scm - rm -f $(DESTDIR)$(overridedir)/*-highlights.scm - rm -f $(DESTDIR)$(overridedir)/*-injections.scm - -rmdir $(DESTDIR)$(overridedir) -rmdir $(DESTDIR)$(queriesdir) endif EXTRA_DIST = $(CONFIGFILES) \ - queries \ - queries-override - -# Build an MC-ready grammar tarball from the shared download staging directory. -# Requires: ./scripts/ts-grammars-download.sh --shared to have been run first. -# Output: mc-ts-grammars-.tar.gz in the top build directory. -# -# The tarball layout: -# mc-ts-grammars-/ -# lib/.so -- grammar shared libraries (bare names) -# queries/-highlights.scm -# queries/-injections.scm (where available) -# queries-override/ -- MC-specific query overrides -dist-shared: - @staging="$(top_srcdir)/ts-grammars-shared"; \ - if [ ! -d "$$staging" ]; then \ - echo "Error: shared grammars not found. 
Run first:"; \ - echo " ./scripts/ts-grammars-download.sh --shared"; \ - exit 1; \ - fi; \ - arch=$$(uname -m); \ - case "$$(uname -s)" in \ - Linux) platform="$${arch}-linux" ;; \ - Darwin) case "$$arch" in arm64) platform="aarch64-macos" ;; *) platform="$${arch}-macos" ;; esac ;; \ - MINGW*) platform="$${arch}-windows" ;; \ - *) platform="$${arch}-unknown" ;; \ - esac; \ - outdir="$(top_builddir)/mc-ts-grammars-$$platform"; \ - rm -rf "$$outdir"; \ - mkdir -p "$$outdir/lib" "$$outdir/queries" "$$outdir/queries-override"; \ - count=0; \ - for lang_dir in "$$staging"/*/; do \ - lang=$$(basename "$$lang_dir"); \ - for ext in so dylib dll; do \ - for lib in "$$lang_dir"/$$lang.$$ext; do \ - [ -f "$$lib" ] && cp "$$lib" "$$outdir/lib/"; \ - done; \ - done; \ - count=$$((count + 1)); \ - done; \ - for f in $(srcdir)/queries/*-highlights.scm $(srcdir)/queries/*-injections.scm; do \ - [ -f "$$f" ] && cp "$$f" "$$outdir/queries/"; \ - done; true; \ - for f in $(srcdir)/queries-override/*-highlights.scm $(srcdir)/queries-override/*-injections.scm; do \ - [ -f "$$f" ] && cp "$$f" "$$outdir/queries-override/"; \ - done; true; \ - (cd "$(top_builddir)" && tar czf "mc-ts-grammars-$$platform.tar.gz" "mc-ts-grammars-$$platform"); \ - rm -rf "$$outdir"; \ - echo "Created: $(top_builddir)/mc-ts-grammars-$$platform.tar.gz ($$count grammars)" + queries diff --git a/misc/syntax-ts/queries/.gitignore b/misc/syntax-ts/queries/.gitignore deleted file mode 100644 index 1fde35dcf3..0000000000 --- a/misc/syntax-ts/queries/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -# Query files are downloaded at build time, not stored in git. -# For static mode: ./scripts/ts-grammars-download.sh --source -# For shared mode: installed from the grammar tarball. 
-*.scm diff --git a/misc/syntax-ts/queries-override/ada-highlights.scm b/misc/syntax-ts/queries/ada-highlights.scm similarity index 100% rename from misc/syntax-ts/queries-override/ada-highlights.scm rename to misc/syntax-ts/queries/ada-highlights.scm diff --git a/misc/syntax-ts/queries-override/asm-highlights.scm b/misc/syntax-ts/queries/asm-highlights.scm similarity index 100% rename from misc/syntax-ts/queries-override/asm-highlights.scm rename to misc/syntax-ts/queries/asm-highlights.scm diff --git a/misc/syntax-ts/queries-override/awk-highlights.scm b/misc/syntax-ts/queries/awk-highlights.scm similarity index 100% rename from misc/syntax-ts/queries-override/awk-highlights.scm rename to misc/syntax-ts/queries/awk-highlights.scm diff --git a/misc/syntax-ts/queries-override/bash-highlights.scm b/misc/syntax-ts/queries/bash-highlights.scm similarity index 100% rename from misc/syntax-ts/queries-override/bash-highlights.scm rename to misc/syntax-ts/queries/bash-highlights.scm diff --git a/misc/syntax-ts/queries-override/bison-highlights.scm b/misc/syntax-ts/queries/bison-highlights.scm similarity index 100% rename from misc/syntax-ts/queries-override/bison-highlights.scm rename to misc/syntax-ts/queries/bison-highlights.scm diff --git a/misc/syntax-ts/queries-override/c-highlights.scm b/misc/syntax-ts/queries/c-highlights.scm similarity index 100% rename from misc/syntax-ts/queries-override/c-highlights.scm rename to misc/syntax-ts/queries/c-highlights.scm diff --git a/misc/syntax-ts/queries-override/c_sharp-highlights.scm b/misc/syntax-ts/queries/c_sharp-highlights.scm similarity index 100% rename from misc/syntax-ts/queries-override/c_sharp-highlights.scm rename to misc/syntax-ts/queries/c_sharp-highlights.scm diff --git a/misc/syntax-ts/queries-override/caddy-highlights.scm b/misc/syntax-ts/queries/caddy-highlights.scm similarity index 100% rename from misc/syntax-ts/queries-override/caddy-highlights.scm rename to misc/syntax-ts/queries/caddy-highlights.scm diff 
--git a/misc/syntax-ts/queries-override/cmake-highlights.scm b/misc/syntax-ts/queries/cmake-highlights.scm similarity index 100% rename from misc/syntax-ts/queries-override/cmake-highlights.scm rename to misc/syntax-ts/queries/cmake-highlights.scm diff --git a/misc/syntax-ts/queries-override/cobol-highlights.scm b/misc/syntax-ts/queries/cobol-highlights.scm similarity index 100% rename from misc/syntax-ts/queries-override/cobol-highlights.scm rename to misc/syntax-ts/queries/cobol-highlights.scm diff --git a/misc/syntax-ts/queries-override/commonlisp-highlights.scm b/misc/syntax-ts/queries/commonlisp-highlights.scm similarity index 100% rename from misc/syntax-ts/queries-override/commonlisp-highlights.scm rename to misc/syntax-ts/queries/commonlisp-highlights.scm diff --git a/misc/syntax-ts/queries-override/cpp-highlights.scm b/misc/syntax-ts/queries/cpp-highlights.scm similarity index 100% rename from misc/syntax-ts/queries-override/cpp-highlights.scm rename to misc/syntax-ts/queries/cpp-highlights.scm diff --git a/misc/syntax-ts/queries-override/css-highlights.scm b/misc/syntax-ts/queries/css-highlights.scm similarity index 100% rename from misc/syntax-ts/queries-override/css-highlights.scm rename to misc/syntax-ts/queries/css-highlights.scm diff --git a/misc/syntax-ts/queries-override/cuda-highlights.scm b/misc/syntax-ts/queries/cuda-highlights.scm similarity index 100% rename from misc/syntax-ts/queries-override/cuda-highlights.scm rename to misc/syntax-ts/queries/cuda-highlights.scm diff --git a/misc/syntax-ts/queries-override/d-highlights.scm b/misc/syntax-ts/queries/d-highlights.scm similarity index 100% rename from misc/syntax-ts/queries-override/d-highlights.scm rename to misc/syntax-ts/queries/d-highlights.scm diff --git a/misc/syntax-ts/queries-override/diff-highlights.scm b/misc/syntax-ts/queries/diff-highlights.scm similarity index 100% rename from misc/syntax-ts/queries-override/diff-highlights.scm rename to misc/syntax-ts/queries/diff-highlights.scm 
diff --git a/misc/syntax-ts/queries-override/dockerfile-highlights.scm b/misc/syntax-ts/queries/dockerfile-highlights.scm similarity index 100% rename from misc/syntax-ts/queries-override/dockerfile-highlights.scm rename to misc/syntax-ts/queries/dockerfile-highlights.scm diff --git a/misc/syntax-ts/queries-override/dot-highlights.scm b/misc/syntax-ts/queries/dot-highlights.scm similarity index 100% rename from misc/syntax-ts/queries-override/dot-highlights.scm rename to misc/syntax-ts/queries/dot-highlights.scm diff --git a/misc/syntax-ts/queries-override/erlang-highlights.scm b/misc/syntax-ts/queries/erlang-highlights.scm similarity index 100% rename from misc/syntax-ts/queries-override/erlang-highlights.scm rename to misc/syntax-ts/queries/erlang-highlights.scm diff --git a/misc/syntax-ts/queries-override/fortran-highlights.scm b/misc/syntax-ts/queries/fortran-highlights.scm similarity index 100% rename from misc/syntax-ts/queries-override/fortran-highlights.scm rename to misc/syntax-ts/queries/fortran-highlights.scm diff --git a/misc/syntax-ts/queries-override/glsl-highlights.scm b/misc/syntax-ts/queries/glsl-highlights.scm similarity index 100% rename from misc/syntax-ts/queries-override/glsl-highlights.scm rename to misc/syntax-ts/queries/glsl-highlights.scm diff --git a/misc/syntax-ts/queries-override/go-highlights.scm b/misc/syntax-ts/queries/go-highlights.scm similarity index 100% rename from misc/syntax-ts/queries-override/go-highlights.scm rename to misc/syntax-ts/queries/go-highlights.scm diff --git a/misc/syntax-ts/queries-override/gotmpl-highlights.scm b/misc/syntax-ts/queries/gotmpl-highlights.scm similarity index 100% rename from misc/syntax-ts/queries-override/gotmpl-highlights.scm rename to misc/syntax-ts/queries/gotmpl-highlights.scm diff --git a/misc/syntax-ts/queries-override/haskell-highlights.scm b/misc/syntax-ts/queries/haskell-highlights.scm similarity index 100% rename from misc/syntax-ts/queries-override/haskell-highlights.scm rename to 
misc/syntax-ts/queries/haskell-highlights.scm diff --git a/misc/syntax-ts/queries-override/hcl-highlights.scm b/misc/syntax-ts/queries/hcl-highlights.scm similarity index 100% rename from misc/syntax-ts/queries-override/hcl-highlights.scm rename to misc/syntax-ts/queries/hcl-highlights.scm diff --git a/misc/syntax-ts/queries-override/html-highlights.scm b/misc/syntax-ts/queries/html-highlights.scm similarity index 100% rename from misc/syntax-ts/queries-override/html-highlights.scm rename to misc/syntax-ts/queries/html-highlights.scm diff --git a/misc/syntax-ts/queries-override/idl-highlights.scm b/misc/syntax-ts/queries/idl-highlights.scm similarity index 100% rename from misc/syntax-ts/queries-override/idl-highlights.scm rename to misc/syntax-ts/queries/idl-highlights.scm diff --git a/misc/syntax-ts/queries-override/ini-highlights.scm b/misc/syntax-ts/queries/ini-highlights.scm similarity index 100% rename from misc/syntax-ts/queries-override/ini-highlights.scm rename to misc/syntax-ts/queries/ini-highlights.scm diff --git a/misc/syntax-ts/queries-override/java-highlights.scm b/misc/syntax-ts/queries/java-highlights.scm similarity index 100% rename from misc/syntax-ts/queries-override/java-highlights.scm rename to misc/syntax-ts/queries/java-highlights.scm diff --git a/misc/syntax-ts/queries-override/javascript-highlights.scm b/misc/syntax-ts/queries/javascript-highlights.scm similarity index 100% rename from misc/syntax-ts/queries-override/javascript-highlights.scm rename to misc/syntax-ts/queries/javascript-highlights.scm diff --git a/misc/syntax-ts/queries-override/json-highlights.scm b/misc/syntax-ts/queries/json-highlights.scm similarity index 100% rename from misc/syntax-ts/queries-override/json-highlights.scm rename to misc/syntax-ts/queries/json-highlights.scm diff --git a/misc/syntax-ts/queries-override/kotlin-highlights.scm b/misc/syntax-ts/queries/kotlin-highlights.scm similarity index 100% rename from 
misc/syntax-ts/queries-override/kotlin-highlights.scm rename to misc/syntax-ts/queries/kotlin-highlights.scm diff --git a/misc/syntax-ts/queries-override/latex-highlights.scm b/misc/syntax-ts/queries/latex-highlights.scm similarity index 100% rename from misc/syntax-ts/queries-override/latex-highlights.scm rename to misc/syntax-ts/queries/latex-highlights.scm diff --git a/misc/syntax-ts/queries-override/lua-highlights.scm b/misc/syntax-ts/queries/lua-highlights.scm similarity index 100% rename from misc/syntax-ts/queries-override/lua-highlights.scm rename to misc/syntax-ts/queries/lua-highlights.scm diff --git a/misc/syntax-ts/queries-override/mail-highlights.scm b/misc/syntax-ts/queries/mail-highlights.scm similarity index 100% rename from misc/syntax-ts/queries-override/mail-highlights.scm rename to misc/syntax-ts/queries/mail-highlights.scm diff --git a/misc/syntax-ts/queries-override/make-highlights.scm b/misc/syntax-ts/queries/make-highlights.scm similarity index 100% rename from misc/syntax-ts/queries-override/make-highlights.scm rename to misc/syntax-ts/queries/make-highlights.scm diff --git a/misc/syntax-ts/queries-override/markdown-highlights.scm b/misc/syntax-ts/queries/markdown-highlights.scm similarity index 100% rename from misc/syntax-ts/queries-override/markdown-highlights.scm rename to misc/syntax-ts/queries/markdown-highlights.scm diff --git a/misc/syntax-ts/queries-override/markdown-injections.scm b/misc/syntax-ts/queries/markdown-injections.scm similarity index 100% rename from misc/syntax-ts/queries-override/markdown-injections.scm rename to misc/syntax-ts/queries/markdown-injections.scm diff --git a/misc/syntax-ts/queries-override/markdown_inline-highlights.scm b/misc/syntax-ts/queries/markdown_inline-highlights.scm similarity index 100% rename from misc/syntax-ts/queries-override/markdown_inline-highlights.scm rename to misc/syntax-ts/queries/markdown_inline-highlights.scm diff --git a/misc/syntax-ts/queries-override/matlab-highlights.scm 
b/misc/syntax-ts/queries/matlab-highlights.scm similarity index 100% rename from misc/syntax-ts/queries-override/matlab-highlights.scm rename to misc/syntax-ts/queries/matlab-highlights.scm diff --git a/misc/syntax-ts/queries-override/meson-highlights.scm b/misc/syntax-ts/queries/meson-highlights.scm similarity index 100% rename from misc/syntax-ts/queries-override/meson-highlights.scm rename to misc/syntax-ts/queries/meson-highlights.scm diff --git a/misc/syntax-ts/queries-override/muttrc-highlights.scm b/misc/syntax-ts/queries/muttrc-highlights.scm similarity index 100% rename from misc/syntax-ts/queries-override/muttrc-highlights.scm rename to misc/syntax-ts/queries/muttrc-highlights.scm diff --git a/misc/syntax-ts/queries-override/ocaml-highlights.scm b/misc/syntax-ts/queries/ocaml-highlights.scm similarity index 100% rename from misc/syntax-ts/queries-override/ocaml-highlights.scm rename to misc/syntax-ts/queries/ocaml-highlights.scm diff --git a/misc/syntax-ts/queries-override/pascal-highlights.scm b/misc/syntax-ts/queries/pascal-highlights.scm similarity index 100% rename from misc/syntax-ts/queries-override/pascal-highlights.scm rename to misc/syntax-ts/queries/pascal-highlights.scm diff --git a/misc/syntax-ts/queries-override/perl-highlights.scm b/misc/syntax-ts/queries/perl-highlights.scm similarity index 100% rename from misc/syntax-ts/queries-override/perl-highlights.scm rename to misc/syntax-ts/queries/perl-highlights.scm diff --git a/misc/syntax-ts/queries-override/php-highlights.scm b/misc/syntax-ts/queries/php-highlights.scm similarity index 100% rename from misc/syntax-ts/queries-override/php-highlights.scm rename to misc/syntax-ts/queries/php-highlights.scm diff --git a/misc/syntax-ts/queries-override/po-highlights.scm b/misc/syntax-ts/queries/po-highlights.scm similarity index 100% rename from misc/syntax-ts/queries-override/po-highlights.scm rename to misc/syntax-ts/queries/po-highlights.scm diff --git 
a/misc/syntax-ts/queries-override/properties-highlights.scm b/misc/syntax-ts/queries/properties-highlights.scm similarity index 100% rename from misc/syntax-ts/queries-override/properties-highlights.scm rename to misc/syntax-ts/queries/properties-highlights.scm diff --git a/misc/syntax-ts/queries-override/proto-highlights.scm b/misc/syntax-ts/queries/proto-highlights.scm similarity index 100% rename from misc/syntax-ts/queries-override/proto-highlights.scm rename to misc/syntax-ts/queries/proto-highlights.scm diff --git a/misc/syntax-ts/queries-override/puppet-highlights.scm b/misc/syntax-ts/queries/puppet-highlights.scm similarity index 100% rename from misc/syntax-ts/queries-override/puppet-highlights.scm rename to misc/syntax-ts/queries/puppet-highlights.scm diff --git a/misc/syntax-ts/queries-override/python-highlights.scm b/misc/syntax-ts/queries/python-highlights.scm similarity index 100% rename from misc/syntax-ts/queries-override/python-highlights.scm rename to misc/syntax-ts/queries/python-highlights.scm diff --git a/misc/syntax-ts/queries-override/qmljs-highlights.scm b/misc/syntax-ts/queries/qmljs-highlights.scm similarity index 100% rename from misc/syntax-ts/queries-override/qmljs-highlights.scm rename to misc/syntax-ts/queries/qmljs-highlights.scm diff --git a/misc/syntax-ts/queries-override/r-highlights.scm b/misc/syntax-ts/queries/r-highlights.scm similarity index 100% rename from misc/syntax-ts/queries-override/r-highlights.scm rename to misc/syntax-ts/queries/r-highlights.scm diff --git a/misc/syntax-ts/queries-override/ruby-highlights.scm b/misc/syntax-ts/queries/ruby-highlights.scm similarity index 100% rename from misc/syntax-ts/queries-override/ruby-highlights.scm rename to misc/syntax-ts/queries/ruby-highlights.scm diff --git a/misc/syntax-ts/queries-override/rust-highlights.scm b/misc/syntax-ts/queries/rust-highlights.scm similarity index 100% rename from misc/syntax-ts/queries-override/rust-highlights.scm rename to 
misc/syntax-ts/queries/rust-highlights.scm diff --git a/misc/syntax-ts/queries-override/scala-highlights.scm b/misc/syntax-ts/queries/scala-highlights.scm similarity index 100% rename from misc/syntax-ts/queries-override/scala-highlights.scm rename to misc/syntax-ts/queries/scala-highlights.scm diff --git a/misc/syntax-ts/queries-override/slang-highlights.scm b/misc/syntax-ts/queries/slang-highlights.scm similarity index 100% rename from misc/syntax-ts/queries-override/slang-highlights.scm rename to misc/syntax-ts/queries/slang-highlights.scm diff --git a/misc/syntax-ts/queries-override/smalltalk-highlights.scm b/misc/syntax-ts/queries/smalltalk-highlights.scm similarity index 100% rename from misc/syntax-ts/queries-override/smalltalk-highlights.scm rename to misc/syntax-ts/queries/smalltalk-highlights.scm diff --git a/misc/syntax-ts/queries-override/sql-highlights.scm b/misc/syntax-ts/queries/sql-highlights.scm similarity index 100% rename from misc/syntax-ts/queries-override/sql-highlights.scm rename to misc/syntax-ts/queries/sql-highlights.scm diff --git a/misc/syntax-ts/queries-override/strace-highlights.scm b/misc/syntax-ts/queries/strace-highlights.scm similarity index 100% rename from misc/syntax-ts/queries-override/strace-highlights.scm rename to misc/syntax-ts/queries/strace-highlights.scm diff --git a/misc/syntax-ts/queries-override/swift-highlights.scm b/misc/syntax-ts/queries/swift-highlights.scm similarity index 100% rename from misc/syntax-ts/queries-override/swift-highlights.scm rename to misc/syntax-ts/queries/swift-highlights.scm diff --git a/misc/syntax-ts/queries-override/tcl-highlights.scm b/misc/syntax-ts/queries/tcl-highlights.scm similarity index 100% rename from misc/syntax-ts/queries-override/tcl-highlights.scm rename to misc/syntax-ts/queries/tcl-highlights.scm diff --git a/misc/syntax-ts/queries-override/terraform-highlights.scm b/misc/syntax-ts/queries/terraform-highlights.scm similarity index 100% rename from 
misc/syntax-ts/queries-override/terraform-highlights.scm rename to misc/syntax-ts/queries/terraform-highlights.scm diff --git a/misc/syntax-ts/queries-override/toml-highlights.scm b/misc/syntax-ts/queries/toml-highlights.scm similarity index 100% rename from misc/syntax-ts/queries-override/toml-highlights.scm rename to misc/syntax-ts/queries/toml-highlights.scm diff --git a/misc/syntax-ts/queries-override/turtle-highlights.scm b/misc/syntax-ts/queries/turtle-highlights.scm similarity index 100% rename from misc/syntax-ts/queries-override/turtle-highlights.scm rename to misc/syntax-ts/queries/turtle-highlights.scm diff --git a/misc/syntax-ts/queries-override/typescript-highlights.scm b/misc/syntax-ts/queries/typescript-highlights.scm similarity index 100% rename from misc/syntax-ts/queries-override/typescript-highlights.scm rename to misc/syntax-ts/queries/typescript-highlights.scm diff --git a/misc/syntax-ts/queries-override/verilog-highlights.scm b/misc/syntax-ts/queries/verilog-highlights.scm similarity index 100% rename from misc/syntax-ts/queries-override/verilog-highlights.scm rename to misc/syntax-ts/queries/verilog-highlights.scm diff --git a/misc/syntax-ts/queries-override/vhdl-highlights.scm b/misc/syntax-ts/queries/vhdl-highlights.scm similarity index 100% rename from misc/syntax-ts/queries-override/vhdl-highlights.scm rename to misc/syntax-ts/queries/vhdl-highlights.scm diff --git a/misc/syntax-ts/queries-override/xml-highlights.scm b/misc/syntax-ts/queries/xml-highlights.scm similarity index 100% rename from misc/syntax-ts/queries-override/xml-highlights.scm rename to misc/syntax-ts/queries/xml-highlights.scm diff --git a/misc/syntax-ts/queries-override/yaml-highlights.scm b/misc/syntax-ts/queries/yaml-highlights.scm similarity index 100% rename from misc/syntax-ts/queries-override/yaml-highlights.scm rename to misc/syntax-ts/queries/yaml-highlights.scm diff --git a/scripts/ts-grammars-download.sh b/scripts/ts-grammars-download.sh index 
16b3b1e1c3..244a7a0df1 100755 --- a/scripts/ts-grammars-download.sh +++ b/scripts/ts-grammars-download.sh @@ -2,7 +2,7 @@ set -euo pipefail -TS_GRAMMARS_VERSION='2026.04.02' +TS_GRAMMARS_VERSION='2026.04.11' REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)" BASE_URL='https://github.com/jtyr/tree-sitter-grammars/releases/download' @@ -13,8 +13,9 @@ Usage: ts-grammars-download.sh [OPTIONS] Download tree-sitter grammar files from GitHub releases. Options: - --source Download source tarball (for static builds) - --shared Download platform-specific tarball (for shared builds) + --source Download source tarball (for static builds from source) + --shared Download shared library tarball (.so/.dylib/.dll) + --static Download static library tarball (.a) --latest Use latest release instead of pinned version --platform= Override platform auto-detection Supported: x86_64-linux, aarch64-linux, @@ -22,7 +23,7 @@ Options: x86_64-windows -h, --help Show this help message -At least one of --source or --shared must be specified. +At least one of --source, --shared, or --static must be specified. 
USAGE } @@ -123,11 +124,8 @@ extract_source() { tar -xzf "$tmp_dir/$filename" -C "$tmp_dir" local grammars_dest="$REPO_ROOT/src/editor/ts-grammars" - local queries_dest="$REPO_ROOT/misc/syntax-ts/queries" local count=0 - mkdir -p "$queries_dest" - # Tarball contains a top-level directory; iterate grammar dirs inside it for lang_dir in "$tmp_dir"/tree-sitter-grammars-*/*/; do local lang @@ -135,7 +133,6 @@ extract_source() { # Skip if not a grammar directory [[ -f "$lang_dir/src/parser.c" ]] || continue - [[ -f "$lang_dir/queries/highlights.scm" ]] || continue # Skip C++ scanners if [[ -f "$lang_dir/src/scanner.cc" ]]; then @@ -148,12 +145,6 @@ extract_source() { mkdir -p "$grammars_dest/$lang" cp "$lang_dir"/src/* "$grammars_dest/$lang/" - cp "$lang_dir/queries/highlights.scm" "$queries_dest/${lang}-highlights.scm" - - if [[ -f "$lang_dir/queries/injections.scm" ]]; then - cp "$lang_dir/queries/injections.scm" "$queries_dest/${lang}-injections.scm" - fi - count=$((count + 1)) done @@ -169,7 +160,7 @@ extract_shared() { # Only set trap if not already set by extract_source trap "rm -rf '$tmp_dir'" EXIT - local filename="tree-sitter-grammars-$platform.tar.gz" + local filename="tree-sitter-grammars-$platform-shared.tar.gz" download_and_verify "$version" "$filename" "$tmp_dir" @@ -177,19 +168,15 @@ extract_shared() { tar -xzf "$tmp_dir/$filename" -C "$tmp_dir" local shared_dest="$REPO_ROOT/ts-grammars-shared" - local queries_dest="$REPO_ROOT/misc/syntax-ts/queries" local count=0 - mkdir -p "$shared_dest" "$queries_dest" + mkdir -p "$shared_dest" # Tarball contains a top-level directory; iterate grammar dirs inside it for lang_dir in "$tmp_dir"/tree-sitter-grammars-*/*/; do local lang lang="$(basename "$lang_dir")" - # Skip if no highlights query - [[ -f "$lang_dir/queries/highlights.scm" ]] || continue - echo " Extracting shared: $lang" mkdir -p "$shared_dest/$lang" @@ -201,21 +188,55 @@ extract_shared() { done done - cp "$lang_dir/queries/highlights.scm" 
"$queries_dest/${lang}-highlights.scm" + count=$((count + 1)) + done - if [[ -f "$lang_dir/queries/injections.scm" ]]; then - cp "$lang_dir/queries/injections.scm" "$queries_dest/${lang}-injections.scm" - fi + echo "Shared extraction complete: $count grammars." +} + +extract_static() { + local version=$1 + local platform=$2 + local tmp_dir + + tmp_dir="$(mktemp -d)" + trap "rm -rf '$tmp_dir'" EXIT + + local filename="tree-sitter-grammars-$platform-static.tar.gz" + + download_and_verify "$version" "$filename" "$tmp_dir" + + echo 'Extracting static tarball ...' + tar -xzf "$tmp_dir/$filename" -C "$tmp_dir" + + local grammars_dest="$REPO_ROOT/src/editor/ts-grammars" + local count=0 + + mkdir -p "$grammars_dest" + + # Tarball contains a top-level directory; iterate grammar dirs inside it + for lang_dir in "$tmp_dir"/tree-sitter-grammars-*/*/; do + local lang + lang="$(basename "$lang_dir")" + + # Skip if no .a file + [[ -f "$lang_dir/$lang.a" ]] || continue + + echo " Extracting static: $lang" + + mkdir -p "$grammars_dest/$lang" + cp "$lang_dir/$lang.a" "$grammars_dest/$lang/" count=$((count + 1)) done - echo "Shared extraction complete: $count grammars." + echo "Static extraction complete: $count grammars." 
} main() { local do_source=0 local do_shared=0 + local do_static=0 local use_latest=0 local platform='' @@ -229,6 +250,10 @@ main() { do_shared=1 shift ;; + --static) + do_static=1 + shift + ;; --latest) use_latest=1 shift @@ -249,8 +274,8 @@ main() { esac done - if [[ $do_source -eq 0 && $do_shared -eq 0 ]]; then - echo 'Error: at least one of --source or --shared must be specified' >&2 + if [[ $do_source -eq 0 && $do_shared -eq 0 && $do_static -eq 0 ]]; then + echo 'Error: at least one of --source, --shared, or --static must be specified' >&2 usage >&2 exit 1 fi @@ -264,7 +289,7 @@ main() { echo "Using version: $version" - if [[ $do_shared -eq 1 && -z $platform ]]; then + if [[ ($do_shared -eq 1 || $do_static -eq 1) && -z $platform ]]; then platform="$(detect_platform)" echo "Detected platform: $platform" fi @@ -277,6 +302,10 @@ main() { extract_shared "$version" "$platform" fi + if [[ $do_static -eq 1 ]]; then + extract_static "$version" "$platform" + fi + echo 'Done.' } diff --git a/src/Makefile.am b/src/Makefile.am index dfd15456ce..5fce3b04d5 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -59,6 +59,10 @@ mc_LDADD = \ libinternal.la \ $(TREE_SITTER_LIBS) +if TREE_SITTER_STATIC +mc_LDADD += editor/ts-grammars/libtsgrammars.a +endif + if ENABLE_MCLIB libinternal_la_LIBADD += \ $(top_builddir)/lib/libmc.la diff --git a/src/editor/Makefile.am b/src/editor/Makefile.am index 5bf5706d45..81f8fb5ae6 100644 --- a/src/editor/Makefile.am +++ b/src/editor/Makefile.am @@ -42,7 +42,7 @@ AM_CPPFLAGS = $(GLIB_CFLAGS) $(TREE_SITTER_CFLAGS) -I$(top_srcdir) if USE_TREE_SITTER if TREE_SITTER_STATIC AM_CPPFLAGS += @TREE_SITTER_GRAMMAR_DEFS@ -libedit_la_LIBADD = ts-grammars/libtsgrammars.la $(TREE_SITTER_LIBS) +libedit_la_LIBADD = $(TREE_SITTER_LIBS) else AM_CPPFLAGS += $(GMODULE_CFLAGS) -DTS_GRAMMAR_LIBDIR=\""$(libdir)/mc/ts-grammars"\" libedit_la_LIBADD = $(TREE_SITTER_LIBS) $(GMODULE_LIBS) diff --git a/src/editor/syntax_ts.c b/src/editor/syntax_ts.c index 
62c2619ac5..ca97c556e0 100644 --- a/src/editor/syntax_ts.c +++ b/src/editor/syntax_ts.c @@ -42,6 +42,7 @@ #include "lib/global.h" #include "lib/skin.h" #include "lib/fileloc.h" // EDIT_SYNTAX_DIR, EDIT_SYNTAX_TS_DIR +#include "lib/tty/key.h" // is_idle() #include "lib/strutil.h" // utf string functions #include "lib/util.h" // whiteness @@ -67,7 +68,9 @@ typedef struct typedef struct { void *parser; // TSParser* - void *query; // TSQuery* + void *query; // TSQuery* -- highlight query + void *injection_query; // TSQuery* -- nested injection query (or NULL if none) + gboolean injection_query_loaded; // TRUE once we tried to load it (success or fail) } ts_dynamic_lang_t; @@ -965,41 +968,34 @@ ts_setup_wrapper_injection (WEdit *edit, const TSLanguage *lang, /** * Load a query file. Search order: - * 1. User MC-specific override: ~/.local/share/mc/syntax-ts/queries-override/ - * 2. System MC-specific override: $(datadir)/mc/syntax-ts/queries-override/ - * 3. User upstream queries: ~/.local/share/mc/syntax-ts/queries/ - * 4. System upstream queries: $(datadir)/mc/syntax-ts/queries/ + * 1. User queries: ~/.local/share/mc/syntax-ts/queries/ + * 2. System queries: $(datadir)/mc/syntax-ts/queries/ * Returns a newly allocated string with the file contents, or NULL on failure. 
*/ static char * ts_load_query_file (const char *query_filename, uint32_t *out_len) { - static const char *subdirs[] = { "queries-override", "queries" }; const char *base_dirs[2]; char *contents = NULL; gsize len = 0; - int b, s; + int b; base_dirs[0] = mc_config_get_data_path (); base_dirs[1] = mc_global.share_data_dir; - /* Try override dirs first, then upstream dirs */ - for (s = 0; s < 2; s++) + for (b = 0; b < 2; b++) { - for (b = 0; b < 2; b++) - { - char *path; + char *path; - path = g_build_filename (base_dirs[b], EDIT_SYNTAX_TS_DIR, subdirs[s], - query_filename, (char *) NULL); - if (g_file_get_contents (path, &contents, &len, NULL)) - { - g_free (path); - *out_len = (uint32_t) len; - return contents; - } + path = g_build_filename (base_dirs[b], EDIT_SYNTAX_TS_DIR, "queries", + query_filename, (char *) NULL); + if (g_file_get_contents (path, &contents, &len, NULL)) + { g_free (path); + *out_len = (uint32_t) len; + return contents; } + g_free (path); } *out_len = 0; @@ -1369,9 +1365,6 @@ ts_init_for_file (WEdit *edit, const char *forced_grammar) return FALSE; } - // Set a timeout to prevent pathological grammars from freezing the editor - ts_parser_set_timeout_micros (parser, 3000000); // 3 seconds - // Parse the buffer input.payload = edit; input.read = ts_input_read; @@ -1571,6 +1564,8 @@ ts_free (WEdit *edit) if (dl->query != NULL) ts_query_delete ((TSQuery *) dl->query); + if (dl->injection_query != NULL) + ts_query_delete ((TSQuery *) dl->injection_query); if (dl->parser != NULL) ts_parser_delete ((TSParser *) dl->parser); g_free (dl); @@ -1717,8 +1712,6 @@ ts_get_dynamic_lang (GHashTable *lang_cache, const char *lang_name) return NULL; } - ts_parser_set_timeout_micros (parser, 3000000); - query_filename = g_strdup_printf ("%s-highlights.scm", lang_name); query_src = ts_load_query_file (query_filename, &query_len); g_free (query_filename); @@ -1827,109 +1820,117 @@ ts_inject_and_highlight (const char *lang_name, TSNode content_node, TSInput inp 
inj_lang = ts_grammar_registry_lookup (lang_name); if (inj_lang != NULL) { - char *nested_inj_filename; - char *nested_inj_src; - uint32_t nested_inj_len; + TSQuery *nested_inj_query; - nested_inj_filename = g_strdup_printf ("%s-injections.scm", lang_name); - nested_inj_src = ts_load_query_file (nested_inj_filename, &nested_inj_len); - g_free (nested_inj_filename); + /* Lazy-load the nested injection query and cache it on the + dynamic language entry to avoid recompiling on every call. */ + if (!dl->injection_query_loaded) + { + char *nested_inj_filename; + char *nested_inj_src; + uint32_t nested_inj_len; + + nested_inj_filename = g_strdup_printf ("%s-injections.scm", lang_name); + nested_inj_src = ts_load_query_file (nested_inj_filename, &nested_inj_len); + g_free (nested_inj_filename); + + if (nested_inj_src != NULL) + { + uint32_t eo; + TSQueryError et; + + dl->injection_query = + ts_query_new (inj_lang, nested_inj_src, nested_inj_len, &eo, &et); + g_free (nested_inj_src); + } + dl->injection_query_loaded = TRUE; + } - if (nested_inj_src != NULL) + nested_inj_query = (TSQuery *) dl->injection_query; + + if (nested_inj_query != NULL) { - uint32_t eo; - TSQueryError et; - TSQuery *nested_inj_query; + TSNode inj_root; + TSQueryCursor *nested_cursor; + TSQueryMatch nested_match; - nested_inj_query = - ts_query_new (inj_lang, nested_inj_src, nested_inj_len, &eo, &et); - g_free (nested_inj_src); + inj_root = ts_tree_root_node (inject_tree); + nested_cursor = ts_query_cursor_new (); + ts_query_cursor_set_byte_range (nested_cursor, range_start, range_end); + ts_query_cursor_exec (nested_cursor, nested_inj_query, inj_root); - if (nested_inj_query != NULL) + while (ts_query_cursor_next_match (nested_cursor, &nested_match)) { - TSNode inj_root; - TSQueryCursor *nested_cursor; - TSQueryMatch nested_match; + TSNode nested_content = { .id = NULL }; + TSNode nested_lang_node = { .id = NULL }; + const char *nested_static_lang = NULL; + uint32_t nci; - inj_root = 
ts_tree_root_node (inject_tree); - nested_cursor = ts_query_cursor_new (); - ts_query_cursor_set_byte_range (nested_cursor, range_start, range_end); - ts_query_cursor_exec (nested_cursor, nested_inj_query, inj_root); + if (!ts_evaluate_match_predicates (nested_inj_query, &nested_match, edit)) + continue; - while (ts_query_cursor_next_match (nested_cursor, &nested_match)) + for (nci = 0; nci < nested_match.capture_count; nci++) { - TSNode nested_content = { .id = NULL }; - TSNode nested_lang_node = { .id = NULL }; - const char *nested_static_lang = NULL; - uint32_t nci; + uint32_t nlen; + const char *ncap; + + ncap = ts_query_capture_name_for_id (nested_inj_query, + nested_match.captures[nci].index, + &nlen); + if (strcmp (ncap, "injection.content") == 0) + nested_content = nested_match.captures[nci].node; + else if (strcmp (ncap, "injection.language") == 0) + nested_lang_node = nested_match.captures[nci].node; + } - if (!ts_evaluate_match_predicates (nested_inj_query, &nested_match, edit)) - continue; + if (ts_node_is_null (nested_content)) + continue; - for (nci = 0; nci < nested_match.capture_count; nci++) - { - uint32_t nlen; - const char *ncap; - - ncap = ts_query_capture_name_for_id (nested_inj_query, - nested_match.captures[nci].index, - &nlen); - if (strcmp (ncap, "injection.content") == 0) - nested_content = nested_match.captures[nci].node; - else if (strcmp (ncap, "injection.language") == 0) - nested_lang_node = nested_match.captures[nci].node; - } + nested_static_lang = + ts_get_set_predicate (nested_inj_query, nested_match.pattern_index, + "injection.language"); - if (ts_node_is_null (nested_content)) - continue; - - nested_static_lang = - ts_get_set_predicate (nested_inj_query, nested_match.pattern_index, - "injection.language"); + if (nested_static_lang != NULL) + { + ts_inject_and_highlight (nested_static_lang, nested_content, input, + range_start, range_end, highlights, + lang_cache, edit, depth - 1); + } + else if (!ts_node_is_null 
(nested_lang_node)) + { + uint32_t ls = ts_node_start_byte (nested_lang_node); + uint32_t le = ts_node_end_byte (nested_lang_node); + uint32_t ll = le - ls; - if (nested_static_lang != NULL) - { - ts_inject_and_highlight (nested_static_lang, nested_content, input, - range_start, range_end, highlights, - lang_cache, edit, depth - 1); - } - else if (!ts_node_is_null (nested_lang_node)) + if (ll > 0 && ll < 64) { - uint32_t ls = ts_node_start_byte (nested_lang_node); - uint32_t le = ts_node_end_byte (nested_lang_node); - uint32_t ll = le - ls; - - if (ll > 0 && ll < 64) - { - char lbuf[64]; - uint32_t li; - char *s, *e; - - for (li = 0; li < ll; li++) - lbuf[li] = (char) edit_buffer_get_byte (&edit->buffer, - (off_t) (ls + li)); - lbuf[ll] = '\0'; - - s = lbuf; - while (*s == ' ' || *s == '\t') - s++; - e = s + strlen (s); - while (e > s && (e[-1] == ' ' || e[-1] == '\t' || e[-1] == '\n' - || e[-1] == '\r')) - e--; - *e = '\0'; - - if (*s != '\0') - ts_inject_and_highlight (s, nested_content, input, - range_start, range_end, highlights, - lang_cache, edit, depth - 1); - } + char lbuf[64]; + uint32_t li; + char *s, *e; + + for (li = 0; li < ll; li++) + lbuf[li] = (char) edit_buffer_get_byte (&edit->buffer, + (off_t) (ls + li)); + lbuf[ll] = '\0'; + + s = lbuf; + while (*s == ' ' || *s == '\t') + s++; + e = s + strlen (s); + while (e > s && (e[-1] == ' ' || e[-1] == '\t' || e[-1] == '\n' + || e[-1] == '\r')) + e--; + *e = '\0'; + + if (*s != '\0') + ts_inject_and_highlight (s, nested_content, input, + range_start, range_end, highlights, + lang_cache, edit, depth - 1); } } - - ts_query_cursor_delete (nested_cursor); - ts_query_delete (nested_inj_query); } + + ts_query_cursor_delete (nested_cursor); } } } @@ -1949,6 +1950,7 @@ ts_rebuild_highlight_cache (WEdit *edit, off_t range_start, off_t range_end) { TSTree *tree; TSInput input; + gboolean had_edit; if (!edit->ts.active) return; @@ -1957,6 +1959,12 @@ ts_rebuild_highlight_cache (WEdit *edit, off_t range_start, off_t 
range_end) input.read = ts_input_read; input.encoding = TSInputEncodingUTF8; + /* Remember if this rebuild was triggered by an edit (vs scroll). + During rapid editing, we skip the expensive injection processing + to keep the render responsive. Injections are only refreshed when + rebuilding due to a scroll (cache invalidated by viewport change). */ + had_edit = edit->ts.need_reparse; + // Perform deferred re-parse if the tree was edited since last parse if (edit->ts.need_reparse) { @@ -1982,8 +1990,11 @@ ts_rebuild_highlight_cache (WEdit *edit, off_t range_start, off_t range_end) (uint32_t) range_start, (uint32_t) range_end, edit->ts.highlights, edit->ts.grammar_name, edit); - // Run injection queries if configured. - if (edit->ts.injection_query != NULL) + /* Run injection queries if configured. Skip during rapid edits to keep + rendering responsive - injections are expensive (parse + query for each + injected range). Edits that don't change injection structure won't lose + much; the next idle rebuild will refresh injections. */ + if (edit->ts.injection_query != NULL && (!had_edit || is_idle ())) { TSNode root; TSQuery *inj_query; diff --git a/src/editor/ts-grammars/.gitignore b/src/editor/ts-grammars/.gitignore index 6cd20e1928..1190881ba0 100644 --- a/src/editor/ts-grammars/.gitignore +++ b/src/editor/ts-grammars/.gitignore @@ -1,4 +1,3 @@ -# Grammar source directories are downloaded at build time. -# Only keep infrastructure files (tree_sitter/, ts-grammar-registry.h, etc.) +# Grammar directories and build artifacts are downloaded/generated at build time. */ -!tree_sitter/ +libtsgrammars.a diff --git a/src/editor/ts-grammars/Makefile.am b/src/editor/ts-grammars/Makefile.am index c41839b914..259aa5e0e5 100644 --- a/src/editor/ts-grammars/Makefile.am +++ b/src/editor/ts-grammars/Makefile.am @@ -1,71 +1,43 @@ -# Makefile.am for tree-sitter grammar sources. 
-# Compiles selected grammar sources into a static convenience library (static -# mode) or per-grammar shared modules (shared mode). -# -# We use all-local/clean-local/install hooks instead of automake's _LTLIBRARIES -# because the source list is determined at configure time and automake does not -# support configure substitutions in _SOURCES variables. +# Makefile.am for tree-sitter grammar libraries. +# In static mode, merges pre-built .a files into a convenience library. +# In shared mode, grammar .so files are loaded at runtime (nothing to build). EXTRA_DIST = \ - download-grammars.sh \ - ts-grammar-registry.h \ - tree_sitter/parser.h \ - tree_sitter/alloc.h \ - tree_sitter/array.h \ - tree_sitter/ts_assert.h + ts-grammar-registry.h # Variables substituted by configure -TREE_SITTER_GRAMMAR_SOURCES = @TREE_SITTER_GRAMMAR_SOURCES@ -TREE_SITTER_GRAMMAR_OBJECTS = @TREE_SITTER_GRAMMAR_OBJECTS@ +TREE_SITTER_GRAMMAR_ARCHIVES = @TREE_SITTER_GRAMMAR_ARCHIVES@ TREE_SITTER_GRAMMARS = @TREE_SITTER_GRAMMARS@ -TREE_SITTER_SHARED_LIBS = @TREE_SITTER_SHARED_LIBS@ TREE_SITTER_BUILD_TARGET = @TREE_SITTER_BUILD_TARGET@ TREE_SITTER_BUILD_MODE = @TREE_SITTER_BUILD_MODE@ TREE_SITTER_GRAMMAR_LIBDIR = @TREE_SITTER_GRAMMAR_LIBDIR@ -TS_GRAMMAR_DIRS = $(TREE_SITTER_GRAMMARS) -OBJECTS = $(TREE_SITTER_GRAMMAR_OBJECTS) - -AM_CPPFLAGS = -I$(top_srcdir) -I$(srcdir) $(TREE_SITTER_CFLAGS) -AM_CFLAGS = -w - # ============================================================================ # Build # ============================================================================ all-local: $(TREE_SITTER_BUILD_TARGET) -# Static mode: single convenience library -libtsgrammars.la: check-grammar-sources $(OBJECTS) - $(LIBTOOL) --tag=CXX --mode=link $(CXX) $(CXXFLAGS) $(LDFLAGS) -o $@ $(OBJECTS) - -# Shared mode: one loadable module per grammar -shared-modules: check-grammar-sources $(OBJECTS) - @for lang in $(TS_GRAMMAR_DIRS); do \ - objs="$$lang/parser.lo"; \ - linker="$(CC)"; tag="CC"; \ - if test -f 
"$$lang/scanner.lo"; then \ - objs="$$objs $$lang/scanner.lo"; \ - fi; \ - if test -f "$(srcdir)/$$lang/scanner.cc"; then \ - linker="$(CXX)"; tag="CXX"; \ - fi; \ - echo " LINK $$lang.so"; \ - $(LIBTOOL) --tag=$$tag --mode=link $$linker $(LDFLAGS) \ - -module -avoid-version -rpath $(TREE_SITTER_GRAMMAR_LIBDIR) \ - $(TREE_SITTER_LIBS) -o $$lang.la $$objs || exit 1; \ - done - -# Pattern rules for compiling grammar sources -SUFFIXES = .c .cc .lo - -.c.lo: - @dir=`dirname $@`; test -d "$$dir" || mkdir -p "$$dir" - $(LIBTOOL) --tag=CC --mode=compile $(CC) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o $@ $< - -.cc.lo: - @dir=`dirname $@`; test -d "$$dir" || mkdir -p "$$dir" - $(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CXXFLAGS) -c -o $@ $< +# Static mode: merge pre-built .a files into a single static library. +# Extract .o files from each grammar archive, then combine into one .a. +libtsgrammars.a: + @rm -rf _objs && mkdir -p _objs; \ + for archive in $(TREE_SITTER_GRAMMAR_ARCHIVES); do \ + lang=`echo $$archive | sed 's|/.*||'`; \ + mkdir -p _objs/$$lang; \ + (cd _objs/$$lang && $(AR) x $(abs_srcdir)/$$archive); \ + done; \ + all_objs=""; \ + for archive in $(TREE_SITTER_GRAMMAR_ARCHIVES); do \ + lang=`echo $$archive | sed 's|/.*||'`; \ + for o in _objs/$$lang/*.o; do \ + test -f "$$o" && all_objs="$$all_objs $$o"; \ + done; \ + done; \ + echo " AR libtsgrammars.a"; \ + $(AR) cr $@ $$all_objs; \ + $(RANLIB) $@; \ + rm -rf _objs # ============================================================================ # Install @@ -77,67 +49,12 @@ install-exec-static: @true install-exec-shared: - $(INSTALL) -d $(DESTDIR)$(TREE_SITTER_GRAMMAR_LIBDIR) - @for lang in $(TS_GRAMMAR_DIRS); do \ - $(INSTALL) -m 755 .libs/$$lang.so $(DESTDIR)$(TREE_SITTER_GRAMMAR_LIBDIR)/$$lang.so; \ - done + @true # ============================================================================ # Clean # 
============================================================================ clean-local: - @for lang in $(TS_GRAMMAR_DIRS); do \ - rm -f "$$lang"/*.lo "$$lang"/*.o; \ - rm -rf "$$lang/.libs"; \ - done - rm -f libtsgrammars.la - rm -f *.la - rm -rf .libs - -distclean-local: - @for d in "$(srcdir)"/*/; do \ - case "$$d" in */tree_sitter/) ;; *) rm -rf "$$d" ;; esac; \ - done - -# ============================================================================ -# Grammar download -# ============================================================================ - -.PHONY: download-grammars shared-modules check-grammar-sources - -download-grammars: - @if test -n "$(TREE_SITTER_GRAMMARS_DIR)"; then \ - echo "Copying grammar sources from $(TREE_SITTER_GRAMMARS_DIR)..."; \ - for lang in $(TS_GRAMMAR_DIRS); do \ - if test -d "$(TREE_SITTER_GRAMMARS_DIR)/$$lang"; then \ - mkdir -p "$(srcdir)/$$lang"; \ - cp -a "$(TREE_SITTER_GRAMMARS_DIR)/$$lang"/* "$(srcdir)/$$lang/"; \ - else \ - echo "WARNING: $$lang not found in $(TREE_SITTER_GRAMMARS_DIR)"; \ - fi; \ - done; \ - echo "Done."; \ - else \ - $(SHELL) "$(srcdir)/download-grammars.sh" $(TS_GRAMMAR_DIRS); \ - fi - -# Check that grammar sources exist before compiling. -check-grammar-sources: - @missing=""; \ - for lang in $(TS_GRAMMAR_DIRS); do \ - if test ! 
-f "$(srcdir)/$$lang/parser.c"; then \ - missing="$$missing $$lang"; \ - fi; \ - done; \ - if test -n "$$missing"; then \ - echo ""; \ - echo "ERROR: Tree-sitter grammar sources not found for:$$missing"; \ - echo ""; \ - echo "Please run: ./configure --with-tree-sitter"; \ - echo "(configure auto-downloads missing grammar sources)"; \ - echo ""; \ - echo "Or manually: make -C src/editor/ts-grammars download-grammars"; \ - echo ""; \ - exit 1; \ - fi + rm -f libtsgrammars.a + rm -rf _objs diff --git a/src/editor/ts-grammars/lisp b/src/editor/ts-grammars/lisp deleted file mode 120000 index 2a4123e0e7..0000000000 --- a/src/editor/ts-grammars/lisp +++ /dev/null @@ -1 +0,0 @@ -commonlisp \ No newline at end of file diff --git a/src/editor/ts-grammars/tree_sitter/alloc.h b/src/editor/ts-grammars/tree_sitter/alloc.h deleted file mode 100644 index a27b8a6334..0000000000 --- a/src/editor/ts-grammars/tree_sitter/alloc.h +++ /dev/null @@ -1,41 +0,0 @@ -#ifndef TREE_SITTER_ALLOC_H_ -#define TREE_SITTER_ALLOC_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -#include -#include -#include - -#if defined(TREE_SITTER_HIDDEN_SYMBOLS) || defined(_WIN32) -#define TS_PUBLIC -#else -#define TS_PUBLIC __attribute__((visibility("default"))) -#endif - -TS_PUBLIC extern void *(*ts_current_malloc)(size_t size); -TS_PUBLIC extern void *(*ts_current_calloc)(size_t count, size_t size); -TS_PUBLIC extern void *(*ts_current_realloc)(void *ptr, size_t size); -TS_PUBLIC extern void (*ts_current_free)(void *ptr); - -// Allow clients to override allocation functions -#ifndef ts_malloc -#define ts_malloc ts_current_malloc -#endif -#ifndef ts_calloc -#define ts_calloc ts_current_calloc -#endif -#ifndef ts_realloc -#define ts_realloc ts_current_realloc -#endif -#ifndef ts_free -#define ts_free ts_current_free -#endif - -#ifdef __cplusplus -} -#endif - -#endif // TREE_SITTER_ALLOC_H_ diff --git a/src/editor/ts-grammars/tree_sitter/array.h b/src/editor/ts-grammars/tree_sitter/array.h deleted file mode 
100644 index d965c6170c..0000000000 --- a/src/editor/ts-grammars/tree_sitter/array.h +++ /dev/null @@ -1,291 +0,0 @@ -#ifndef TREE_SITTER_ARRAY_H_ -#define TREE_SITTER_ARRAY_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -#include "./alloc.h" -#include "./ts_assert.h" - -#include -#include -#include -#include - -#ifdef _MSC_VER -#pragma warning(push) -#pragma warning(disable : 4101) -#elif defined(__GNUC__) || defined(__clang__) -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wunused-variable" -#endif - -#define Array(T) \ - struct { \ - T *contents; \ - uint32_t size; \ - uint32_t capacity; \ - } - -/// Initialize an array. -#define array_init(self) \ - ((self)->size = 0, (self)->capacity = 0, (self)->contents = NULL) - -/// Create an empty array. -#define array_new() \ - { NULL, 0, 0 } - -/// Get a pointer to the element at a given `index` in the array. -#define array_get(self, _index) \ - (ts_assert((uint32_t)(_index) < (self)->size), &(self)->contents[_index]) - -/// Get a pointer to the first element in the array. -#define array_front(self) array_get(self, 0) - -/// Get a pointer to the last element in the array. -#define array_back(self) array_get(self, (self)->size - 1) - -/// Clear the array, setting its size to zero. Note that this does not free any -/// memory allocated for the array's contents. -#define array_clear(self) ((self)->size = 0) - -/// Reserve `new_capacity` elements of space in the array. If `new_capacity` is -/// less than the array's current capacity, this function has no effect. -#define array_reserve(self, new_capacity) \ - _array__reserve((Array *)(self), array_elem_size(self), new_capacity) - -/// Free any memory allocated for this array. Note that this does not free any -/// memory allocated for the array's contents. -#define array_delete(self) _array__delete((Array *)(self)) - -/// Push a new `element` onto the end of the array. 
-#define array_push(self, element) \ - (_array__grow((Array *)(self), 1, array_elem_size(self)), \ - (self)->contents[(self)->size++] = (element)) - -/// Increase the array's size by `count` elements. -/// New elements are zero-initialized. -#define array_grow_by(self, count) \ - do { \ - if ((count) == 0) break; \ - _array__grow((Array *)(self), count, array_elem_size(self)); \ - memset((self)->contents + (self)->size, 0, (count) * array_elem_size(self)); \ - (self)->size += (count); \ - } while (0) - -/// Append all elements from one array to the end of another. -#define array_push_all(self, other) \ - array_extend((self), (other)->size, (other)->contents) - -/// Append `count` elements to the end of the array, reading their values from the -/// `contents` pointer. -#define array_extend(self, count, contents) \ - _array__splice( \ - (Array *)(self), array_elem_size(self), (self)->size, \ - 0, count, contents \ - ) - -/// Remove `old_count` elements from the array starting at the given `index`. At -/// the same index, insert `new_count` new elements, reading their values from the -/// `new_contents` pointer. -#define array_splice(self, _index, old_count, new_count, new_contents) \ - _array__splice( \ - (Array *)(self), array_elem_size(self), _index, \ - old_count, new_count, new_contents \ - ) - -/// Insert one `element` into the array at the given `index`. -#define array_insert(self, _index, element) \ - _array__splice((Array *)(self), array_elem_size(self), _index, 0, 1, &(element)) - -/// Remove one element from the array at the given `index`. -#define array_erase(self, _index) \ - _array__erase((Array *)(self), array_elem_size(self), _index) - -/// Pop the last element off the array, returning the element by value. -#define array_pop(self) ((self)->contents[--(self)->size]) - -/// Assign the contents of one array to another, reallocating if necessary. 
-#define array_assign(self, other) \ - _array__assign((Array *)(self), (const Array *)(other), array_elem_size(self)) - -/// Swap one array with another -#define array_swap(self, other) \ - _array__swap((Array *)(self), (Array *)(other)) - -/// Get the size of the array contents -#define array_elem_size(self) (sizeof *(self)->contents) - -/// Search a sorted array for a given `needle` value, using the given `compare` -/// callback to determine the order. -/// -/// If an existing element is found to be equal to `needle`, then the `index` -/// out-parameter is set to the existing value's index, and the `exists` -/// out-parameter is set to true. Otherwise, `index` is set to an index where -/// `needle` should be inserted in order to preserve the sorting, and `exists` -/// is set to false. -#define array_search_sorted_with(self, compare, needle, _index, _exists) \ - _array__search_sorted(self, 0, compare, , needle, _index, _exists) - -/// Search a sorted array for a given `needle` value, using integer comparisons -/// of a given struct field (specified with a leading dot) to determine the order. -/// -/// See also `array_search_sorted_with`. -#define array_search_sorted_by(self, field, needle, _index, _exists) \ - _array__search_sorted(self, 0, _compare_int, field, needle, _index, _exists) - -/// Insert a given `value` into a sorted array, using the given `compare` -/// callback to determine the order. -#define array_insert_sorted_with(self, compare, value) \ - do { \ - unsigned _index, _exists; \ - array_search_sorted_with(self, compare, &(value), &_index, &_exists); \ - if (!_exists) array_insert(self, _index, value); \ - } while (0) - -/// Insert a given `value` into a sorted array, using integer comparisons of -/// a given struct field (specified with a leading dot) to determine the order. -/// -/// See also `array_search_sorted_by`. 
-#define array_insert_sorted_by(self, field, value) \ - do { \ - unsigned _index, _exists; \ - array_search_sorted_by(self, field, (value) field, &_index, &_exists); \ - if (!_exists) array_insert(self, _index, value); \ - } while (0) - -// Private - -typedef Array(void) Array; - -/// This is not what you're looking for, see `array_delete`. -static inline void _array__delete(Array *self) { - if (self->contents) { - ts_free(self->contents); - self->contents = NULL; - self->size = 0; - self->capacity = 0; - } -} - -/// This is not what you're looking for, see `array_erase`. -static inline void _array__erase(Array *self, size_t element_size, - uint32_t index) { - ts_assert(index < self->size); - char *contents = (char *)self->contents; - memmove(contents + index * element_size, contents + (index + 1) * element_size, - (self->size - index - 1) * element_size); - self->size--; -} - -/// This is not what you're looking for, see `array_reserve`. -static inline void _array__reserve(Array *self, size_t element_size, uint32_t new_capacity) { - if (new_capacity > self->capacity) { - if (self->contents) { - self->contents = ts_realloc(self->contents, new_capacity * element_size); - } else { - self->contents = ts_malloc(new_capacity * element_size); - } - self->capacity = new_capacity; - } -} - -/// This is not what you're looking for, see `array_assign`. -static inline void _array__assign(Array *self, const Array *other, size_t element_size) { - _array__reserve(self, element_size, other->size); - self->size = other->size; - memcpy(self->contents, other->contents, self->size * element_size); -} - -/// This is not what you're looking for, see `array_swap`. -static inline void _array__swap(Array *self, Array *other) { - Array swap = *other; - *other = *self; - *self = swap; -} - -/// This is not what you're looking for, see `array_push` or `array_grow_by`. 
-static inline void _array__grow(Array *self, uint32_t count, size_t element_size) { - uint32_t new_size = self->size + count; - if (new_size > self->capacity) { - uint32_t new_capacity = self->capacity * 2; - if (new_capacity < 8) new_capacity = 8; - if (new_capacity < new_size) new_capacity = new_size; - _array__reserve(self, element_size, new_capacity); - } -} - -/// This is not what you're looking for, see `array_splice`. -static inline void _array__splice(Array *self, size_t element_size, - uint32_t index, uint32_t old_count, - uint32_t new_count, const void *elements) { - uint32_t new_size = self->size + new_count - old_count; - uint32_t old_end = index + old_count; - uint32_t new_end = index + new_count; - ts_assert(old_end <= self->size); - - _array__reserve(self, element_size, new_size); - - char *contents = (char *)self->contents; - if (self->size > old_end) { - memmove( - contents + new_end * element_size, - contents + old_end * element_size, - (self->size - old_end) * element_size - ); - } - if (new_count > 0) { - if (elements) { - memcpy( - (contents + index * element_size), - elements, - new_count * element_size - ); - } else { - memset( - (contents + index * element_size), - 0, - new_count * element_size - ); - } - } - self->size += new_count - old_count; -} - -/// A binary search routine, based on Rust's `std::slice::binary_search_by`. -/// This is not what you're looking for, see `array_search_sorted_with` or `array_search_sorted_by`. 
-#define _array__search_sorted(self, start, compare, suffix, needle, _index, _exists) \ - do { \ - *(_index) = start; \ - *(_exists) = false; \ - uint32_t size = (self)->size - *(_index); \ - if (size == 0) break; \ - int comparison; \ - while (size > 1) { \ - uint32_t half_size = size / 2; \ - uint32_t mid_index = *(_index) + half_size; \ - comparison = compare(&((self)->contents[mid_index] suffix), (needle)); \ - if (comparison <= 0) *(_index) = mid_index; \ - size -= half_size; \ - } \ - comparison = compare(&((self)->contents[*(_index)] suffix), (needle)); \ - if (comparison == 0) *(_exists) = true; \ - else if (comparison < 0) *(_index) += 1; \ - } while (0) - -/// Helper macro for the `_sorted_by` routines below. This takes the left (existing) -/// parameter by reference in order to work with the generic sorting function above. -#define _compare_int(a, b) ((int)*(a) - (int)(b)) - -#ifdef _MSC_VER -#pragma warning(pop) -#elif defined(__GNUC__) || defined(__clang__) -#pragma GCC diagnostic pop -#endif - -#ifdef __cplusplus -} -#endif - -#endif // TREE_SITTER_ARRAY_H_ diff --git a/src/editor/ts-grammars/tree_sitter/parser.h b/src/editor/ts-grammars/tree_sitter/parser.h deleted file mode 100644 index 890c9f0935..0000000000 --- a/src/editor/ts-grammars/tree_sitter/parser.h +++ /dev/null @@ -1,310 +0,0 @@ -#ifndef TREE_SITTER_PARSER_H_ -#define TREE_SITTER_PARSER_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -#include -#include -#include - -#define ts_builtin_sym_error ((TSSymbol)-1) -#define ts_builtin_sym_end 0 -#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024 - -#ifndef TREE_SITTER_API_H_ -typedef uint16_t TSStateId; -typedef uint16_t TSSymbol; -typedef uint16_t TSFieldId; -typedef struct TSLanguage TSLanguage; -typedef struct TSLanguageMetadata { - uint8_t major_version; - uint8_t minor_version; - uint8_t patch_version; -} TSLanguageMetadata; -#endif - -typedef struct { - TSFieldId field_id; - uint8_t child_index; - bool inherited; -} TSFieldMapEntry; - 
-// Used to index the field and supertype maps. -typedef struct { - uint16_t index; - uint16_t length; -} TSMapSlice; - -/* Backward compatibility for grammars generated with older tree-sitter versions */ -typedef TSMapSlice TSFieldMapSlice; - -typedef struct { - bool visible; - bool named; - bool supertype; -} TSSymbolMetadata; - -typedef struct TSLexer TSLexer; - -struct TSLexer { - int32_t lookahead; - TSSymbol result_symbol; - void (*advance)(TSLexer *, bool); - void (*mark_end)(TSLexer *); - uint32_t (*get_column)(TSLexer *); - bool (*is_at_included_range_start)(const TSLexer *); - bool (*eof)(const TSLexer *); - void (*log)(const TSLexer *, const char *, ...); -}; - -typedef enum { - TSParseActionTypeShift, - TSParseActionTypeReduce, - TSParseActionTypeAccept, - TSParseActionTypeRecover, -} TSParseActionType; - -typedef union { - struct { - uint8_t type; - TSStateId state; - bool extra; - bool repetition; - } shift; - struct { - uint8_t type; - uint8_t child_count; - TSSymbol symbol; - int16_t dynamic_precedence; - uint16_t production_id; - } reduce; - uint8_t type; -} TSParseAction; - -typedef struct { - uint16_t lex_state; - uint16_t external_lex_state; -} TSLexMode; - -typedef struct { - uint16_t lex_state; - uint16_t external_lex_state; - uint16_t reserved_word_set_id; -} TSLexerMode; - -typedef union { - TSParseAction action; - struct { - uint8_t count; - bool reusable; - } entry; - /* Anonymous struct for old grammars that use {.count = X, .reusable = Y} - without the .entry wrapper. Both forms access the same memory layout. 
*/ - struct { - uint8_t count; - bool reusable; - }; -} TSParseActionEntry; - -typedef struct { - int32_t start; - int32_t end; -} TSCharacterRange; - -struct TSLanguage { - union { - uint32_t abi_version; - uint32_t version; /* backward compat for old grammars */ - }; - uint32_t symbol_count; - uint32_t alias_count; - uint32_t token_count; - uint32_t external_token_count; - uint32_t state_count; - uint32_t large_state_count; - uint32_t production_id_count; - uint32_t field_count; - uint16_t max_alias_sequence_length; - const uint16_t *parse_table; - const uint16_t *small_parse_table; - const uint32_t *small_parse_table_map; - const TSParseActionEntry *parse_actions; - const char * const *symbol_names; - const char * const *field_names; - const TSMapSlice *field_map_slices; - const TSFieldMapEntry *field_map_entries; - const TSSymbolMetadata *symbol_metadata; - const TSSymbol *public_symbol_map; - const uint16_t *alias_map; - const TSSymbol *alias_sequences; - const void *lex_modes; /* TSLexMode* (old) or TSLexerMode* (new) */ - bool (*lex_fn)(TSLexer *, TSStateId); - bool (*keyword_lex_fn)(TSLexer *, TSStateId); - TSSymbol keyword_capture_token; - struct { - const bool *states; - const TSSymbol *symbol_map; - void *(*create)(void); - void (*destroy)(void *); - bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist); - unsigned (*serialize)(void *, char *); - void (*deserialize)(void *, const char *, unsigned); - } external_scanner; - const TSStateId *primary_state_ids; - const char *name; - const TSSymbol *reserved_words; - uint16_t max_reserved_word_set_size; - uint32_t supertype_count; - const TSSymbol *supertype_symbols; - const TSMapSlice *supertype_map_slices; - const TSSymbol *supertype_map_entries; - TSLanguageMetadata metadata; -}; - -static inline bool set_contains(const TSCharacterRange *ranges, uint32_t len, int32_t lookahead) { - uint32_t index = 0; - uint32_t size = len - index; - while (size > 1) { - uint32_t half_size = size / 2; - uint32_t 
mid_index = index + half_size; - const TSCharacterRange *range = &ranges[mid_index]; - if (lookahead >= range->start && lookahead <= range->end) { - return true; - } else if (lookahead > range->end) { - index = mid_index; - } - size -= half_size; - } - const TSCharacterRange *range = &ranges[index]; - return (lookahead >= range->start && lookahead <= range->end); -} - -/* - * Lexer Macros - */ - -#ifdef _MSC_VER -#define UNUSED __pragma(warning(suppress : 4101)) -#else -#define UNUSED __attribute__((unused)) -#endif - -#define START_LEXER() \ - bool result = false; \ - bool skip = false; \ - UNUSED \ - bool eof = false; \ - int32_t lookahead; \ - goto start; \ - next_state: \ - lexer->advance(lexer, skip); \ - start: \ - skip = false; \ - lookahead = lexer->lookahead; - -#define ADVANCE(state_value) \ - { \ - state = state_value; \ - goto next_state; \ - } - -#define ADVANCE_MAP(...) \ - { \ - static const uint16_t map[] = { __VA_ARGS__ }; \ - for (uint32_t i = 0; i < sizeof(map) / sizeof(map[0]); i += 2) { \ - if (map[i] == lookahead) { \ - state = map[i + 1]; \ - goto next_state; \ - } \ - } \ - } - -#define SKIP(state_value) \ - { \ - skip = true; \ - state = state_value; \ - goto next_state; \ - } - -#define ACCEPT_TOKEN(symbol_value) \ - result = true; \ - lexer->result_symbol = symbol_value; \ - lexer->mark_end(lexer); - -#define END_STATE() return result; - -/* - * Parse Table Macros - */ - -#define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT) - -#define STATE(id) id - -#define ACTIONS(id) id - -#define SHIFT(state_value) \ - {{ \ - .shift = { \ - .type = TSParseActionTypeShift, \ - .state = (state_value) \ - } \ - }} - -#define SHIFT_REPEAT(state_value) \ - {{ \ - .shift = { \ - .type = TSParseActionTypeShift, \ - .state = (state_value), \ - .repetition = true \ - } \ - }} - -#define SHIFT_EXTRA() \ - {{ \ - .shift = { \ - .type = TSParseActionTypeShift, \ - .extra = true \ - } \ - }} - -/* - * Variadic REDUCE macro: supports all grammar generations without - 
* modifying downloaded grammar files. - * - * Supported forms: - * - REDUCE(sym, count) -- old (2 args) - * - REDUCE(sym, count, .production_id = N) -- mid (3 args) - * - REDUCE(sym, count, .dynamic_precedence = N) -- mid (3 args) - * - REDUCE(sym, count, 0, 0) -- new (4 positional) - * - REDUCE(sym, count, .dynamic_precedence = P, .production_id = N) -- new (4 named) - * - * Uses GNU C ##__VA_ARGS__ to handle all forms with a single macro. - * - * Key detail: .child_count and .symbol are initialized in struct field order - * (.child_count at index 1, .symbol at index 2) so that after .symbol the - * "current position" is at index 3 (dynamic_precedence). This means the - * 4-arg positional form "prec, prod_id" fills .dynamic_precedence and - * .production_id correctly. Named-designator forms simply override the - * fields they name. - */ -#define REDUCE(sym, cnt, ...) \ - {{ .reduce = { .type = TSParseActionTypeReduce, .child_count = (cnt), .symbol = (sym), ##__VA_ARGS__ } }} - -#define RECOVER() \ - {{ \ - .type = TSParseActionTypeRecover \ - }} - -#define ACCEPT_INPUT() \ - {{ \ - .type = TSParseActionTypeAccept \ - }} - -#ifdef __cplusplus -} -#endif - -#endif // TREE_SITTER_PARSER_H_ diff --git a/src/editor/ts-grammars/tree_sitter/ts_assert.h b/src/editor/ts-grammars/tree_sitter/ts_assert.h deleted file mode 100644 index 4cb8f36a12..0000000000 --- a/src/editor/ts-grammars/tree_sitter/ts_assert.h +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef TREE_SITTER_ASSERT_H_ -#define TREE_SITTER_ASSERT_H_ - -#ifdef NDEBUG -#define ts_assert(e) ((void)(e)) -#else -#include -#define ts_assert(e) assert(e) -#endif - -#endif // TREE_SITTER_ASSERT_H_ From edbb4dbeb77f513c59171e069ede9a7426837362 Mon Sep 17 00:00:00 2001 From: Jiri Tyr Date: Wed, 15 Apr 2026 10:42:13 +0100 Subject: [PATCH 14/16] Implementing code review comments Signed-off-by: Jiri Tyr --- INSTALL | 381 +----------------------------- m4.include/mc-with-tree-sitter.m4 | 3 +- 2 files changed, 3 insertions(+), 381 
deletions(-) mode change 100644 => 120000 INSTALL diff --git a/INSTALL b/INSTALL deleted file mode 100644 index d43221a2ca..0000000000 --- a/INSTALL +++ /dev/null @@ -1,380 +0,0 @@ -Installation Instructions -************************* - -Basic Installation -================== - - The following shell commands: - - test -f configure || ./bootstrap - ./configure - make - make install - -should configure, build, and install this package. The first line, -which bootstraps, is intended for developers; when building from -distribution tarballs it does nothing and can be skipped. A package -might name the bootstrapping script differently; if the name is -‘autogen.sh’, for example, the first line should say ‘./autogen.sh’ -instead of ‘./bootstrap’. - - The following more-detailed instructions are generic; see the -‘README’ file for instructions specific to this package. Some packages -provide this ‘INSTALL’ file but do not implement all of the features -documented below. The lack of an optional feature in a given package is -not necessarily a bug. More recommendations for GNU packages can be -found in the GNU Coding Standards. - - Many packages have scripts meant for developers instead of ordinary -builders, as they may use developer tools that are less commonly -installed, or they may access the network, which has privacy -implications. These scripts attempt to bootstrap by building the -‘configure’ script and related files, possibly using developer tools or -the network. Because the output of bootstrapping is system-independent, -it is normally run by a package developer so that its output can be put -into the distribution tarball and ordinary builders and users need not -bootstrap. Some packages have commands like ‘./autopull.sh’ and -‘./autogen.sh’ that you can run instead of ‘./bootstrap’, for more -fine-grained control over bootstrapping. - - The ‘configure’ script attempts to guess correct values for various -system-dependent variables used during compilation. 
It uses those -values to create a ‘Makefile’ in each directory of the package. It may -also create one or more ‘.h’ files containing system-dependent -definitions. Finally, it creates a script ‘config.status’ that you can -run in the future to recreate the current configuration, and a file -‘config.log’ containing output useful for debugging ‘configure’. - - It can also use an optional file (typically called ‘config.cache’ and -enabled with ‘--cache-file=config.cache’ or simply ‘-C’) that saves the -results of its tests to speed up reconfiguring. Caching is disabled by -default to prevent problems with accidental use of stale cache files. - - If you need to do unusual things to compile the package, please try -to figure out how ‘configure’ could check whether to do them, and mail -diffs or instructions to the address given in the ‘README’ so they can -be considered for the next release. If you are using the cache, and at -some point ‘config.cache’ contains results you don’t want to keep, you -may remove or edit it. - - The ‘autoconf’ program generates ‘configure’ from the file -‘configure.ac’. Normally you should edit ‘configure.ac’ instead of -editing ‘configure’ directly. - - The simplest way to compile this package is: - - 1. ‘cd’ to the directory containing the package’s source code. - - 2. If this is a developer checkout and file ‘configure’ does not yet - exist, run the bootstrapping script (typically ‘./bootstrap’ or - ‘./autogen.sh’) to bootstrap and create the file. You may need - special developer tools and network access to bootstrap, and the - network access may have privacy implications. - - 3. Type ‘./configure’ to configure the package for your system. This - might take a while. While running, ‘configure’ prints messages - telling which features it is checking for. - - 4. Type ‘make’ to compile the package. - - 5. Optionally, type ‘make check’ to run any self-tests that come with - the package, generally using the just-built uninstalled binaries. 
- - 6. Type ‘make install’ to install the programs and any data files and - documentation. When installing into a prefix owned by root, it is - recommended that the package be configured and built as a regular - user, and only the ‘make install’ phase executed with root - privileges. - - 7. Optionally, type ‘make installcheck’ to repeat any self-tests, but - this time using the binaries in their final installed location. - This target does not install anything. Running this target as a - regular user, particularly if the prior ‘make install’ required - root privileges, verifies that the installation completed - correctly. - - 8. You can remove the program binaries and object files from the - source code directory by typing ‘make clean’. To also remove the - files that ‘configure’ created (so you can compile the package for - a different kind of computer), type ‘make distclean’. There is - also a ‘make maintainer-clean’ target, but that is intended mainly - for the package’s developers. If you use it, you may have to - bootstrap again. - - 9. If the package follows the GNU Coding Standards, you can type ‘make - uninstall’ to remove the installed files. - -Installation Prerequisites -========================== - - Installation requires a POSIX-like environment with a shell and at -least the following standard utilities: - - awk cat cp diff echo expr false ls mkdir mv printf pwd rm rmdir sed - sort test tr - -This package’s installation may need other standard utilities such as -‘grep’, ‘make’, ‘sleep’ and ‘touch’, along with compilers like ‘gcc’. - -Compilers and Options -===================== - - Some systems require unusual options for compilation or linking that -the ‘configure’ script does not know about. Run ‘./configure --help’ -for details on some of the pertinent environment variables. - - You can give ‘configure’ initial values for configuration parameters -by setting variables in the command line or in the environment. 
Here is -an example: - - ./configure CC=gcc CFLAGS=-g LIBS=-lposix - - See “Defining Variables” for more details. - -Compiling For Multiple Architectures -==================================== - - You can compile the package for more than one kind of computer at the -same time, by placing the object files for each system in their own -directory. To do this, you can use GNU ‘make’. ‘cd’ to the directory -where you want the object files and executables to go and run the -‘configure’ script. ‘configure’ automatically checks for the source -code in the directory that ‘configure’ is in and in ‘..’. This is known -as a “VPATH” build. - - With a non-GNU ‘make’, it is safer to compile the package for one -system at a time in the source code directory. After you have installed -the package for one system, use ‘make distclean’ before reconfiguring -for another system. - - Some platforms, notably macOS, support “fat” or “universal” binaries, -where a single binary can execute on different architectures. On these -platforms you can configure and compile just once, with options specific -to that platform. - -Installation Names -================== - - By default, ‘make install’ installs the package’s commands under -‘/usr/local/bin’, include files under ‘/usr/local/include’, etc. You -can specify an installation prefix other than ‘/usr/local’ by giving -‘configure’ the option ‘--prefix=PREFIX’, where PREFIX must be an -absolute file name. - - You can specify separate installation prefixes for -architecture-specific files and architecture-independent files. If you -pass the option ‘--exec-prefix=PREFIX’ to ‘configure’, the package uses -PREFIX as the prefix for installing programs and libraries. -Documentation and other data files still use the regular prefix. - - In addition, if you use an unusual directory layout you can give -options like ‘--bindir=DIR’ to specify different values for particular -kinds of files. 
Run ‘configure --help’ for a list of the directories -you can set and what kinds of files go in them. In general, the default -for these options is expressed in terms of ‘${prefix}’, so that -specifying just ‘--prefix’ will affect all of the other directory -specifications that were not explicitly provided. - - The most portable way to affect installation locations is to pass the -correct locations to ‘configure’; however, many packages provide one or -both of the following shortcuts of passing variable assignments to the -‘make install’ command line to change installation locations without -having to reconfigure or recompile. - - The first method involves providing an override variable for each -affected directory. For example, ‘make install -prefix=/alternate/directory’ will choose an alternate location for all -directory configuration variables that were expressed in terms of -‘${prefix}’. Any directories that were specified during ‘configure’, -but not in terms of ‘${prefix}’, must each be overridden at install time -for the entire installation to be relocated. The approach of makefile -variable overrides for each directory variable is required by the GNU -Coding Standards, and ideally causes no recompilation. However, some -platforms have known limitations with the semantics of shared libraries -that end up requiring recompilation when using this method, particularly -noticeable in packages that use GNU Libtool. - - The second method involves providing the ‘DESTDIR’ variable. For -example, ‘make install DESTDIR=/alternate/directory’ will prepend -‘/alternate/directory’ before all installation names. The approach of -‘DESTDIR’ overrides is not required by the GNU Coding Standards, and -does not work on platforms that have drive letters. On the other hand, -it does better at avoiding recompilation issues, and works well even -when some directory options were not specified in terms of ‘${prefix}’ -at ‘configure’ time. 
- -Optional Features -================= - - If the package supports it, you can cause programs to be installed -with an extra prefix or suffix on their names by giving ‘configure’ the -option ‘--program-prefix=PREFIX’ or ‘--program-suffix=SUFFIX’. - - Some packages pay attention to ‘--enable-FEATURE’ and -‘--disable-FEATURE’ options to ‘configure’, where FEATURE indicates an -optional part of the package. They may also pay attention to -‘--with-PACKAGE’ and ‘--without-PACKAGE’ options, where PACKAGE is -something like ‘gnu-ld’. ‘./configure --help’ should mention the -‘--enable-...’ and ‘--with-...’ options that the package recognizes. - - Some packages offer the ability to configure how verbose the -execution of ‘make’ will be. For these packages, running ‘./configure ---enable-silent-rules’ sets the default to minimal output, which can be -overridden with ‘make V=1’; while running ‘./configure ---disable-silent-rules’ sets the default to verbose, which can be -overridden with ‘make V=0’. - -Specifying a System Type -======================== - - By default ‘configure’ builds for the current system. To create -binaries that can run on a different system type, specify a -‘--host=TYPE’ option along with compiler variables that specify how to -generate object code for TYPE. For example, to create binaries intended -to run on a 64-bit ARM processor: - - ./configure --host=aarch64-linux-gnu \ - CC=aarch64-linux-gnu-gcc \ - CXX=aarch64-linux-gnu-g++ - -If done on a machine that can execute these binaries (e.g., via -‘qemu-aarch64’, ‘$QEMU_LD_PREFIX’, and Linux’s ‘binfmt_misc’ -capability), the build behaves like a native build. Otherwise it is a -cross-build: ‘configure’ will make cross-compilation guesses instead of -running test programs, and ‘make check’ will not work. - - A system type can either be a short name like ‘mingw64’, or a -canonical name like ‘x86_64-pc-linux-gnu’. Canonical names have the -form CPU-COMPANY-SYSTEM where SYSTEM is either OS or KERNEL-OS. 
To -canonicalize and validate a system type, you can run the command -‘config.sub’, which is often squirreled away in a subdirectory like -‘build-aux’. For example: - - $ build-aux/config.sub arm64-linux - aarch64-unknown-linux-gnu - $ build-aux/config.sub riscv-lnx - Invalid configuration 'riscv-lnx': OS 'lnx' not recognized - -You can look at the ‘config.sub’ file to see which types are recognized. -If the file is absent, this package does not need the system type. - - If ‘configure’ fails with the diagnostic “cannot guess build type”. -‘config.sub’ did not recognize your system’s type. In this case, first -fetch the newest versions of these files from the GNU config package -(https://savannah.gnu.org/projects/config). If that fixes things, -please report it to the maintainers of the package containing -‘configure’. Otherwise, you can try the configure option ‘--build=TYPE’ -where TYPE comes close to your system type; also, please report the -problem to . - - For more details about configuring system types, see the Autoconf -documentation. - -Sharing Defaults -================ - - If you want to set default values for ‘configure’ scripts to share, -you can create a site shell script called ‘config.site’ that gives -default values for variables like ‘CC’, ‘cache_file’, and ‘prefix’. -‘configure’ looks for ‘PREFIX/share/config.site’ if it exists, then -‘PREFIX/etc/config.site’ if it exists. Or, you can set the -‘CONFIG_SITE’ environment variable to the location of the site script. -A warning: not all ‘configure’ scripts look for a site script. - -Defining Variables -================== - - Variables not defined in a site shell script can be set in the -environment passed to ‘configure’. However, some packages may run -configure again during the build, and the customized values of these -variables may be lost. In order to avoid this problem, you should set -them in the ‘configure’ command line, using ‘VAR=value’. 
For example: - - ./configure CC=/usr/local2/bin/gcc - -causes the specified ‘gcc’ to be used as the C compiler (unless it is -overridden in the site shell script). - -Unfortunately, this technique does not work for ‘CONFIG_SHELL’ due to an -Autoconf limitation. Until the limitation is lifted, you can use this -workaround: - - CONFIG_SHELL=/bin/bash ./configure CONFIG_SHELL=/bin/bash - -‘configure’ Invocation -====================== - - ‘configure’ recognizes the following options to control how it -operates. - -‘--help’ -‘-h’ - Print a summary of all of the options to ‘configure’, and exit. - -‘--help=short’ -‘--help=recursive’ - Print a summary of the options unique to this package’s - ‘configure’, and exit. The ‘short’ variant lists options used only - in the top level, while the ‘recursive’ variant lists options also - present in any nested packages. - -‘--version’ -‘-V’ - Print the version of Autoconf used to generate the ‘configure’ - script, and exit. - -‘--cache-file=FILE’ - Enable the cache: use and save the results of the tests in FILE, - traditionally ‘config.cache’. FILE defaults to ‘/dev/null’ to - disable caching. - -‘--config-cache’ -‘-C’ - Alias for ‘--cache-file=config.cache’. - -‘--srcdir=DIR’ - Look for the package’s source code in directory DIR. Usually - ‘configure’ can determine that directory automatically. - -‘--prefix=DIR’ - Use DIR as the installation prefix. See “Installation Names” for - more details, including other options available for fine-tuning the - installation locations. - -‘--host=TYPE’ - Build binaries for system TYPE. See “Specifying a System Type”. - -‘--enable-FEATURE’ -‘--disable-FEATURE’ - Enable or disable the optional FEATURE. See “Optional Features”. - -‘--with-PACKAGE’ -‘--without-PACKAGE’ - Use or omit PACKAGE when building. See “Optional Features”. - -‘--quiet’ -‘--silent’ -‘-q’ - Do not print messages saying which checks are being made. 
To - suppress all normal output, redirect it to ‘/dev/null’ (any error - messages will still be shown). - -‘--no-create’ -‘-n’ - Run the configure checks, but stop before creating any output - files. - -‘configure’ also recognizes several environment variables, and accepts -some other, less widely useful, options. Run ‘configure --help’ for -more details. - -Copyright notice -================ - - Copyright © 1994–1996, 1999–2002, 2004–2017, 2020–2025 Free Software -Foundation, Inc. - - Copying and distribution of this file, with or without modification, -are permitted in any medium without royalty provided the copyright -notice and this notice are preserved. This file is offered as-is, -without warranty of any kind. diff --git a/INSTALL b/INSTALL new file mode 120000 index 0000000000..1f38889c39 --- /dev/null +++ b/INSTALL @@ -0,0 +1 @@ +doc/INSTALL \ No newline at end of file diff --git a/m4.include/mc-with-tree-sitter.m4 b/m4.include/mc-with-tree-sitter.m4 index f0cf814568..191b82cd9a 100644 --- a/m4.include/mc-with-tree-sitter.m4 +++ b/m4.include/mc-with-tree-sitter.m4 @@ -62,7 +62,8 @@ AC_DEFUN([mc_WITH_TREE_SITTER], [ tree_sitter_grammars="" for a in "$ts_grammar_dir"/*/*.a; do test -f "$a" || continue - g=`basename "\`dirname \"$a\"\`"` + ts_grammar_a_dir=`dirname "$a"` + g=`basename "$ts_grammar_a_dir"` tree_sitter_grammars="$tree_sitter_grammars $g" done tree_sitter_grammars=`echo $tree_sitter_grammars` From 7a12b3783453f4ec382c3c223ce8eef561583d2d Mon Sep 17 00:00:00 2001 From: Jiri Tyr Date: Fri, 17 Apr 2026 21:07:33 +0100 Subject: [PATCH 15/16] New distribution model Signed-off-by: Jiri Tyr --- Makefile.am | 3 +- configure.ac | 2 - doc/TREE-SITTER | 857 +++++---------- m4.include/mc-with-tree-sitter.m4 | 108 +- misc/syntax-ts/Makefile.am | 30 +- misc/syntax-ts/colors.ini | 787 -------------- misc/syntax-ts/display-names | 160 --- misc/syntax-ts/extensions | 153 --- misc/syntax-ts/filenames | 20 - misc/syntax-ts/queries/ada-highlights.scm | 134 --- 
misc/syntax-ts/queries/asm-highlights.scm | 37 - misc/syntax-ts/queries/awk-highlights.scm | 89 -- misc/syntax-ts/queries/bash-highlights.scm | 192 ---- misc/syntax-ts/queries/bison-highlights.scm | 36 - misc/syntax-ts/queries/c-highlights.scm | 160 --- misc/syntax-ts/queries/c_sharp-highlights.scm | 145 --- misc/syntax-ts/queries/caddy-highlights.scm | 37 - misc/syntax-ts/queries/cmake-highlights.scm | 45 - misc/syntax-ts/queries/cobol-highlights.scm | 40 - .../queries/commonlisp-highlights.scm | 70 -- misc/syntax-ts/queries/cpp-highlights.scm | 163 --- misc/syntax-ts/queries/css-highlights.scm | 80 -- misc/syntax-ts/queries/cuda-highlights.scm | 128 --- misc/syntax-ts/queries/d-highlights.scm | 173 ---- misc/syntax-ts/queries/diff-highlights.scm | 35 - .../queries/dockerfile-highlights.scm | 59 -- misc/syntax-ts/queries/dot-highlights.scm | 58 -- misc/syntax-ts/queries/erlang-highlights.scm | 112 -- misc/syntax-ts/queries/fortran-highlights.scm | 150 --- misc/syntax-ts/queries/glsl-highlights.scm | 130 --- misc/syntax-ts/queries/go-highlights.scm | 110 -- misc/syntax-ts/queries/gotmpl-highlights.scm | 145 --- misc/syntax-ts/queries/haskell-highlights.scm | 79 -- misc/syntax-ts/queries/hcl-highlights.scm | 90 -- misc/syntax-ts/queries/html-highlights.scm | 35 - misc/syntax-ts/queries/idl-highlights.scm | 139 --- misc/syntax-ts/queries/ini-highlights.scm | 25 - misc/syntax-ts/queries/java-highlights.scm | 141 --- .../queries/javascript-highlights.scm | 133 --- misc/syntax-ts/queries/json-highlights.scm | 39 - misc/syntax-ts/queries/kotlin-highlights.scm | 134 --- misc/syntax-ts/queries/latex-highlights.scm | 401 ------- misc/syntax-ts/queries/lua-highlights.scm | 99 -- misc/syntax-ts/queries/mail-highlights.scm | 4 - misc/syntax-ts/queries/make-highlights.scm | 84 -- .../syntax-ts/queries/markdown-highlights.scm | 67 -- .../syntax-ts/queries/markdown-injections.scm | 33 - .../queries/markdown_inline-highlights.scm | 48 - misc/syntax-ts/queries/matlab-highlights.scm 
| 65 -- misc/syntax-ts/queries/meson-highlights.scm | 45 - misc/syntax-ts/queries/muttrc-highlights.scm | 48 - misc/syntax-ts/queries/ocaml-highlights.scm | 119 --- misc/syntax-ts/queries/pascal-highlights.scm | 105 -- misc/syntax-ts/queries/perl-highlights.scm | 113 -- misc/syntax-ts/queries/php-highlights.scm | 165 --- misc/syntax-ts/queries/po-highlights.scm | 30 - .../queries/properties-highlights.scm | 25 - misc/syntax-ts/queries/proto-highlights.scm | 94 -- misc/syntax-ts/queries/puppet-highlights.scm | 193 ---- misc/syntax-ts/queries/python-highlights.scm | 122 --- misc/syntax-ts/queries/qmljs-highlights.scm | 124 --- misc/syntax-ts/queries/r-highlights.scm | 90 -- misc/syntax-ts/queries/ruby-highlights.scm | 132 --- misc/syntax-ts/queries/rust-highlights.scm | 83 -- misc/syntax-ts/queries/scala-highlights.scm | 80 -- misc/syntax-ts/queries/slang-highlights.scm | 350 ------- .../queries/smalltalk-highlights.scm | 63 -- misc/syntax-ts/queries/sql-highlights.scm | 44 - misc/syntax-ts/queries/strace-highlights.scm | 44 - misc/syntax-ts/queries/swift-highlights.scm | 1 - misc/syntax-ts/queries/tcl-highlights.scm | 62 -- .../queries/terraform-highlights.scm | 95 -- misc/syntax-ts/queries/toml-highlights.scm | 43 - misc/syntax-ts/queries/turtle-highlights.scm | 64 -- .../queries/typescript-highlights.scm | 137 --- misc/syntax-ts/queries/verilog-highlights.scm | 124 --- misc/syntax-ts/queries/vhdl-highlights.scm | 152 --- misc/syntax-ts/queries/xml-highlights.scm | 43 - misc/syntax-ts/queries/yaml-highlights.scm | 63 -- misc/syntax-ts/shebangs | 15 - misc/syntax-ts/symbols | 15 - misc/syntax-ts/wrappers | 31 - scripts/ts-grammars-download.sh | 312 ------ src/Makefile.am | 4 - src/editor/Makefile.am | 16 +- src/editor/edit.c | 1 + src/editor/edit.h | 3 +- src/editor/editcmd.c | 47 +- src/editor/editdraw.c | 137 ++- src/editor/editoptions.c | 22 +- src/editor/syntax.c | 2 +- src/editor/syntax_ts.c | 954 ++++++++--------- src/editor/syntax_ts.h | 1 + 
src/editor/ts-grammar-loader.h | 10 +- src/editor/ts-grammars/.gitignore | 3 - src/editor/ts-grammars/Makefile.am | 60 -- src/editor/ts-grammars/ts-grammar-registry.h | 979 ------------------ src/setup.c | 3 + tests/src/editor/Makefile.am | 22 +- tests/src/editor/edit_syntax_ts.c | 51 +- tests/syntax/mc-syntax-dump.c | 411 +++++++- tests/syntax/samples/Caddyfile | 122 --- tests/syntax/samples/Caddyfile-report.md | 78 -- tests/syntax/samples/Dockerfile | 78 -- tests/syntax/samples/Dockerfile-report.md | 66 -- tests/syntax/samples/Makefile-report.md | 93 -- tests/syntax/samples/ada-report.md | 71 -- tests/syntax/samples/ada.adb | 159 --- tests/syntax/samples/asm-report.md | 61 -- tests/syntax/samples/asm.asm | 134 --- tests/syntax/samples/awk-report.md | 86 -- tests/syntax/samples/awk.awk | 177 ---- tests/syntax/samples/bash-report.md | 70 -- tests/syntax/samples/bash.sh | 144 --- tests/syntax/samples/c-report.md | 65 -- tests/syntax/samples/c.c | 207 ---- tests/syntax/samples/c_sharp-report.md | 83 -- tests/syntax/samples/c_sharp.cs | 229 ---- tests/syntax/samples/cmake-report.md | 73 -- tests/syntax/samples/cmake.cmake | 113 -- tests/syntax/samples/cobol-report.md | 63 -- tests/syntax/samples/cobol.cob | 113 -- tests/syntax/samples/cpp-report.md | 85 -- tests/syntax/samples/cpp.cpp | 207 ---- tests/syntax/samples/css-report.md | 86 -- tests/syntax/samples/css.css | 212 ---- tests/syntax/samples/cuda-report.md | 91 -- tests/syntax/samples/cuda.cu | 206 ---- tests/syntax/samples/d-report.md | 94 -- tests/syntax/samples/d.d | 205 ---- tests/syntax/samples/diff-report.md | 49 - tests/syntax/samples/diff.diff | 61 -- tests/syntax/samples/dot-report.md | 68 -- tests/syntax/samples/dot.dot | 128 --- tests/syntax/samples/erlang-report.md | 78 -- tests/syntax/samples/erlang.erl | 172 --- tests/syntax/samples/fortran-report.md | 83 -- tests/syntax/samples/fortran.f90 | 178 ---- tests/syntax/samples/glsl-report.md | 82 -- tests/syntax/samples/glsl.glsl | 152 --- 
tests/syntax/samples/go-report.md | 78 -- tests/syntax/samples/go.go | 193 ---- tests/syntax/samples/gotmpl-report.md | 100 -- tests/syntax/samples/gotmpl.tmpl | 234 ----- tests/syntax/samples/haskell-report.md | 77 -- tests/syntax/samples/haskell.hs | 138 --- tests/syntax/samples/hcl-report.md | 63 -- tests/syntax/samples/hcl.hcl | 130 --- tests/syntax/samples/html-report.md | 60 -- tests/syntax/samples/html.html | 116 --- tests/syntax/samples/idl-report.md | 70 -- tests/syntax/samples/idl.idl | 164 --- tests/syntax/samples/ini-report.md | 51 - tests/syntax/samples/ini.ini | 57 - tests/syntax/samples/java-report.md | 88 -- tests/syntax/samples/java.java | 199 ---- tests/syntax/samples/javascript-report.md | 66 -- tests/syntax/samples/javascript.js | 131 --- tests/syntax/samples/json-report.md | 62 -- tests/syntax/samples/json.json | 95 -- tests/syntax/samples/kotlin-report.md | 99 -- tests/syntax/samples/kotlin.kt | 240 ----- tests/syntax/samples/latex-report.md | 70 -- tests/syntax/samples/latex.tex | 133 --- tests/syntax/samples/lisp-report.md | 66 -- tests/syntax/samples/lisp.lisp | 128 --- tests/syntax/samples/lua-report.md | 72 -- tests/syntax/samples/lua.lua | 196 ---- tests/syntax/samples/mail-report.md | 75 -- tests/syntax/samples/mail.mail | 65 -- tests/syntax/samples/markdown-report.md | 92 -- tests/syntax/samples/markdown.md | 91 -- tests/syntax/samples/matlab-report.md | 41 - tests/syntax/samples/matlab.m | 140 --- tests/syntax/samples/meson.build | 164 --- tests/syntax/samples/meson.build-report.md | 69 -- tests/syntax/samples/muttrc | 104 -- tests/syntax/samples/muttrc-report.md | 78 -- tests/syntax/samples/ocaml-report.md | 89 -- tests/syntax/samples/ocaml.ml | 184 ---- tests/syntax/samples/pascal-report.md | 76 -- tests/syntax/samples/pascal.pas | 228 ---- tests/syntax/samples/perl-report.md | 84 -- tests/syntax/samples/perl.pl | 156 --- tests/syntax/samples/php-report.md | 87 -- tests/syntax/samples/php.php | 259 ----- 
tests/syntax/samples/po-report.md | 73 -- tests/syntax/samples/po.po | 105 -- tests/syntax/samples/properties-report.md | 75 -- tests/syntax/samples/properties.properties | 76 -- tests/syntax/samples/proto-report.md | 67 -- tests/syntax/samples/proto.proto | 149 --- tests/syntax/samples/puppet-report.md | 74 -- tests/syntax/samples/puppet.pp | 154 --- tests/syntax/samples/python-report.md | 82 -- tests/syntax/samples/python.py | 180 ---- tests/syntax/samples/r-report.md | 87 -- tests/syntax/samples/r.r | 158 --- tests/syntax/samples/ruby-report.md | 84 -- tests/syntax/samples/ruby.rb | 193 ---- tests/syntax/samples/rust-report.md | 79 -- tests/syntax/samples/rust.rs | 229 ---- tests/syntax/samples/slang-report.md | 62 -- tests/syntax/samples/slang.sl | 137 --- tests/syntax/samples/smalltalk-report.md | 77 -- tests/syntax/samples/smalltalk.st | 125 --- tests/syntax/samples/sql-report.md | 55 - tests/syntax/samples/sql.sql | 128 --- tests/syntax/samples/strace-report.md | 71 -- tests/syntax/samples/strace.strace | 58 -- tests/syntax/samples/swift-report.md | 65 -- tests/syntax/samples/swift.swift | 222 ---- tests/syntax/samples/tcl-report.md | 72 -- tests/syntax/samples/tcl.tcl | 137 --- tests/syntax/samples/terraform.tf | 189 ---- tests/syntax/samples/terraform.tf-report.md | 84 -- tests/syntax/samples/toml-report.md | 74 -- tests/syntax/samples/toml.toml | 126 --- tests/syntax/samples/turtle-report.md | 73 -- tests/syntax/samples/turtle.ttl | 114 -- tests/syntax/samples/typescript-report.md | 92 -- tests/syntax/samples/typescript.ts | 148 --- tests/syntax/samples/verilog-report.md | 72 -- tests/syntax/samples/verilog.v | 152 --- tests/syntax/samples/vhdl-report.md | 92 -- tests/syntax/samples/vhdl.vhd | 152 --- tests/syntax/samples/xml-report.md | 69 -- tests/syntax/samples/xml.xml | 118 --- tests/syntax/samples/yaml-report.md | 66 -- tests/syntax/samples/yaml.yaml | 102 -- 230 files changed, 1233 insertions(+), 25360 deletions(-) delete mode 100644 
misc/syntax-ts/colors.ini delete mode 100644 misc/syntax-ts/display-names delete mode 100644 misc/syntax-ts/extensions delete mode 100644 misc/syntax-ts/filenames delete mode 100644 misc/syntax-ts/queries/ada-highlights.scm delete mode 100644 misc/syntax-ts/queries/asm-highlights.scm delete mode 100644 misc/syntax-ts/queries/awk-highlights.scm delete mode 100644 misc/syntax-ts/queries/bash-highlights.scm delete mode 100644 misc/syntax-ts/queries/bison-highlights.scm delete mode 100644 misc/syntax-ts/queries/c-highlights.scm delete mode 100644 misc/syntax-ts/queries/c_sharp-highlights.scm delete mode 100644 misc/syntax-ts/queries/caddy-highlights.scm delete mode 100644 misc/syntax-ts/queries/cmake-highlights.scm delete mode 100644 misc/syntax-ts/queries/cobol-highlights.scm delete mode 100644 misc/syntax-ts/queries/commonlisp-highlights.scm delete mode 100644 misc/syntax-ts/queries/cpp-highlights.scm delete mode 100644 misc/syntax-ts/queries/css-highlights.scm delete mode 100644 misc/syntax-ts/queries/cuda-highlights.scm delete mode 100644 misc/syntax-ts/queries/d-highlights.scm delete mode 100644 misc/syntax-ts/queries/diff-highlights.scm delete mode 100644 misc/syntax-ts/queries/dockerfile-highlights.scm delete mode 100644 misc/syntax-ts/queries/dot-highlights.scm delete mode 100644 misc/syntax-ts/queries/erlang-highlights.scm delete mode 100644 misc/syntax-ts/queries/fortran-highlights.scm delete mode 100644 misc/syntax-ts/queries/glsl-highlights.scm delete mode 100644 misc/syntax-ts/queries/go-highlights.scm delete mode 100644 misc/syntax-ts/queries/gotmpl-highlights.scm delete mode 100644 misc/syntax-ts/queries/haskell-highlights.scm delete mode 100644 misc/syntax-ts/queries/hcl-highlights.scm delete mode 100644 misc/syntax-ts/queries/html-highlights.scm delete mode 100644 misc/syntax-ts/queries/idl-highlights.scm delete mode 100644 misc/syntax-ts/queries/ini-highlights.scm delete mode 100644 misc/syntax-ts/queries/java-highlights.scm delete mode 100644 
misc/syntax-ts/queries/javascript-highlights.scm delete mode 100644 misc/syntax-ts/queries/json-highlights.scm delete mode 100644 misc/syntax-ts/queries/kotlin-highlights.scm delete mode 100644 misc/syntax-ts/queries/latex-highlights.scm delete mode 100644 misc/syntax-ts/queries/lua-highlights.scm delete mode 100644 misc/syntax-ts/queries/mail-highlights.scm delete mode 100644 misc/syntax-ts/queries/make-highlights.scm delete mode 100644 misc/syntax-ts/queries/markdown-highlights.scm delete mode 100644 misc/syntax-ts/queries/markdown-injections.scm delete mode 100644 misc/syntax-ts/queries/markdown_inline-highlights.scm delete mode 100644 misc/syntax-ts/queries/matlab-highlights.scm delete mode 100644 misc/syntax-ts/queries/meson-highlights.scm delete mode 100644 misc/syntax-ts/queries/muttrc-highlights.scm delete mode 100644 misc/syntax-ts/queries/ocaml-highlights.scm delete mode 100644 misc/syntax-ts/queries/pascal-highlights.scm delete mode 100644 misc/syntax-ts/queries/perl-highlights.scm delete mode 100644 misc/syntax-ts/queries/php-highlights.scm delete mode 100644 misc/syntax-ts/queries/po-highlights.scm delete mode 100644 misc/syntax-ts/queries/properties-highlights.scm delete mode 100644 misc/syntax-ts/queries/proto-highlights.scm delete mode 100644 misc/syntax-ts/queries/puppet-highlights.scm delete mode 100644 misc/syntax-ts/queries/python-highlights.scm delete mode 100644 misc/syntax-ts/queries/qmljs-highlights.scm delete mode 100644 misc/syntax-ts/queries/r-highlights.scm delete mode 100644 misc/syntax-ts/queries/ruby-highlights.scm delete mode 100644 misc/syntax-ts/queries/rust-highlights.scm delete mode 100644 misc/syntax-ts/queries/scala-highlights.scm delete mode 100644 misc/syntax-ts/queries/slang-highlights.scm delete mode 100644 misc/syntax-ts/queries/smalltalk-highlights.scm delete mode 100644 misc/syntax-ts/queries/sql-highlights.scm delete mode 100644 misc/syntax-ts/queries/strace-highlights.scm delete mode 100644 
misc/syntax-ts/queries/swift-highlights.scm delete mode 100644 misc/syntax-ts/queries/tcl-highlights.scm delete mode 100644 misc/syntax-ts/queries/terraform-highlights.scm delete mode 100644 misc/syntax-ts/queries/toml-highlights.scm delete mode 100644 misc/syntax-ts/queries/turtle-highlights.scm delete mode 100644 misc/syntax-ts/queries/typescript-highlights.scm delete mode 100644 misc/syntax-ts/queries/verilog-highlights.scm delete mode 100644 misc/syntax-ts/queries/vhdl-highlights.scm delete mode 100644 misc/syntax-ts/queries/xml-highlights.scm delete mode 100644 misc/syntax-ts/queries/yaml-highlights.scm delete mode 100644 misc/syntax-ts/shebangs delete mode 100644 misc/syntax-ts/symbols delete mode 100644 misc/syntax-ts/wrappers delete mode 100755 scripts/ts-grammars-download.sh delete mode 100644 src/editor/ts-grammars/.gitignore delete mode 100644 src/editor/ts-grammars/Makefile.am delete mode 100644 src/editor/ts-grammars/ts-grammar-registry.h delete mode 100644 tests/syntax/samples/Caddyfile delete mode 100644 tests/syntax/samples/Caddyfile-report.md delete mode 100644 tests/syntax/samples/Dockerfile delete mode 100644 tests/syntax/samples/Dockerfile-report.md delete mode 100644 tests/syntax/samples/Makefile-report.md delete mode 100644 tests/syntax/samples/ada-report.md delete mode 100644 tests/syntax/samples/ada.adb delete mode 100644 tests/syntax/samples/asm-report.md delete mode 100644 tests/syntax/samples/asm.asm delete mode 100644 tests/syntax/samples/awk-report.md delete mode 100644 tests/syntax/samples/awk.awk delete mode 100644 tests/syntax/samples/bash-report.md delete mode 100644 tests/syntax/samples/bash.sh delete mode 100644 tests/syntax/samples/c-report.md delete mode 100644 tests/syntax/samples/c.c delete mode 100644 tests/syntax/samples/c_sharp-report.md delete mode 100644 tests/syntax/samples/c_sharp.cs delete mode 100644 tests/syntax/samples/cmake-report.md delete mode 100644 tests/syntax/samples/cmake.cmake delete mode 100644 
tests/syntax/samples/cobol-report.md delete mode 100644 tests/syntax/samples/cobol.cob delete mode 100644 tests/syntax/samples/cpp-report.md delete mode 100644 tests/syntax/samples/cpp.cpp delete mode 100644 tests/syntax/samples/css-report.md delete mode 100644 tests/syntax/samples/css.css delete mode 100644 tests/syntax/samples/cuda-report.md delete mode 100644 tests/syntax/samples/cuda.cu delete mode 100644 tests/syntax/samples/d-report.md delete mode 100644 tests/syntax/samples/d.d delete mode 100644 tests/syntax/samples/diff-report.md delete mode 100644 tests/syntax/samples/diff.diff delete mode 100644 tests/syntax/samples/dot-report.md delete mode 100644 tests/syntax/samples/dot.dot delete mode 100644 tests/syntax/samples/erlang-report.md delete mode 100644 tests/syntax/samples/erlang.erl delete mode 100644 tests/syntax/samples/fortran-report.md delete mode 100644 tests/syntax/samples/fortran.f90 delete mode 100644 tests/syntax/samples/glsl-report.md delete mode 100644 tests/syntax/samples/glsl.glsl delete mode 100644 tests/syntax/samples/go-report.md delete mode 100644 tests/syntax/samples/go.go delete mode 100644 tests/syntax/samples/gotmpl-report.md delete mode 100644 tests/syntax/samples/gotmpl.tmpl delete mode 100644 tests/syntax/samples/haskell-report.md delete mode 100644 tests/syntax/samples/haskell.hs delete mode 100644 tests/syntax/samples/hcl-report.md delete mode 100644 tests/syntax/samples/hcl.hcl delete mode 100644 tests/syntax/samples/html-report.md delete mode 100644 tests/syntax/samples/html.html delete mode 100644 tests/syntax/samples/idl-report.md delete mode 100644 tests/syntax/samples/idl.idl delete mode 100644 tests/syntax/samples/ini-report.md delete mode 100644 tests/syntax/samples/ini.ini delete mode 100644 tests/syntax/samples/java-report.md delete mode 100644 tests/syntax/samples/java.java delete mode 100644 tests/syntax/samples/javascript-report.md delete mode 100644 tests/syntax/samples/javascript.js delete mode 100644 
tests/syntax/samples/json-report.md delete mode 100644 tests/syntax/samples/json.json delete mode 100644 tests/syntax/samples/kotlin-report.md delete mode 100644 tests/syntax/samples/kotlin.kt delete mode 100644 tests/syntax/samples/latex-report.md delete mode 100644 tests/syntax/samples/latex.tex delete mode 100644 tests/syntax/samples/lisp-report.md delete mode 100644 tests/syntax/samples/lisp.lisp delete mode 100644 tests/syntax/samples/lua-report.md delete mode 100644 tests/syntax/samples/lua.lua delete mode 100644 tests/syntax/samples/mail-report.md delete mode 100644 tests/syntax/samples/mail.mail delete mode 100644 tests/syntax/samples/markdown-report.md delete mode 100644 tests/syntax/samples/markdown.md delete mode 100644 tests/syntax/samples/matlab-report.md delete mode 100644 tests/syntax/samples/matlab.m delete mode 100644 tests/syntax/samples/meson.build delete mode 100644 tests/syntax/samples/meson.build-report.md delete mode 100644 tests/syntax/samples/muttrc delete mode 100644 tests/syntax/samples/muttrc-report.md delete mode 100644 tests/syntax/samples/ocaml-report.md delete mode 100644 tests/syntax/samples/ocaml.ml delete mode 100644 tests/syntax/samples/pascal-report.md delete mode 100644 tests/syntax/samples/pascal.pas delete mode 100644 tests/syntax/samples/perl-report.md delete mode 100644 tests/syntax/samples/perl.pl delete mode 100644 tests/syntax/samples/php-report.md delete mode 100644 tests/syntax/samples/php.php delete mode 100644 tests/syntax/samples/po-report.md delete mode 100644 tests/syntax/samples/po.po delete mode 100644 tests/syntax/samples/properties-report.md delete mode 100644 tests/syntax/samples/properties.properties delete mode 100644 tests/syntax/samples/proto-report.md delete mode 100644 tests/syntax/samples/proto.proto delete mode 100644 tests/syntax/samples/puppet-report.md delete mode 100644 tests/syntax/samples/puppet.pp delete mode 100644 tests/syntax/samples/python-report.md delete mode 100644 
tests/syntax/samples/python.py delete mode 100644 tests/syntax/samples/r-report.md delete mode 100644 tests/syntax/samples/r.r delete mode 100644 tests/syntax/samples/ruby-report.md delete mode 100644 tests/syntax/samples/ruby.rb delete mode 100644 tests/syntax/samples/rust-report.md delete mode 100644 tests/syntax/samples/rust.rs delete mode 100644 tests/syntax/samples/slang-report.md delete mode 100644 tests/syntax/samples/slang.sl delete mode 100644 tests/syntax/samples/smalltalk-report.md delete mode 100644 tests/syntax/samples/smalltalk.st delete mode 100644 tests/syntax/samples/sql-report.md delete mode 100644 tests/syntax/samples/sql.sql delete mode 100644 tests/syntax/samples/strace-report.md delete mode 100644 tests/syntax/samples/strace.strace delete mode 100644 tests/syntax/samples/swift-report.md delete mode 100644 tests/syntax/samples/swift.swift delete mode 100644 tests/syntax/samples/tcl-report.md delete mode 100644 tests/syntax/samples/tcl.tcl delete mode 100644 tests/syntax/samples/terraform.tf delete mode 100644 tests/syntax/samples/terraform.tf-report.md delete mode 100644 tests/syntax/samples/toml-report.md delete mode 100644 tests/syntax/samples/toml.toml delete mode 100644 tests/syntax/samples/turtle-report.md delete mode 100644 tests/syntax/samples/turtle.ttl delete mode 100644 tests/syntax/samples/typescript-report.md delete mode 100644 tests/syntax/samples/typescript.ts delete mode 100644 tests/syntax/samples/verilog-report.md delete mode 100644 tests/syntax/samples/verilog.v delete mode 100644 tests/syntax/samples/vhdl-report.md delete mode 100644 tests/syntax/samples/vhdl.vhd delete mode 100644 tests/syntax/samples/xml-report.md delete mode 100644 tests/syntax/samples/xml.xml delete mode 100644 tests/syntax/samples/yaml-report.md delete mode 100644 tests/syntax/samples/yaml.yaml diff --git a/Makefile.am b/Makefile.am index 0a1ee80009..378cbf0c00 100644 --- a/Makefile.am +++ b/Makefile.am @@ -11,8 +11,7 @@ EXTRA_DIST = dist_noinst_SCRIPTS 
= \ build-glib2.sh \ - version.sh \ - scripts/ts-grammars-download.sh + version.sh dist_noinst_HEADERS = $(top_srcdir)/mc-version.h diff --git a/configure.ac b/configure.ac index fc16dabbe2..cda268f01c 100644 --- a/configure.ac +++ b/configure.ac @@ -549,7 +549,6 @@ AM_CONDITIONAL(USE_SCREEN_SLANG, [test x"$with_screen" = xslang]) AM_CONDITIONAL(USE_INTERNAL_EDIT, [test x"$use_internal_edit" = xyes ]) AM_CONDITIONAL(USE_ASPELL, [test x"$enable_aspell" = xyes ]) AM_CONDITIONAL(USE_TREE_SITTER, [test x"$with_tree_sitter" = xyes]) -AM_CONDITIONAL(TREE_SITTER_STATIC, [test x"$with_tree_sitter" = xyes -a x"$with_tree_sitter_static" = xyes]) AM_CONDITIONAL(USE_DIFF, [test -n "$use_diff"]) AM_CONDITIONAL(CONS_SAVER, [test -n "$cons_saver"]) dnl Clarify do we really need GModule @@ -591,7 +590,6 @@ misc/mc.ext.ini src/Makefile src/consaver/Makefile src/editor/Makefile -src/editor/ts-grammars/Makefile src/man2hlp/Makefile src/subshell/Makefile src/viewer/Makefile diff --git a/doc/TREE-SITTER b/doc/TREE-SITTER index 00afa00f73..e351acd4a2 100644 --- a/doc/TREE-SITTER +++ b/doc/TREE-SITTER @@ -6,20 +6,21 @@ Contents * Introduction * Building with tree-sitter support -* Downloading grammars +* Installing grammars * How it works * Language injection * Wrapper grammars * Parse error highlighting * Syntax highlighting modes * File layout -* Grammar configuration files -* Highlight query files (queries/*.scm) +* Per-grammar configuration (config.ini) +* Highlight query files * Color mapping * Adding a new language * Removing or disabling a language -* Updating a grammar +* Updating grammars * Validating query files +* Testing with mc-syntax-dump * Troubleshooting * Limitations @@ -41,11 +42,14 @@ regex-based highlighting from *.syntax files. The two highlighting systems share the same rendering path: both produce a color pair integer per byte, which is consumed by editdraw.c. 
-69 grammars have MC-curated query files (68 languages + -markdown_inline for injection), with language injection configured -via standard injections.scm query files. 358 grammar libraries are -available for download from the grammars repository; distros or users -can add query files for additional languages. +MC ships no grammar data. Grammar libraries (.so), query files (.scm), +and per-grammar configuration (config.ini) are provided by the external +mc-ts-grammars repository (https://github.com/jtyr/mc-ts-grammars) and +installed via the mc-ts-grammar tool or by distro packages. + +68 languages have MC-curated query files and configuration. Distros or +users can add support for additional languages by providing their own +query files and config.ini. Building with tree-sitter support @@ -55,92 +59,55 @@ Requirements: - libtree-sitter >= 0.22 (development headers and shared library) - https://github.com/tree-sitter/tree-sitter -- gmodule-2.0 (for shared mode, part of GLib) - -There are two build modes for grammars: shared (default) and static. +- gmodule-2.0 (part of GLib, for loading grammar .so modules) -Shared mode (default) -- each grammar is a pre-built .so module loaded -at runtime via g_module_open(). The mc binary stays small (~5 MB) -regardless of how many grammars are available. Distros can package -each grammar separately: +Build: ./configure --with-tree-sitter make -j$(nproc) -Static mode -- selected grammars are linked into the mc binary from -pre-built .a static libraries. No runtime module loading, but the -binary grows with each grammar included: - - ./configure --with-tree-sitter --with-tree-sitter-static - make -j$(nproc) - -To build with only specific grammars (static mode only): - - ./configure --with-tree-sitter --with-tree-sitter-static \ - --with-tree-sitter-grammars=c,python,bash +Grammar .so modules are loaded at runtime via g_module_open(). The mc +binary stays small regardless of how many grammars are installed. 
-In static mode, the default is 'all', which auto-discovers all .a -files present in src/editor/ts-grammars/. Grammar .a files must be -downloaded before building (see "Downloading grammars" below). - -Binary size comparison: -- Without tree-sitter: 5.0 MB -- Shared mode (any grammar count): 5.1 MB (grammars in separate .so) -- Static mode, 3 grammars: 7.4 MB -- Static mode, 10 grammars: 10 MB - -To build with the legacy highlighting only (default): +To build without tree-sitter (default): ./configure make -j$(nproc) -Both build modes are always supported. The --with-tree-sitter flag -controls which backend is compiled. When --with-tree-sitter is not -used, no tree-sitter headers or libraries are required and the -tree-sitter code is excluded via #ifdef HAVE_TREE_SITTER guards. - - -Downloading grammars --------------------- - -Grammar libraries are not stored in the MC git repository. They must -be downloaded before building using the download script. +When --with-tree-sitter is not used, no tree-sitter headers or libraries +are required and the tree-sitter code is excluded via #ifdef +HAVE_TREE_SITTER guards. -Pre-built grammars are hosted in the tree-sitter-grammars repository -(https://github.com/jtyr/tree-sitter-grammars) and published as -versioned release tarballs for multiple platforms. -For shared mode (pre-built .so modules): +Installing grammars +------------------- - scripts/ts-grammars-download.sh --shared +Grammars are installed separately from MC using the mc-ts-grammar tool +from the mc-ts-grammars repository: -This downloads platform-specific .so files into ts-grammars-shared/. -Only grammars that have a corresponding MC-curated query file in -misc/syntax-ts/queries/ will be functional. The rest are available -for distros or users who provide their own query files. 
+ # Install all available grammars from the latest release + mc-ts-grammar install --all -For static mode (pre-built .a static libraries): + # Install specific grammars + mc-ts-grammar install python bash yaml markdown - scripts/ts-grammars-download.sh --static + # Build grammars from source (requires tree-sitter CLI + C compiler) + git clone https://github.com/jtyr/mc-ts-grammars + cd mc-ts-grammars + mc-ts-grammar build --install -This downloads platform-specific .a files into -src/editor/ts-grammars//.a. All grammars are downloaded; -only those with matching query files will have active highlighting. +Grammars are installed into per-grammar directories: -Both modes can be combined: + ~/.local/share/mc/syntax-ts/<grammar>/config.ini + ~/.local/share/mc/syntax-ts/<grammar>/highlights.scm + ~/.local/share/mc/syntax-ts/<grammar>/injections.scm (if applicable) + ~/.local/lib/mc/ts-grammars/<grammar>.so - scripts/ts-grammars-download.sh --shared --static +Distros can install grammars into system paths: -Additional options: - - --latest Use latest release instead of pinned version - --platform=PLATFORM Override platform auto-detection - (x86_64-linux, aarch64-linux, aarch64-macos, - x86_64-macos, x86_64-windows) - -The download script pins grammar versions for reproducible builds. -The pinned version is set in the TS_GRAMMARS_VERSION variable at the -top of the script. + /usr/share/mc/syntax-ts/<grammar>/config.ini + /usr/share/mc/syntax-ts/<grammar>/highlights.scm + $(libdir)/mc/ts-grammars/<grammar>.so How it works @@ -151,19 +118,19 @@ The renderer in editdraw.c calls this function for each byte position to get a color pair. When tree-sitter is active the following happens: 1. Initialization (edit_load_syntax / ts_init_for_file): - - The grammar config files are consulted to find a grammar matching - the current file. Lookup precedence: filenames (exact basename - match) > shebangs (interpreter match) > extensions (suffix match). 
- - The grammar is looked up via ts_grammar_registry_lookup(): - * Shared mode: g_module_open() loads the grammar's .so module on - demand from the grammar module directory. - * Static mode: a compile-time registry maps names to functions. + - The grammar registry is loaded on first use by scanning all + <prefix>/share/mc/syntax-ts/*/config.ini files (user-local first, + then system). This builds lookup tables for extensions, filenames, + shebangs, display names, symbols, wrappers, and colors. + - The file is matched against a grammar using these lookup tables. + Precedence: filenames > shebangs > extensions. + - The grammar .so module is loaded via g_module_open() from + ~/.local/lib/mc/ts-grammars/ or $(libdir)/mc/ts-grammars/. - A TSParser is created for the grammar. - - The corresponding highlight query file (-highlights.scm) is - loaded and compiled with ts_query_new(). + - The highlight query file (<grammar>/highlights.scm) is loaded and + compiled with ts_query_new(). - If an injections.scm query file exists for this grammar, injection - parsers and queries are initialized (multiple injections per - grammar supported). + parsers and queries are initialized. - If any step fails, ts_active is set to FALSE and the legacy system takes over transparently. @@ -177,8 +144,7 @@ get a color pair. When tree-sitter is active the following happens: - When the buffer is modified (insert, delete, backspace), the function edit_syntax_ts_notify_edit() is called from edit.c. - It calls ts_tree_edit() with the byte range of the change. The - actual re-parse is deferred to the next highlight cache rebuild, - avoiding redundant parses during bulk operations. + actual re-parse is deferred to the next highlight cache rebuild. 4. Highlight cache (ts_rebuild_highlight_cache): - A TSQueryCursor is restricted to a byte range around the viewport @@ -195,10 +161,8 @@ get a color pair. When tree-sitter is active the following happens: 5. 
Color lookup (ts_get_color_at): - For each byte, a linear scan in the highlight cache returns the - matching color (last match wins). - - When multiple captures overlap (e.g. a keyword inside a function - call), the last/innermost match wins because tree-sitter returns - parent nodes before child nodes. + matching color (last match wins, so innermost captures take + precedence). Language injection @@ -206,7 +170,7 @@ Language injection Language injection allows one grammar to delegate parsing of specific AST nodes to another grammar. Injections are configured via standard -tree-sitter injections.scm query files in misc/syntax-ts/queries/. +tree-sitter injections.scm query files in the per-grammar directory. An injections.scm file uses tree-sitter query syntax with special capture names and predicates: @@ -215,15 +179,7 @@ capture names and predicates: @injection.language -- a node whose text names the child grammar #set! injection.language "" -- a fixed child grammar name -For example, HTML injects JavaScript and CSS: - - ((script_element (raw_text) @injection.content) - (#set! injection.language "javascript")) - - ((style_element (raw_text) @injection.content) - (#set! injection.language "css")) - -Markdown injects inline elements and fenced code blocks: +For example, Markdown injects inline elements and fenced code blocks: ((inline) @injection.content (#set! injection.language "markdown_inline")) @@ -232,20 +188,12 @@ Markdown injects inline elements and fenced code blocks: (info_string (language) @injection.language) (code_fence_content) @injection.content) -When the primary grammar produces matching nodes, the captured byte -ranges are extracted and parsed by the child grammar using -ts_parser_set_included_ranges(). - -Dynamic injection (fenced code blocks in markdown, heredocs in bash, -etc.) works by capturing the language name from a node in the source -text (via @injection.language). 
If a matching grammar is available, -its content node is parsed with that grammar. Parsers and queries are -cached per language for efficiency. +Injections depend on other grammars being installed. If the injected +grammar is not installed, the injection is silently skipped and the text +stays in the parent language's default color. -Injections are recursive up to 3 levels deep. When an injected -language has its own injections.scm file, those nested injections are -also processed. For example, a Go template file wrapping Markdown -can highlight fenced code blocks within the Markdown content: +Injections are recursive up to 3 levels deep. For example, a Go +template file wrapping Markdown can highlight fenced code blocks: gotmpl -> markdown -> python (3 levels). @@ -253,22 +201,16 @@ Wrapper grammars ---------------- Wrapper grammars are template languages (like Go templates) that wrap -a host language. Content outside the template syntax lives in -specific AST nodes and can be highlighted by injecting the host -grammar into those nodes. +a host language. Content outside the template syntax lives in specific +AST nodes and can be highlighted by injecting the host grammar. -Wrapper grammars are configured in the wrappers file -(misc/syntax-ts/wrappers). Each line defines a wrapper: +Wrapper grammars are configured in the per-grammar config.ini: - wrapper_grammar content_node host1 host2 ... + [grammar] + wrapper=text yaml json toml html xml markdown css - wrapper_grammar The grammar name of the template language. - content_node The AST node type that holds host content. - host1 host2 ... Grammar names that this wrapper can wrap. - -Example: - - gotmpl text yaml json toml html xml markdown css +The first token is the AST content node name, the remaining tokens are +host grammar names that this wrapper can wrap. This enables two features: @@ -277,26 +219,10 @@ This enables two features: tried as an alternative. 
If the wrapper parses successfully, the host grammar is injected into the wrapper's content nodes. - Example: a .yaml file containing Go template syntax ({{ }}) fails - to parse as YAML. The system finds that gotmpl can wrap yaml, - parses the file with gotmpl, and injects YAML highlighting into - the text nodes. The result: Go template syntax is highlighted as - gotmpl, and the YAML portions between templates get proper YAML - highlighting. - 2. Compound extensions: for files like README.md.gotmpl, the inner extension (.md) identifies the host grammar, which is injected into the wrapper's content nodes automatically. - Example: README.md.gotmpl is matched as gotmpl by the .gotmpl - extension. The system detects .md as a compound extension, - resolves it to the markdown grammar, and injects Markdown - highlighting into the gotmpl text nodes. - -To add a new wrapper grammar, add a line to the wrappers file. No -code changes are required. The wrapper grammar must have an AST node -type that contains host language content (e.g. "text" for gotmpl). - Parse error highlighting ------------------------ @@ -305,46 +231,38 @@ When a tree-sitter grammar produces ERROR nodes (parse failures), the affected regions are highlighted in red. This provides a visual indication that the parser could not understand parts of the file. -If the tree root does not cover the entire file (the parser gave up -early), the uncovered portion is also highlighted in red. - Valid captures within ERROR regions take precedence over the red -error coloring via the "narrower wins" rule: specific node captures -are always narrower than the broad ERROR region. +error coloring via the "narrower wins" rule. 
Syntax highlighting modes ------------------------- -When compiled with tree-sitter support, the editor supports three -highlighting modes: +When compiled with tree-sitter support and Tree-sitter highlighting +is enabled (Options > General > Tree-sitter highlighting), the editor +supports three modes: - Tree-sitter (TS): AST-based highlighting using tree-sitter grammars. - Legacy: Regex-based highlighting using .syntax files. - None: Syntax highlighting disabled. The active mode is shown in the status bar as S:[TS], S:[Legacy], or -S:[None]. +S:[None]. When Tree-sitter highlighting is disabled in settings, the +S:[...] indicator is hidden (original look). Ctrl+S cycles forward through modes: TS -> Legacy -> None -> TS. -If tree-sitter initialization fails for a file (no grammar available), -the mode automatically falls to Legacy and TS is excluded from -cycling for that session. +When Tree-sitter highlighting is disabled, Ctrl+S cycles Legacy <-> None +only. Ctrl+T toggles directly between TS and Legacy (skips None). This is useful for quickly comparing tree-sitter and legacy highlighting. -The same toggle is available from the Command menu as "Toggle TS/legacy -syntax". +When Tree-sitter highlighting is disabled, Ctrl+T does nothing. -Manual syntax selection via Options -> Syntax highlighting works with -tree-sitter. When the user selects a syntax type (e.g. "YAML"), the -display name is reverse-looked up to a grammar name and tree-sitter -is tried with that grammar. If tree-sitter fails, the legacy system -handles the selected type. +The --no-tree-sitter command-line flag overrides the persistent setting +and disables tree-sitter for the entire session. -The --no-tree-sitter command-line flag permanently disables tree-sitter -for the entire session. When this flag is set, Ctrl+S cycles between -Legacy and None only. +The Tree-sitter highlighting setting is saved in ~/.config/mc/ini as +editor_use_tree_sitter under the [Midnight-Commander] section. 
File layout @@ -353,296 +271,166 @@ File layout Source files (src/editor/): syntax_ts.c Tree-sitter highlighting implementation. Contains: - - ts_input_read() -- TSInput callback - - ts_load_color_config() -- loads colors.ini - - ts_capture_name_to_color() -- color mapping - - ts_config_lookup_by_value() -- config file lookup - - ts_config_lookup_by_grammar() -- reverse lookup - - ts_config_reverse_lookup() -- display name to - grammar name lookup - - ts_find_grammar() -- matches filename via config - files (filenames > shebangs > extensions) + - Grammar registry scanner (scans per-grammar + config.ini files on first use) + - ts_find_grammar() -- matches filename via registry + lookup tables (filenames > shebangs > extensions) + - ts_capture_name_to_color() -- color mapping with + longest-prefix matching - ts_find_wrapper_for_host() -- finds a wrapper grammar for a failed host grammar - - ts_find_wrapper_content_node() -- gets the content - node name for a wrapper grammar - ts_setup_wrapper_injection() -- builds injection - query for wrapper grammars programmatically - - ts_load_query_file() -- loads .scm file + query for wrapper grammars + - ts_load_query_file() -- loads .scm file from + per-grammar directory - ts_init_injections() -- sets up injection parsers - from injections.scm query files - ts_get_dynamic_lang() -- lazy-loads dynamic grammar - ts_inject_and_highlight() -- parses and highlights an injected language, with recursive injection - support (up to TS_MAX_INJECTION_DEPTH levels) - - ts_init_for_file() -- initialization (accepts - optional forced_grammar for manual selection) - - ts_free() -- cleanup (primary + injections) - - ts_collect_error_highlights() -- collects ERROR - nodes for red highlighting - - ts_run_query_into_highlights() -- runs query on - tree + support (up to 3 levels) + - ts_init_for_file() -- initialization + - ts_free() -- cleanup - ts_rebuild_highlight_cache() -- query cursor, injection processing, error highlighting - 
ts_get_color_at() -- linear scan - Conditional include: ts-grammar-registry.h (static - mode) or ts-grammar-loader.h (shared mode). - - syntax_ts.h Public API for tree-sitter integration: - ts_init_for_file(), ts_free(), ts_get_color_at(), - ts_rebuild_highlight_cache(), - ts_config_reverse_lookup(). - - syntax.c Main syntax file. Tree-sitter integration points - are inside #ifdef HAVE_TREE_SITTER blocks: - - edit_load_syntax() calls ts_init_for_file() with - optional forced_grammar for manual selection + + syntax_ts.h Public API: ts_init_for_file(), ts_free(), + ts_get_color_at(), ts_rebuild_highlight_cache(), + ts_config_reverse_lookup(), + ts_load_grammar_registry(). + + syntax.c Integration points (inside #ifdef HAVE_TREE_SITTER): + - edit_load_syntax() calls ts_init_for_file() - edit_free_syntax_rules() calls ts_free() - edit_syntax_ts_notify_edit() -- incremental edit - notification for tree re-parsing ts-grammar-loader.h - Shared mode grammar loader. Provides - ts_grammar_registry_lookup() using g_module_open() - to load grammar .so modules on demand. Caches - loaded modules. Handles naming overrides (e.g. - cobol -> tree_sitter_COBOL). - - editdraw.c Status bar rendering includes the syntax - highlighting mode indicator (S:[TS], S:[Legacy], - S:[None]) in both simple and normal status bar - formats. - - editwidget.h WEdit struct extended with tree-sitter fields: - Primary: ts_parser, ts_tree, ts_highlight_query - (void*), ts_highlights (GArray*), - ts_highlights_start/end, ts_grammar_name, ts_active, - ts_need_reparse. - Injections: ts_injection_query (TSQuery*), - ts_injection_lang_cache (GHashTable*). - - edit-impl.h Declaration of edit_syntax_ts_notify_edit(). - - edit.c Calls edit_syntax_ts_notify_edit() from four - buffer modification functions: edit_insert(), - edit_insert_ahead(), edit_delete(), edit_backspace(). 
- -Grammar libraries (src/editor/ts-grammars/): - - Pre-built grammar libraries are downloaded at build time and are NOT - stored in the git repository. See "Downloading grammars" above. - - /.a Pre-built static library for each grammar - (static mode only, downloaded via --static). - - The following files ARE stored in the repository: - - ts-grammar-registry.h - Static mode only. Lookup table mapping grammar - names to tree_sitter_*() functions. Each grammar - is guarded by #ifdef HAVE_GRAMMAR_. - Makefile.am Merges per-grammar .a files into a single - libtsgrammars.a for linking into the mc binary. - .gitignore Excludes downloaded grammar directories and build - artifacts from git. - -Runtime data files (misc/syntax-ts/): - - extensions Maps file extensions to grammar names. - filenames Maps exact filenames (basenames) to grammar names. - shebangs Maps interpreter names to grammar names. - display-names Maps grammar names to human-readable display names. - symbols Overrides for non-standard tree_sitter_*() function - names (e.g. cobol -> tree_sitter_COBOL). - wrappers Wrapper grammar definitions for template languages. - Maps wrapper grammars to their content nodes and - supported host languages (see "Wrapper grammars"). - colors.ini Per-grammar color mappings (INI format, sections - named [grammar_name]). - queries/-highlights.scm - MC-specific highlight query files, one per - supported grammar. Tailored to MC's terminal - color scheme. - queries/-injections.scm - Injection query files for grammars that support - language injection. - -Shared grammar modules (shared mode only): - - .so One loadable module per grammar. Each exports a - single tree_sitter_() function. Loaded on - demand via g_module_open(). - - The loader checks two directories in order: - 1. ~/.local/lib/mc/ts-grammars/ (user-local, checked first) - 2. 
$(libdir)/mc/ts-grammars/ (system, set at configure time) - - Users can install custom grammar modules in the user-local path - without needing root access. - -Build system files: - - m4.include/mc-with-tree-sitter.m4 - --with-tree-sitter, --with-tree-sitter-grammars, - and --with-tree-sitter-static flags. In static - mode, auto-discovers .a files and generates - -DHAVE_GRAMMAR_* defines. In shared mode, - checks for GModule. - src/editor/Makefile.am - Conditional SUBDIRS = ts-grammars. In shared - mode links $(GMODULE_LIBS) and defines - TS_GRAMMAR_LIBDIR. - src/Makefile.am $(TREE_SITTER_LIBS) added to mc_LDADD. In - static mode, also links libtsgrammars.a. - misc/syntax-ts/Makefile.am - Installation rules for config files and - queries/*.scm files. - scripts/ts-grammars-download.sh - Script to download pre-built grammar libraries: - --shared (.so) or --static (.a). Included in - the make dist tarball so users building from - source can download grammars. - -Test files: - - tests/src/editor/edit_syntax_ts.c - Unit tests for tree-sitter integration. Works in - both static and shared modes. Tests grammar - lookup, query file compilation, C parsing, C query - captures, markdown inline injection, and HTML - multi-injection. - - -Grammar configuration files ---------------------------- - -Grammar-to-file mappings are split across several plain-text config files -in misc/syntax-ts/. Each file has a simple format with one entry per -line. Lines starting with # are comments. - -extensions -- maps file extensions to grammar names: - - <.ext1> <.ext2> ... - - Example: c .c .h - Example: python .py .pyw .pyi - -filenames -- maps exact basenames to grammar names: - - ... - - Example: make Makefile makefile GNUmakefile - Example: dockerfile Dockerfile - -shebangs -- maps interpreter names to grammar names: + Grammar loader using g_module_open() to load .so + modules on demand. Caches loaded modules. Reads + symbol overrides from config.ini via + ts_get_symbol_override(). - ... 
+ editdraw.c Status bar rendering includes S:[TS]/S:[Legacy]/ + S:[None] when Tree-sitter highlighting is enabled. - MC extracts the interpreter from the shebang line, stripping the - path and "env" prefix. + editwidget.h WEdit struct extended with tree-sitter fields. - Example: python python python3 - Example: bash bash sh + editcmd.c Mode cycling (Ctrl+S, Ctrl+T) respects the + use_tree_sitter setting. -display-names -- human-readable names for the status bar: + edit.h edit_options_t includes use_tree_sitter (persistent), + syntax_highlight_mode, and ts_available (runtime). - +Installed per-grammar data (from mc-ts-grammars): - Example: c C Program - Example: hcl Terraform/HCL + /share/mc/syntax-ts// + config.ini Grammar metadata and color mappings (INI format). + highlights.scm MC-curated highlight query file. + injections.scm Injection query file (optional). -symbols -- overrides for non-standard function names: + /mc/ts-grammars/ + .so Shared grammar module. Each exports a single + tree_sitter_() function. - - - The default symbol is tree_sitter_(). Only grammars - that don't follow this convention need entries here. - - Example: cobol COBOL (function: tree_sitter_COBOL) + The loader checks two directories in order: + 1. ~/.local/lib/mc/ts-grammars/ (user-local, checked first) + 2. $(libdir)/mc/ts-grammars/ (system, set at configure time) -wrappers -- wrapper grammar definitions: +Build system files: - ... + m4.include/mc-with-tree-sitter.m4 + --with-tree-sitter flag. Checks for libtree-sitter + and gmodule-2.0. Sets TREE_SITTER_GRAMMAR_LIBDIR. - Defines a wrapper grammar (template language) that can wrap host - languages. See the "Wrapper grammars" section for details. + src/editor/Makefile.am + Links $(GMODULE_LIBS) and defines TS_GRAMMAR_LIBDIR. 
- Example: gotmpl text yaml json toml html xml markdown css -colors.ini -- color mappings for capture names (INI format): +Per-grammar configuration (config.ini) +-------------------------------------- - [python] - comment = brown; - function = brightcyan; - keyword = yellow; - string = green; - variable.builtin = brightred; +Each installed grammar has a config.ini file in INI format (parseable by +GKeyFile). It contains all metadata MC needs to register and render the +grammar: + + [grammar] + extensions=.py .pyw .pyi + filenames=SConstruct SConscript + shebangs=python python3 + display-name=Python Program + symbol=python + wrapper=text yaml json toml html xml markdown css + + [colors] + comment=brown; + keyword=yellow; + string=green; + variable.builtin=brightred; + +The [grammar] section holds: + + extensions Space-separated file extensions (must start with .). + filenames Space-separated exact basenames. + shebangs Space-separated interpreter names. + display-name Human-readable name for the status bar. + symbol Override for tree_sitter_() function + (only if different from grammar directory name). + wrapper Content node + space-separated host grammar names + (only for wrapper/template grammars). + +The [colors] section maps capture names to MC terminal colors: - Each grammar has its own self-contained section. The format is: = ; - Background can be omitted (inherits default). A foreground of "-" - means use the default foreground color. + Background can be omitted (inherits default). Each grammar has its + own self-contained color scheme. Lookup precedence when opening a file: -1. filenames -- exact basename match (highest priority) -2. shebangs -- interpreter from first line +1. filenames -- exact basename match (highest priority) +2. shebangs -- interpreter from first line 3. extensions -- file extension match (lowest priority) -All config files are read at runtime from -~/.local/share/mc/syntax-ts/ (user path, checked first) or the system -share directory. 
+At startup, MC scans per-grammar directories from: +1. ~/.local/share/mc/syntax-ts/*/config.ini (user-local, wins) +2. $(datadir)/mc/syntax-ts/*/config.ini (system) +User-local grammars fully replace system grammars of the same name. -Highlight query files (queries/*.scm) --------------------------------------- -Each grammar has a corresponding highlight query file named --highlights.scm. Query files are searched in this order: +Highlight query files +--------------------- -1. User queries: ~/.local/share/mc/syntax-ts/queries/ -2. System queries: $(datadir)/mc/syntax-ts/queries/ +Each grammar has a highlight query file named highlights.scm in its +per-grammar directory. Query files are searched in this order: -The first file found is used. All query files are MC-specific, -tailored to MC's terminal color scheme. Upstream query files from -grammar repositories are not used directly. +1. User: ~/.local/share/mc/syntax-ts//highlights.scm +2. System: $(datadir)/mc/syntax-ts//highlights.scm -Grammars that support language injection also have an injection query -file named -injections.scm (same search order). +All query files are MC-specific, tailored to MC's terminal color scheme. +Upstream query files from grammar repositories are not used directly. -Query files use the tree-sitter query syntax (S-expressions) to match -AST node patterns and assign capture names. For example: +Query files use tree-sitter query syntax (S-expressions) to match AST +node patterns and assign capture names: (function_definition name: (identifier) @function) (string_literal) @string (comment) @comment - [ - "if" - "else" - "return" - ] @keyword - -The capture names (the @name parts) are mapped to MC colors by the -ts_default_colors[] table in syntax.c. + ["if" "else" "return"] @keyword Critical constraint: every node name and anonymous string literal in the -query file MUST exist in the grammar's symbol table (ts_symbol_names[] -in parser.c). 
If any name is invalid, ts_query_new() returns NULL and -the entire query fails silently, causing fallback to legacy highlighting. +query file MUST exist in the grammar's symbol table. If any name is +invalid, ts_query_new() returns NULL and the language falls back to +legacy highlighting. Query files support hierarchical capture names (e.g. @keyword.control, -@string.special). The color lookup performs longest-prefix matching, so -@keyword.control is tried first, then @keyword. +@string.special). The color lookup performs longest-prefix matching. Color mapping ------------- -The colors config file (misc/syntax-ts/colors.ini) maps capture names -to MC foreground colors using INI format. Each grammar has its own -self-contained section (e.g. [python], [bash]) with explicit color -definitions. Colors are chosen to match MC's default syntax -highlighting appearance (blue background skin): +Colors are defined in the [colors] section of each grammar's config.ini. +Common capture names and their typical colors: Capture Foreground Color Purpose ------- ---------------- ------- @@ -653,36 +441,18 @@ highlighting appearance (blue background skin): function brightcyan Function names function.special brightred Preprocessor macros function.builtin brown Built-in functions (Go) - function.macro brightmagenta Macro invocations (Rust) - type yellow Type names - type.builtin brightgreen Built-in types (Go) string green String literals string.special brightgreen Char literals, escapes - number lightgray Numeric literals - number.builtin brightgreen Special numbers (Haskell) comment brown Comments - comment.error red CSS hex colors constant lightgray Constants - constant.builtin brightmagenta Built-in constants - variable (default) Variables variable.builtin brightred self, $vars - variable.special brightgreen Shell expansions operator brightcyan Operators - operator.word yellow Word-like operators delimiter brightcyan Brackets, punctuation - delimiter.special brightmagenta 
Semicolons, bitwise ops - property brightcyan Object properties - property.key yellow Config file keys - label cyan Labels tag brightcyan HTML/XML tags - markup.addition brightgreen Diff additions - markup.deletion brightred Diff deletions - markup.heading brightmagenta Diff headers -To change a color for all languages, edit the colors config file. -To change a color for a specific language, use a more specific capture -name in the query file (e.g. @keyword.control instead of @keyword) and -add a corresponding entry to the colors file. +Available colors: black, red, green, brown, blue, magenta, cyan, +lightgray, gray, brightred, brightgreen, yellow, brightblue, +brightmagenta, brightcyan, white. Adding a new language @@ -690,162 +460,97 @@ Adding a new language To add tree-sitter highlighting for a new language: -1. Register the grammar in config files. - - Edit the config files in misc/syntax-ts/: - - extensions: add file extension mappings - Example: newlang .nl .nlx - - filenames: add exact filename mappings (if applicable) - - shebangs: add interpreter mappings (if applicable) - - display-names: add a human-readable name - Example: newlang New Language - - If the grammar's tree_sitter_*() function does not follow the - standard tree_sitter_ convention, add an override to the - symbols file. - -2. Register the grammar for static mode. - - Edit src/editor/ts-grammars/ts-grammar-registry.h: - - Add an #ifdef HAVE_GRAMMAR_ guarded extern declaration: - #ifdef HAVE_GRAMMAR_ - extern const TSLanguage *tree_sitter_(void); - #endif - - Add a similarly guarded entry to the ts_grammar_registry[] array: - #ifdef HAVE_GRAMMAR_ - { "", tree_sitter_ }, - #endif - - For shared mode, no registration is needed -- the grammar is - discovered automatically from its .so file. - -3. Ensure the grammar exists in the external grammars repository. 
- - The grammar must exist in the grammars.yaml registry in the - tree-sitter-grammars repository so that it gets built and included - in release tarballs. Most grammars are already registered. If - the grammar is new, add it to grammars.yaml in that repository. - The configure step auto-discovers available .a files, so no changes - to configure.ac are needed. - -4. Create a highlight query file. - - Create misc/syntax-ts/queries/-highlights.scm with tree-sitter - query patterns. Use the grammar's node-types.json or the - ts_symbol_names[] array in parser.c to find valid node names. - - You can also reference the upstream grammar repository's queries/ - directory for inspiration, but those queries often use capture names - that don't match MC's color scheme and may reference nodes from - different grammar versions. +1. Ensure the grammar is in the mc-ts-grammars repository's + grammars.yaml registry. - Test the query by extracting all node names used in the query and - verifying they exist in the grammar's ts_symbol_names[] (see the - "Validating query files" section). +2. Create grammars//highlights.scm with tree-sitter query + patterns tailored to MC's color scheme. - If the grammar supports language injection, create an injection - query file: misc/syntax-ts/queries/-injections.scm. +3. Create grammars//config.ini with file matching rules and + colors. -5. Update the installation rules. +4. Set release: true in the grammar's grammars.yaml entry. - Edit misc/syntax-ts/Makefile.am and add -highlights.scm to the - TS_QUERYFILES list. +5. Submit a pull request to the mc-ts-grammars repository. -6. Rebuild. +For local testing without a PR: - Run autoreconf (or autogen.sh) to regenerate the build system, then - configure with --with-tree-sitter and build. Run the unit tests: - - make check -C tests/src/editor - - The edit_syntax_ts test validates all query files against their - grammars. 
+ # Install the grammar .so and config manually + mkdir -p ~/.local/share/mc/syntax-ts/ + cp config.ini highlights.scm ~/.local/share/mc/syntax-ts// + cp .so ~/.local/lib/mc/ts-grammars/ Removing or disabling a language -------------------------------- -To exclude a grammar at build time (without modifying any files): -- Use --with-tree-sitter-grammars=LIST to specify only the grammars - you want. Unselected grammars are not compiled, downloaded, or - registered. +Grammars are managed externally by the mc-ts-grammar installer. -In shared mode, grammars can also be excluded at install time by simply -not installing the corresponding .so file and .scm query file. + # Remove a grammar + mc-ts-grammar uninstall -To disable a grammar without removing its files: -- Comment out or remove its lines from the extensions, filenames, - and shebangs config files. The grammar will still be compiled but - never matched at runtime. + # Remove all grammars + mc-ts-grammar uninstall --all -To fully remove a grammar: -1. Remove its lines from extensions, filenames, shebangs, and - display-names config files. -2. Remove its #ifdef-guarded entries from ts-grammar-registry.h. -3. Delete misc/syntax-ts/queries/-highlights.scm (and - -injections.scm if present). -4. Remove -highlights.scm from TS_QUERYFILES in - misc/syntax-ts/Makefile.am. +To disable tree-sitter entirely, uncheck "Tree-sitter highlighting" in +Options > General, or pass --no-tree-sitter on the command line. -Updating a grammar ------------------- +Updating grammars +----------------- -Grammar updates are handled by the external tree-sitter-grammars -repository, which has CI automation to check for upstream updates -weekly, generate parser.c from grammar.js, build new releases, and -publish tarballs containing both shared (.so) and static (.a) -libraries. 
+ # Update all installed grammars to the latest release + mc-ts-grammar update --all -To update MC's grammars to a newer release: + # Update specific grammars + mc-ts-grammar update python bash yaml -1. Update the version in scripts/ts-grammars-download.sh - (TS_GRAMMARS_VERSION variable) or use --latest. -2. Re-download grammars: +Validating query files +---------------------- - scripts/ts-grammars-download.sh --shared # for shared mode - scripts/ts-grammars-download.sh --static # for static mode +A query file must only reference node names that exist in the grammar. +If any node name is invalid, tree-sitter silently rejects the entire +query and the language falls back to legacy highlighting. -3. Validate the query files against the new grammars. Node names may - have changed between versions. See "Validating query files" below. +The mc-ts-grammars repository includes a validation step in CI that +compiles all query files against their grammars using ts_query_new(). -4. Rebuild and test: +For local validation: - make -j$(nproc) - make check -C tests/src/editor + cd mc-ts-grammars + .github/actions/validate-grammar/scripts/validate.sh --build-dir=build --all -Validating query files ----------------------- +Testing with mc-syntax-dump +--------------------------- -A query file must only reference node names that exist in the grammar. -If any node name is invalid, tree-sitter silently rejects the entire -query and the language falls back to legacy highlighting. +The mc-syntax-dump tool (tests/syntax/) dumps syntax highlighting output +as ANSI-colored text for comparing tree-sitter and legacy rendering. -To extract the valid node names from a grammar, look at the -ts_symbol_names[] array in the grammar's parser.c source. This file -is generated from grammar.js during CI and is not stored in the -tree-sitter-grammars repository. To obtain it locally, run -tree-sitter generate in the grammar's directory. Also check -ts_field_names[] for valid field names. 
+Build the tool from the MC source tree: -To extract node names used in a query file: + cd tests/syntax + make - 1. Strip ;; comments and #match?/#not-match? predicate lines. - 2. Extract all bare identifiers (node names) and quoted strings - (anonymous nodes like "if", "return"). - 3. Ignore _ which is the tree-sitter wildcard. - 4. Check each against the grammar's symbol table. +Compare tree-sitter and legacy output: -The unit test (tests/src/editor/edit_syntax_ts.c) validates all query -files by compiling them with ts_query_new() against their respective -grammars. Run it with: + # TS highlighting using queries from the grammars repo + mc-syntax-dump --ts \ + --grammar-dir /grammars/python \ + --lib-dir /build \ + example.py - make check -C tests/src/editor + # Legacy highlighting + mc-syntax-dump --legacy example.py -If you find invalid nodes, either remove the offending patterns from the -query file or replace them with valid node names from the grammar. +The --grammar-dir option points to a per-grammar directory containing +highlights.scm and config.ini. The --lib-dir option points to the +directory with built .so files. This allows testing queries directly +from a development checkout without installing them. + +Each grammar in mc-ts-grammars includes a report.md with a comparison +of tree-sitter vs legacy highlighting quality. Troubleshooting @@ -860,43 +565,25 @@ Tree-sitter highlighting does not appear: above. Other possible causes: - - In shared mode, the grammar's .so module is not installed in + - Tree-sitter highlighting is disabled in Options > General. + - The grammar .so module is not installed in ~/.local/lib/mc/ts-grammars/ or $(libdir)/mc/ts-grammars/. - - In static mode, the grammar was not included in - --with-tree-sitter-grammars at configure time. - - The filename does not match any entry in the extensions, filenames, - or shebangs config files. 
- - The grammar name in the config files does not match the function - exported by the grammar module. - - The .scm file is not found in the queries/ directory. + - The filename does not match any entry in any grammar's config.ini. + - The grammar symbol name does not match the function exported by + the .so module. Colors look wrong: Check that the capture names in the query file match entries in the - ts_default_colors[] table. The lookup uses longest-prefix matching: - @keyword.control is matched before @keyword. If no prefix matches, - the capture is ignored (default editor color is used). + [colors] section of config.ini. The lookup uses longest-prefix + matching: @keyword.control is matched before @keyword. If no + prefix matches, the default editor color is used. A grammar causes high CPU usage or hangs: Some grammars may be slow on very large files. If a grammar - consistently causes problems, consider disabling it by removing its - entry from the config files. The editor will fall back to legacy - highlighting. - -Build fails with "grammar static libraries not found": - - Grammar .a files must be downloaded before building in static mode: - - scripts/ts-grammars-download.sh --static - -Build fails with undefined reference to tree_sitter_: - - In static mode, ensure the grammar has an entry in - ts-grammar-registry.h and that its .a file is present in - src/editor/ts-grammars//.a. In shared mode, ensure the - .so module exports the expected symbol name (check with: - nm -D .so | grep tree_sitter). + consistently causes problems, uninstall it or switch to legacy + highlighting with Ctrl+T. Limitations @@ -906,24 +593,16 @@ Limitations entire query is rejected. There is no partial fallback within a single query file. -- The color mapping is global. Per-language color overrides are - achieved by using more specific capture names in query files (e.g. - @keyword.directive for Make), not by configuring colors per language. 
- - The highlight cache covers the viewport plus 8 KB. Very long lines or extreme scroll speeds may cause brief flicker as the cache is rebuilt. -- Dynamic injection (inject_dynamic) relies on the language name - in the source matching a registered grammar name exactly. Common - aliases (e.g. "py" for "python", "js" for "javascript") are not - resolved automatically. - -- During rapid typing (e.g. holding spacebar or backspace), the - screen may not refresh until the key is released. This is because - MC's renderer is single-threaded: the tree-sitter parse and query - work runs synchronously in the rendering path, competing with input - processing. To mitigate this, injection processing (the most - expensive step) is skipped during rapid edits and refreshed once - input settles. Use Ctrl+T to switch to legacy highlighting for - extended bulk editing if needed. +- Dynamic injection relies on the language name in the source matching + a registered grammar name exactly. Common aliases (e.g. "py" for + "python") are not resolved automatically. + +- During rapid typing (e.g. holding spacebar or backspace), the screen + may not refresh until the key is released. MC's renderer is + single-threaded and tree-sitter work runs synchronously. Injection + processing is skipped during rapid edits and refreshed once input + settles. 
diff --git a/m4.include/mc-with-tree-sitter.m4 b/m4.include/mc-with-tree-sitter.m4 index 191b82cd9a..bdfc38adc3 100644 --- a/m4.include/mc-with-tree-sitter.m4 +++ b/m4.include/mc-with-tree-sitter.m4 @@ -10,18 +10,6 @@ AC_DEFUN([mc_WITH_TREE_SITTER], [ [with_tree_sitter=$withval], [with_tree_sitter=no]) - AC_ARG_WITH([tree-sitter-grammars], - AS_HELP_STRING([--with-tree-sitter-grammars=LIST], - [Comma-separated list of tree-sitter grammars to build (default: all)]), - [with_tree_sitter_grammars=$withval], - [with_tree_sitter_grammars=all]) - - AC_ARG_WITH([tree-sitter-static], - AS_HELP_STRING([--with-tree-sitter-static], - [Link tree-sitter grammars statically into the binary (default: build as shared modules)]), - [with_tree_sitter_static=$withval], - [with_tree_sitter_static=no]) - if test x"$with_tree_sitter" = xyes; then AC_CHECK_HEADER([tree_sitter/api.h], [], [AC_MSG_ERROR([tree-sitter headers not found (required for --with-tree-sitter)])]) @@ -29,100 +17,22 @@ AC_DEFUN([mc_WITH_TREE_SITTER], [ [AC_MSG_ERROR([tree-sitter library not found (required for --with-tree-sitter)])]) AC_DEFINE([HAVE_TREE_SITTER], [1], [Define if tree-sitter syntax highlighting is enabled]) - if test x"$with_tree_sitter_static" = xyes; then - AC_DEFINE([TREE_SITTER_STATIC], [1], [Define if tree-sitter grammars are linked statically]) - TREE_SITTER_BUILD_TARGET="libtsgrammars.a" - TREE_SITTER_BUILD_MODE="static" - else - AC_DEFINE([TREE_SITTER_SHARED], [1], [Define if tree-sitter grammars are loaded as shared modules]) - PKG_CHECK_MODULES([GMODULE], [gmodule-2.0], [], - [AC_MSG_ERROR([gmodule-2.0 required for shared tree-sitter grammars (use --with-tree-sitter-static to avoid)])]) - TREE_SITTER_BUILD_TARGET="" - TREE_SITTER_BUILD_MODE="shared" - dnl Resolve libdir for use in Makefile (which doesn't have automake's variable chain) - eval "ts_libdir=\"$libdir\"" - TREE_SITTER_GRAMMAR_LIBDIR="${ts_libdir}/mc/ts-grammars" - fi - AC_SUBST([TREE_SITTER_BUILD_TARGET]) - 
AC_SUBST([TREE_SITTER_BUILD_MODE]) - AC_SUBST([TREE_SITTER_GRAMMAR_LIBDIR]) + AC_DEFINE([TREE_SITTER_SHARED], [1], [Define if tree-sitter grammars are loaded as shared modules]) + PKG_CHECK_MODULES([GMODULE], [gmodule-2.0], [], + [AC_MSG_ERROR([gmodule-2.0 required for tree-sitter grammar loading])]) + + dnl Resolve libdir for use in Makefile + eval "ts_libdir=\"$libdir\"" + TREE_SITTER_GRAMMAR_LIBDIR="${ts_libdir}/mc/ts-grammars" - TREE_SITTER_GRAMMAR_DEFS="" - TREE_SITTER_GRAMMAR_ARCHIVES="" - TREE_SITTER_GRAMMARS="" TREE_SITTER_LIBS="-ltree-sitter" TREE_SITTER_CFLAGS="" - if test x"$with_tree_sitter_static" = xyes; then - dnl Static mode: discover available .a files and build grammar list - ts_grammar_dir="$srcdir/src/editor/ts-grammars" - - if test x"$with_tree_sitter_grammars" = xall; then - dnl Auto-discover all available .a files - tree_sitter_grammars="" - for a in "$ts_grammar_dir"/*/*.a; do - test -f "$a" || continue - ts_grammar_a_dir=`dirname "$a"` - g=`basename "$ts_grammar_a_dir"` - tree_sitter_grammars="$tree_sitter_grammars $g" - done - tree_sitter_grammars=`echo $tree_sitter_grammars` - else - tree_sitter_grammars=`echo "$with_tree_sitter_grammars" | tr ',' ' '` - fi - - for g in $tree_sitter_grammars; do - if test ! -f "$ts_grammar_dir/$g/$g.a"; then - AC_MSG_ERROR([tree-sitter grammar static library not found: $ts_grammar_dir/$g/$g.a]) - fi - upper=`echo "$g" | tr 'abcdefghijklmnopqrstuvwxyz' 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'` - TREE_SITTER_GRAMMAR_DEFS="$TREE_SITTER_GRAMMAR_DEFS -DHAVE_GRAMMAR_${upper}=1" - TREE_SITTER_GRAMMAR_ARCHIVES="$TREE_SITTER_GRAMMAR_ARCHIVES $g/$g.a" - TREE_SITTER_GRAMMARS="$TREE_SITTER_GRAMMARS $g" - done - dnl Remove leading spaces - TREE_SITTER_GRAMMAR_DEFS=`echo $TREE_SITTER_GRAMMAR_DEFS` - TREE_SITTER_GRAMMAR_ARCHIVES=`echo $TREE_SITTER_GRAMMAR_ARCHIVES` - TREE_SITTER_GRAMMARS=`echo $TREE_SITTER_GRAMMARS` - fi - dnl Shared mode: no grammar compilation. .so files are loaded at runtime. 
- - AC_SUBST([TREE_SITTER_GRAMMAR_DEFS]) - AC_SUBST([TREE_SITTER_GRAMMAR_ARCHIVES]) - AC_SUBST([TREE_SITTER_GRAMMARS]) + AC_SUBST([TREE_SITTER_GRAMMAR_LIBDIR]) AC_SUBST([TREE_SITTER_LIBS]) AC_SUBST([TREE_SITTER_CFLAGS]) - if test x"$with_tree_sitter_static" = xyes; then - ts_grammar_count=`echo $tree_sitter_grammars | wc -w | tr -d ' '` - if test x"$with_tree_sitter_grammars" = xall; then - AC_MSG_NOTICE([tree-sitter: enabled (static) with all $ts_grammar_count grammars]) - else - AC_MSG_NOTICE([tree-sitter: enabled (static) with $ts_grammar_count grammars: $TREE_SITTER_GRAMMARS]) - fi - else - AC_MSG_NOTICE([tree-sitter: enabled (shared) -- grammar .so files loaded at runtime]) - fi - - dnl Check that grammar files exist - ts_grammars_found=no - if test x"$with_tree_sitter_static" = xyes; then - dnl Static: check for at least one grammar .a file - for g in $tree_sitter_grammars; do - if test -f "$srcdir/src/editor/ts-grammars/$g/$g.a"; then - ts_grammars_found=yes - break - fi - done - if test x"$ts_grammars_found" = xno; then - AC_MSG_ERROR([tree-sitter grammar static libraries not found. Run: - ./scripts/ts-grammars-download.sh --static]) - fi - else - dnl Shared mode: grammars loaded at runtime, no compile-time check needed. - dnl Grammar .so files are an external dependency. - ts_grammars_found=yes - fi + AC_MSG_NOTICE([tree-sitter: enabled (shared) -- grammar .so files loaded at runtime]) else AC_MSG_NOTICE([tree-sitter syntax highlighting disabled]) fi diff --git a/misc/syntax-ts/Makefile.am b/misc/syntax-ts/Makefile.am index 068f88863f..f401ffa10f 100644 --- a/misc/syntax-ts/Makefile.am +++ b/misc/syntax-ts/Makefile.am @@ -1,29 +1,5 @@ -CONFIGFILES = \ - extensions \ - filenames \ - shebangs \ - display-names \ - symbols \ - wrappers \ - colors.ini - +# Per-grammar config files and query files are installed by mc-ts-grammar. +# This directory is intentionally empty in the source tree. 
if USE_INTERNAL_EDIT -syntaxtsdir = $(pkgdatadir)/syntax-ts -dist_syntaxts_DATA = $(CONFIGFILES) - -queriesdir = $(pkgdatadir)/syntax-ts/queries - -install-data-local: - $(MKDIR_P) $(DESTDIR)$(queriesdir) - for f in $(srcdir)/queries/*-highlights.scm $(srcdir)/queries/*-injections.scm; do \ - test -f "$$f" && $(INSTALL_DATA) "$$f" $(DESTDIR)$(queriesdir)/; \ - done; true - -uninstall-local: - rm -f $(DESTDIR)$(queriesdir)/*-highlights.scm - rm -f $(DESTDIR)$(queriesdir)/*-injections.scm - -rmdir $(DESTDIR)$(queriesdir) +syntaxtsdir = $(pkgdatadir)/syntax-ts endif - -EXTRA_DIST = $(CONFIGFILES) \ - queries diff --git a/misc/syntax-ts/colors.ini b/misc/syntax-ts/colors.ini deleted file mode 100644 index c16a1eeb4d..0000000000 --- a/misc/syntax-ts/colors.ini +++ /dev/null @@ -1,787 +0,0 @@ -# Tree-sitter capture name to MC color mappings. -# -# Each grammar has its own section with explicit color definitions. -# No [default] inheritance -- each section is self-contained. -# Captures are mapped to match MC's original legacy syntax colors. -# -# Format: capture_name = foreground;background -# Background omitted = inherits editor default. 
-# Uses the 16 ANSI colors: black, red, green, yellow, blue, magenta, -# cyan, white, brightblack, brightred, brightgreen, brightyellow, -# brightblue, brightmagenta, brightcyan, brightwhite -# Plus MC extras: brown, lightgray - -[ada] -comment = brown; -constant.builtin = brightmagenta; -delimiter = brightcyan; -function.special = brightred; -keyword = yellow; -label = cyan; -property = brightcyan; -string = green; -string.special = brightgreen; - -[asm] -comment = brown; -constant = lightgray; -keyword.control = brightmagenta; -keyword.other = white; -label = cyan; -string = green; - -[awk] -comment = brown; -delimiter = brightcyan; -delimiter.special = brightmagenta; -function.special = brightred; -keyword.other = white; -operator.word = yellow; -string = green; -string.special = brightgreen; -variable.builtin = brightred; - -[bash] -comment = brown; -comment.shebang = brightcyan; -delimiter = brightcyan; -function = cyan; -function.definition = brightmagenta; -function.security = red; -keyword = yellow; -keyword.function = brightmagenta; -operator = brightcyan; -punctuation.backtick = brightred; -punctuation.special = brightred; -string = green; -string.special = brightgreen; -variable.builtin = brightred; -variable.positional = brightred; -variable.special = brightgreen; - -[bison] -comment = brown; -delimiter = brightcyan; -keyword = yellow; -number = lightgray; -string = green; -string.special = brightgreen; -tag = brightcyan; -type = yellow; - -[c] -comment = brown; -constant = lightgray; -delimiter = brightcyan; -delimiter.special = brightmagenta; -function.special = brightred; -keyword = yellow; -label = cyan; -number = lightgray; -operator.word = yellow; -string = green; -string.special = brightgreen; -type = yellow; - -[c_sharp] -comment = brown; -delimiter = brightcyan; -delimiter.special = brightmagenta; -function = brightcyan; -function.special = brightred; -keyword = yellow; -keyword.other = white; -label = cyan; -operator.word = yellow; -string = 
green; -string.special = brightgreen; -type = yellow; - -[caddy] -comment = brown; -constant = lightgray; -function.special = brightred; -keyword = yellow; -keyword.control = brightmagenta; -string = green; -string.special = brightgreen; - -[cmake] -comment = brown; -delimiter = brightcyan; -function.special = brightred; -string = green; -tag.special = white; -variable = lightgray; -variable.special = brightgreen; - -[cobol] -comment = brown; -constant = lightgray; -label = cyan; -string = green; -string.special = brightgreen; - -[commonlisp] -comment = brown; -constant.builtin = brightmagenta; -delimiter = brightcyan; -function = brightcyan; -keyword = yellow; -keyword.other = white; -number = lightgray; -string = green; -string.special = brightgreen; -variable = lightgray; -variable.builtin = brightred; - -[cpp] -comment = brown; -delimiter = brightcyan; -delimiter.special = brightmagenta; -function.special = brightred; -keyword = yellow; -label = cyan; -operator.word = yellow; -string = green; -string.special = brightgreen; -type = yellow; - -[css] -charset = brightred; -comment = brown; -comment.error = red; -constant = lightgray; -delimiter = brightcyan; -delimiter.special = brightmagenta; -function.special = brightred; -import = brightred; -keyframes = brightred; -keyword = yellow; -keyword.directive = magenta; -keyword.other = white; -media = brightred; -namespace = brightcyan; -operator = brightcyan; -scope = lightgray; -string = green; -string.special = brightgreen; -supports = brightred; -tag.special = white; - -[cuda] -comment = brown; -constant = lightgray; -delimiter = brightcyan; -delimiter.special = brightmagenta; -function.special = brightred; -keyword = yellow; -keyword.other = white; -label = cyan; -operator.word = yellow; -string = green; -string.special = brightgreen; -type = yellow; - -[d] -comment = brown; -delimiter = brightcyan; -delimiter.special = brightmagenta; -function.special = brightred; -keyword = yellow; -keyword.directive = 
magenta; -label = cyan; -operator.word = yellow; -string = green; -string.special = brightgreen; -type = yellow; -variable.builtin = brightred; - -[diff] -comment = brown; -delimiter = brightcyan; -function.special = brightred; -keyword = yellow; -markup.addition = brightgreen; -markup.deletion = brightred; -markup.heading = brightmagenta; -tag = brightcyan; - -[dockerfile] -comment = brown; -delimiter.special = brightmagenta; -function = brightcyan; -function.special = brightred; -keyword = yellow; -number.builtin = brightgreen; -property = brightcyan; -string = green; -string.special = brightgreen; -variable = lightgray; - -[dot] -comment = brown; -delimiter = brightcyan; -delimiter.special = brightmagenta; -function.special = brightred; -keyword = yellow; -keyword.other = white; -operator.word = yellow; -string = green; - -[erlang] -comment = brown; -constant = lightgray; -delimiter = brightcyan; -delimiter.special = brightmagenta; -function = brightcyan; -keyword = yellow; -keyword.other = white; -label = cyan; -string = green; -string.special = brightgreen; - -[fortran] -comment = brown; -constant.builtin = brightmagenta; -delimiter = brightcyan; -function.special = brightred; -keyword = yellow; -number.builtin = brightgreen; -operator.word = yellow; -property = brightcyan; -string = green; - -[glsl] -comment = brown; -delimiter = brightcyan; -delimiter.special = brightmagenta; -function.macro = brightmagenta; -function.special = brightred; -keyword = yellow; -keyword.other = white; -string = green; -string.special = brightgreen; -type = yellow; - -[go] -comment = brown; -constant = lightgray; -delimiter = brightcyan; -delimiter.special = brightmagenta; -function.builtin = brown; -keyword = yellow; -label = cyan; -operator = brightcyan; -string = green; -type.builtin = brightgreen; - -[gotmpl] -comment = brown; -constant.builtin = brightmagenta; -delimiter = brightcyan; -function = brightcyan; -function.builtin = yellow; -keyword = yellow; -number = lightgray; 
-operator = brightcyan; -property = white; -string = green; -string.special = brightgreen; -variable.builtin = brightred; - -[haskell] -comment = brown; -comment.special = brightgreen; -delimiter = brightcyan; -function = brightcyan; -keyword = yellow; -keyword.other = white; -number.builtin = brightgreen; -operator.word = yellow; -string = green; -string.special = brightgreen; - -[hcl] -comment = brown; -constant = lightgray; -delimiter = brightcyan; -keyword = yellow; -keyword.directive = brightmagenta; -number = lightgray; -operator = brightcyan; -string = green; -type = yellow; - -[terraform] -comment = brown; -constant = lightgray; -delimiter = brightcyan; -keyword = yellow; -keyword.directive = brightmagenta; -number = lightgray; -operator = brightcyan; -string = green; -type = yellow; - -[html] -comment = brown; -delimiter = brightcyan; -function.special = brightred; -label = cyan; -property.key = yellow; -string.special = brightgreen; -tag = brightcyan; -variable.builtin = brightred; - -[idl] -comment = brown; -delimiter = brightcyan; -delimiter.special = brightmagenta; -keyword = yellow; -keyword.control = brightmagenta; -operator.word = yellow; -string = green; -string.special = brightgreen; - -[ini] -comment = brown; -delimiter = brightcyan; -keyword = yellow; -label = cyan; -variable.builtin = brightred; - -[java] -comment = brown; -delimiter = brightcyan; -delimiter.special = brightmagenta; -function.special = brightred; -keyword = yellow; -label = cyan; -operator.word = yellow; -string = green; -string.special = brightgreen; - -[javascript] -comment = brown; -delimiter = brightcyan; -delimiter.special = brightmagenta; -keyword = yellow; -label = cyan; -number.builtin = brightgreen; -operator = brightcyan; -operator.word = yellow; -string = green; -string.special = brightgreen; - -[json] -comment = brown; -delimiter = brightcyan; -string = green; -string.special = brightgreen; - -[kotlin] -comment = brown; -delimiter = brightcyan; -function.builtin = 
brown; -keyword = yellow; -keyword.control = brightmagenta; -label = cyan; -operator = brightcyan; -string = green; -string.special = brightgreen; -type.builtin = brightgreen; -variable.builtin = brightred; - -[latex] -comment = brown; -constant = lightgray; -function = brightcyan; -function.macro = brightmagenta; -keyword.conditional = yellow; -keyword.directive = magenta; -keyword.import = yellow; -label = cyan; -markup.heading = brightmagenta; -markup.heading. = brightmagenta; -markup.italic = magenta; -markup.link = yellow; -markup.link.url = yellow; -markup.math = brightgreen; -markup.strong = brightmagenta; -module = brightcyan; -nospell = lightgray; -operator = brightcyan; -punctuation.bracket = brightcyan; -punctuation.delimiter = brightcyan; -punctuation.special = brightmagenta; -spell = lightgray; -string = green; -string.regexp = brightmagenta; -string.special.path = green; -variable = lightgray; -variable.parameter = lightgray; - -[lua] -comment = brown; -constant.builtin = brightmagenta; -keyword = yellow; -keyword.other = white; -label = cyan; -string = green; - -[mail] -markup.environment = brightcyan; -string = green; -tag = brightcyan; -type.builtin = brightgreen; - -[make] -comment = brown; -delimiter = brightcyan; -keyword = yellow; -keyword.directive = magenta; -property.key = yellow; -string = green; -tag.special = white; -variable = lightgray; - -[markdown] -delimiter = brightcyan; -function.special = brightred; -keyword = yellow; -label = cyan; -string = green; -variable = lightgray; - -[markdown_inline] -delimiter = brightcyan; -function.special = brightred; -keyword = yellow; -label = cyan; -markup.bold = brightmagenta; -markup.italic = magenta; -string.special = brightgreen; - -[matlab] -comment = brown; -delimiter = brightcyan; -keyword.other = white; -operator = brightcyan; -string = green; -string.special = brightgreen; - -[meson] -comment = brown; -keyword = yellow; -keyword.other = white; -property.key = yellow; -string = green; 
-variable.builtin = yellow; - -[muttrc] -comment = brown; -delimiter = brightcyan; -function.special = brightred; -keyword = yellow; -string = green; -string.special = brightgreen; - -[ocaml] -comment = brown; -delimiter = brightcyan; -function.special = brightred; -keyword = yellow; -label = cyan; -tag = brightcyan; -type = yellow; - -[pascal] -comment.special = brightgreen; -keyword.other = white; -label = cyan; -number = lightgray; -tag = brightcyan; -type = yellow; - -[perl] -comment = brown; -delimiter = brightcyan; -delimiter.special = brightmagenta; -keyword = yellow; -keyword.directive = magenta; -label = cyan; -operator.word = yellow; -string = green; -string.special = brightgreen; -variable.special = brightgreen; - -[php] -comment = brown; -delimiter = brightcyan; -delimiter.special = brightmagenta; -keyword = yellow; -keyword.control = brightmagenta; -keyword.other = white; -label = cyan; -string = green; -variable.special = brightgreen; - -[po] -comment = brown; -string = green; -tag = brightcyan; -tag.special = white; - -[properties] -comment = brown; -delimiter = brightcyan; -keyword.directive = magenta; -property.key = yellow; -variable = lightgray; -variable.special = brightgreen; - -[proto] -comment = brown; -delimiter = brightcyan; -delimiter.special = brightmagenta; -keyword = yellow; -operator.word = yellow; -string = green; -string.special = brightgreen; -tag = brightcyan; - -[puppet] -boolean = brightmagenta; -comment = brown; -conditional = yellow; -error = red; -float = lightgray; -function = brightcyan; -function.call = brightcyan; -include = yellow; -keyword = yellow; -keyword.function = yellow; -keyword.operator = yellow; -method = brightcyan; -method.call = brightcyan; -namespace = brightcyan; -number = lightgray; -operator = brightcyan; -parameter = lightgray; -property = brightcyan; -punctuation.bracket = brightcyan; -punctuation.delimiter = brightcyan; -punctuation.special = brightmagenta; -spell = lightgray; -string = green; 
-string.escape = brightgreen; -string.regex = green; -type = yellow; -type.builtin = brightgreen; -variable = lightgray; -variable.builtin = brightred; - -[python] -comment = brown; -constant = lightgray; -delimiter = brightcyan; -delimiter.special = brightmagenta; -function.special = brightred; -keyword = yellow; -operator.word = yellow; -string = green; -string.special = brightgreen; -variable.builtin = brightred; - -[qmljs] -comment = brown; -delimiter = brightcyan; -delimiter.special = brightmagenta; -keyword = yellow; -number.builtin = brightgreen; -operator = brightcyan; -operator.word = yellow; -string = green; -string.special = brightgreen; -type = yellow; - -[r] -comment = brown; -constant = lightgray; -delimiter = brightcyan; -function.special = brightred; -keyword = yellow; -keyword.control = brightmagenta; -operator.word = yellow; -property = brightcyan; -string.special = brightgreen; -type = yellow; - -[ruby] -comment = brown; -delimiter = brightcyan; -function.special = brightred; -keyword.directive = magenta; -keyword.other = white; -operator.word = yellow; -string = green; -string.special = brightgreen; -variable.special = brightgreen; - -[rust] -comment = brown; -function = brightcyan; -function.macro = brightmagenta; -keyword = yellow; -label = cyan; -number.builtin = brightgreen; -string = green; -string.special = brightgreen; -tag.special = white; - -[scala] -comment = brown; -constant.builtin = brightmagenta; -delimiter = brightcyan; -delimiter.special = brightmagenta; -keyword = yellow; -operator = brightcyan; -string = green; -string.special = brightgreen; - -[slang] -attribute = brightmagenta; -boolean = brightmagenta; -constant = lightgray; -constant.builtin = brightmagenta; -constructor = brightcyan; -function = brightcyan; -function.builtin = brown; -function.call = brightcyan; -function.method = brightcyan; -function.method.call = brightcyan; -keyword = yellow; -keyword.coroutine = yellow; -keyword.exception = yellow; -keyword.import = 
yellow; -keyword.modifier = yellow; -keyword.operator = yellow; -keyword.type = yellow; -module = brightcyan; -operator = brightcyan; -property = brightcyan; -punctuation.bracket = brightcyan; -punctuation.delimiter = brightcyan; -string = green; -type = yellow; -type.builtin = brightgreen; -type.definition = yellow; -variable.builtin = brightred; -variable.member = lightgray; -variable.parameter = lightgray; - -[smalltalk] -comment = brown; -constant = lightgray; -delimiter = brightcyan; -function.special = brightred; -keyword = yellow; -keyword.control = brightmagenta; -label = cyan; -tag = brightcyan; - -[sql] -comment = brown; -delimiter = brightcyan; -operator = brightcyan; -string = green; -type = yellow; - -[strace] -comment = brown; -constant = lightgray; -delimiter = brightcyan; -function.special = brightred; -keyword = yellow; -string = green; -tag = brightcyan; - -[tcl] -comment = brown; -delimiter = brightcyan; -delimiter.special = brightmagenta; -keyword = yellow; -string = green; -string.special = brightgreen; - -[toml] -comment = brown; -delimiter = brightcyan; -keyword = yellow; -operator = brightcyan; -string.special = brightgreen; -tag = brightcyan; -variable = lightgray; - -[turtle] -base = brightmagenta; -comment = brown; -function.special = brightred; -keyword = yellow; -keyword.control = brightmagenta; -keyword.directive = magenta; -keyword.other = white; -label = cyan; -prefix = brightmagenta; -string = green; -string.special = brightgreen; - -[typescript] -comment = brown; -delimiter = brightcyan; -delimiter.special = brightmagenta; -keyword = yellow; -label = cyan; -number.builtin = brightgreen; -operator = brightcyan; -operator.word = yellow; -string = green; -string.special = brightgreen; - -[verilog] -comment = brown; -delimiter = brightcyan; -delimiter.special = brightmagenta; -keyword = yellow; -string = green; - -[vhdl] -function = brightcyan; -keyword = yellow; -keyword.directive = magenta; -keyword.other = white; -label = cyan; 
-string = green; -string.special = brightgreen; -tag = brightcyan; - -[xml] -comment.special = brightgreen; -constant = lightgray; -delimiter = brightcyan; -keyword = yellow; -tag.special = white; -variable = lightgray; - -[yaml] -comment = brown; -constant.builtin = brightmagenta; -delimiter = brightcyan; -label = cyan; -number = lightgray; -operator = brightcyan; -property.key = yellow; -string = green; -string.special = brightgreen; -type = yellow; diff --git a/misc/syntax-ts/display-names b/misc/syntax-ts/display-names deleted file mode 100644 index 87f84ee75c..0000000000 --- a/misc/syntax-ts/display-names +++ /dev/null @@ -1,160 +0,0 @@ -# Tree-sitter grammar display names -# -# Format: -# Spaces are allowed in the display name. - -ada Ada Program -agda Agda -asm Assembly -astro Astro -awk AWK Program -bash Shell Script -bicep Bicep -bison Yacc/Bison -bibtex BibTeX -blueprint Blueprint -c C Program -c_sharp C# Program -caddy Caddyfile -cairo Cairo -clojure Clojure -cmake CMake -cobol COBOL -commonlisp Lisp -cpp C++ Program -css CSS -csv CSV -cuda CUDA -cue CUE -d D Program -dart Dart Program -devicetree Device Tree -dhall Dhall -diff Diff -dockerfile Dockerfile -dot Graphviz DOT -earthfile Earthfile -editorconfig EditorConfig -elixir Elixir -elm Elm -erlang Erlang Program -fennel Fennel -fish Fish Shell -forth Forth -fortran Fortran Program -gdscript GDScript -gdshader GDShader -gitattributes Git Attributes -gitignore Git Ignore -gleam Gleam -glsl GLSL Shader -go Go Program -gotmpl Go Template -graphql GraphQL -groovy Groovy -hack Hack -hare Hare -haskell Haskell Program -haxe Haxe -hcl HCL -terraform Terraform -heex HEEx -hjson HJSON -hlsl HLSL Shader -html HTML -idl IDL -ini INI File -java Java Program -javascript JavaScript Program -jq jq -json JSON -json5 JSON5 -jsonnet Jsonnet -julia Julia -just Justfile -kcl KCL -kdl KDL -kotlin Kotlin Program -latex LaTeX -lua Lua Program -make Makefile -markdown Markdown -matlab MATLAB/Octave -meson Meson Build -muttrc 
Muttrc -nickel Nickel -nim Nim -nix Nix -nu Nushell -objc Objective-C -ocaml OCaml Program -odin Odin -org Org Mode -pascal Pascal Program -perl Perl Program -php PHP Program -pkl Pkl -po PO Translation -powershell PowerShell -prisma Prisma -properties Java Properties -proto Protocol Buffers -prql PRQL -puppet Puppet -purescript PureScript -python Python Program -qmljs QML -r R Program -racket Racket -rescript ReScript -robot Robot Framework -roc Roc -ron RON -rst reStructuredText -ruby Ruby Program -rust Rust Program -satysfi SATySFi -scala Scala Program -scheme Scheme -scss SCSS -slim Slim -slint Slint -smalltalk Smalltalk -sml Standard ML -snakemake Snakemake -solidity Solidity -sql SQL -starlark Starlark -strace Strace Output -svelte Svelte -swift Swift Program -tcl Tcl Program -teal Teal -templ Templ -tera Tera -textproto Text Proto -thrift Thrift -tlaplus TLA+ -tmux tmux Config -toml TOML -tsv TSV -turtle RDF Turtle -twig Twig -typescript TypeScript Program -typst Typst -usd USD -vala Vala -verilog Verilog/SystemVerilog -vhdl VHDL Program -vim Vim Script -vue Vue -wgsl WGSL -wit WIT -xml XML -xquery XQuery -yaml YAML -yang YANG -yuck Yuck -zeek Zeek -zig Zig -zsh Zsh Script diff --git a/misc/syntax-ts/extensions b/misc/syntax-ts/extensions deleted file mode 100644 index 4a1a7eaa60..0000000000 --- a/misc/syntax-ts/extensions +++ /dev/null @@ -1,153 +0,0 @@ -# Tree-sitter grammar extension mappings -# -# Format: ... -# Extensions must start with a dot. -# MC matches the file's extension against these to select a grammar. 
- -ada .adb .ads .ada -agda .agda -asm .s .S .asm -astro .astro -awk .awk .gawk .mawk -bash .sh .bash -bicep .bicep -bison .y .yy .yxx .yacc -bibtex .bib -blueprint .blp -c .c .h -c_sharp .cs -caddy .caddyfile -cairo .cairo -clojure .clj .cljs .cljc .edn -cmake .cmake -cobol .cob .cbl .cpy -commonlisp .lisp .cl .lsp .asd .el -cpp .cpp .cxx .cc .c++ .hpp .hxx .hh .h++ .ii .i -css .css -csv .csv -cuda .cu .cuh -cue .cue -d .d .di -dart .dart -devicetree .dts .dtsi -dhall .dhall -diff .diff .patch -dockerfile .dockerfile -dot .dot .gv -elixir .ex .exs -elm .elm -erlang .erl .hrl -fennel .fnl -fish .fish -forth .4th .fth .forth -fortran .f90 .f95 .f03 .f08 .f .for .fpp .ftn -gdscript .gd -gdshader .gdshader -gleam .gleam -glsl .glsl .vert .frag .geom .tesc .tese .comp -go .go -gotmpl .tmpl .tpl .gotmpl .gohtml .gotxt -graphql .graphql .gql -groovy .groovy .gradle -hack .hack -hare .hare -haskell .hs .lhs -haxe .hx -hcl .hcl -terraform .tf .tfvars -heex .heex -hjson .hjson -hlsl .hlsl -html .html .htm .xhtml -idl .idl -ini .ini .cfg .inf -java .java -javascript .js .mjs .cjs .jsx -jq .jq -json .json .jsonc -json5 .json5 -jsonnet .jsonnet .libsonnet -julia .jl -kcl .k -kdl .kdl -kotlin .kt .kts -latex .tex .ltx .sty .cls -lua .lua -make .mk -markdown .md .markdown .mkd -matlab .m .octave -nickel .ncl -nim .nim .nims .nimble -nix .nix -nu .nu -objc .mm -ocaml .ml .mli -odin .odin -org .org -pascal .pas .pp .lpr .dpr -perl .pl .pm .t -php .php .phtml -pkl .pkl -po .po .pot -powershell .ps1 .psm1 .psd1 -prisma .prisma -properties .properties -proto .proto -prql .prql -puppet .puppet -purescript .purs -python .py .pyw .pyi -qmljs .qml -r .r .R .Rmd -racket .rkt -rescript .res .resi -robot .robot -roc .roc -ron .ron -rst .rst -ruby .rb .rake .gemspec -rust .rs -satysfi .saty .satyh -scala .scala .sc .sbt -scheme .scm .ss -scss .scss -slim .slim -smalltalk .st -slint .slint -sml .sml .sig .fun -snakemake .smk -solidity .sol -sql .sql -starlark .star .bzl -strace .strace 
-svelte .svelte -swift .swift -tcl .tcl .tk .wish .exp -teal .tl -templ .templ -tera .tera -textproto .textproto .pbtxt -thrift .thrift -tlaplus .tla -toml .toml -tsv .tsv -turtle .ttl .turtle -twig .twig -typescript .ts .mts .cts .tsx -typst .typ -usd .usda .usd -vala .vala .vapi -verilog .v .vh .sv .svh -vhdl .vhd .vhdl -vim .vim -vue .vue -wgsl .wgsl -wit .wit -xml .xml .xsl .xslt .xsd .svg .rss .atom .plist -xquery .xq .xquery -yaml .yml .yaml -yang .yang -yuck .yuck -zeek .zeek .bro -zig .zig -zsh .zsh diff --git a/misc/syntax-ts/filenames b/misc/syntax-ts/filenames deleted file mode 100644 index 285a6c9521..0000000000 --- a/misc/syntax-ts/filenames +++ /dev/null @@ -1,20 +0,0 @@ -# Tree-sitter grammar filename mappings -# -# Format: ... -# Matches exact basenames (no path component). - -bash .bashrc .bash_profile .profile PKGBUILD -caddy Caddyfile -cmake CMakeLists.txt -dockerfile Dockerfile -earthfile Earthfile -editorconfig .editorconfig -gitattributes .gitattributes -gitignore .gitignore -just justfile Justfile -make Makefile makefile GNUmakefile -meson meson.build meson_options.txt -muttrc muttrc .muttrc .neomuttrc neomuttrc -ruby Vagrantfile Gemfile Rakefile -tmux .tmux.conf -zsh .zshrc .zshenv .zprofile .zlogin .zlogout diff --git a/misc/syntax-ts/queries/ada-highlights.scm b/misc/syntax-ts/queries/ada-highlights.scm deleted file mode 100644 index 2084283f1e..0000000000 --- a/misc/syntax-ts/queries/ada-highlights.scm +++ /dev/null @@ -1,134 +0,0 @@ -;; Tree-sitter highlight queries for Ada language -;; Colors aligned with MC's ada95.syntax - -;; General keywords -> yellow (@keyword) -[ - "abort" - "abs" - "and" - "delay" - "delta" - "digits" - "do" - "in" - "is" - "mod" - "not" - "null" - "of" - "or" - "others" - "out" - "pragma" - "raise" - "range" - "renames" - "return" - "reverse" - "separate" - "task" - "use" - "with" - "xor" -] @keyword - -;; Control flow -> brightred (@function.special) -[ - "begin" - "case" - "declare" - "else" - "elsif" - "end" 
- "entry" - "exception" - "exit" - "for" - "if" - "loop" - "private" - "protected" - "select" - "then" - "until" - "when" - "while" -] @function.special - -;; Type-like keywords -> cyan (@label) -[ - "array" - "record" - "some" - "subtype" -] @label - -;; Declaration keywords -> brightcyan (@property) -[ - "abstract" - "accept" - "access" - "all" - "at" - "constant" - "goto" - "interface" - "limited" - "overriding" - "tagged" - "type" -] @property - -;; Definition keywords -> brightmagenta (@constant.builtin) -[ - "body" - "function" - "generic" - "new" - "package" - "procedure" -] @constant.builtin - -(comment) @comment -(string_literal) @string -(character_literal) @string.special - -;; Operators -> brightgreen (@string.special) to match MC -[ - "+" - "-" - "*" - "/" - "**" - "&" - "=" - "/=" - "<" - ">" - "<=" - ">=" - ":=" - "=>" - ".." - "<>" -] @string.special - -;; Delimiters -> brightcyan (@delimiter) -[ - "." - "," - ":" - ";" - "(" - ")" -] @delimiter - -;; Predefined type names -> yellow (keyword) to match legacy -((identifier) @keyword - (#any-of? 
@keyword - "Boolean" "Integer" "Natural" "Positive" "Float" - "Character" "String" "Duration" - "Wide_Character" "Wide_String" - "Wide_Wide_Character" "Wide_Wide_String")) - -(label (identifier) @label) diff --git a/misc/syntax-ts/queries/asm-highlights.scm b/misc/syntax-ts/queries/asm-highlights.scm deleted file mode 100644 index e9d8f2b8dd..0000000000 --- a/misc/syntax-ts/queries/asm-highlights.scm +++ /dev/null @@ -1,37 +0,0 @@ -;; Tree-sitter highlight queries for x86 Assembly -;; Colors aligned with MC's assembler.syntax -;; MC: keywords=white, registers=brightmagenta, comments=brown, strings=green - -;; Registers -> brightmagenta (@keyword.control) -(reg) @keyword.control - -;; Labels -> cyan (@label) -(label) @label - -;; Instructions (opcode mnemonics) -> white (@keyword.other) -(instruction) @keyword.other - -;; Meta directives -> white (@keyword.other) -(meta) @keyword.other - -;; Constants -> white (@keyword.other) -(const) @keyword.other - -;; Pointer types -> white (@keyword.other) -[ - "byte" - "word" - "dword" - "qword" - "ptr" -] @keyword.other - -;; Comments -> brown -(line_comment) @comment -(block_comment) @comment - -;; Strings -> green -(string) @string - -;; Identifiers -(ident) @constant diff --git a/misc/syntax-ts/queries/awk-highlights.scm b/misc/syntax-ts/queries/awk-highlights.scm deleted file mode 100644 index ffc51947e7..0000000000 --- a/misc/syntax-ts/queries/awk-highlights.scm +++ /dev/null @@ -1,89 +0,0 @@ -;; Tree-sitter highlight queries for AWK language -;; Colors aligned with MC's default awk.syntax - -;; Control flow keywords -> white (@keyword.other) -[ - "if" - "else" - "while" - "for" - "do" - "delete" - "exit" - "return" - "in" - "getline" - "switch" - "case" - "default" -] @keyword.other - -(break_statement) @keyword.other -(continue_statement) @keyword.other -(next_statement) @keyword.other -(nextfile_statement) @keyword.other - -;; BEGIN/END -> brightred (@function.special) -- MC uses red -[ - "BEGIN" - "END" - 
"BEGINFILE" - "ENDFILE" -] @function.special - -;; function keyword -> brightmagenta (@delimiter.special) -- MC uses brightmagenta -"function" @delimiter.special - -;; func (gawk extension) -> brightmagenta (@delimiter.special) -"func" @delimiter.special - -(comment) @comment -(string) @string -(regex) @string.special - -(field_ref) @variable.builtin - -;; print/printf -> white (@keyword.other) -- MC uses white -[ - "print" - "printf" -] @keyword.other - -;; Operators -> yellow (@operator.word) -- MC uses yellow/24 for math/comparison operators -[ - "+" - "-" - "*" - "/" - "%" - "^" - "**" - "=" - "+=" - "-=" - "*=" - "/=" - "%=" - "^=" - "==" - "!=" - "<" - ">" - "<=" - ">=" - "&&" - "||" - "!" - "~" - "!~" - "++" - "--" - "?" - ":" -] @operator.word - -;; Delimiters -[ - "," - ";" -] @delimiter diff --git a/misc/syntax-ts/queries/bash-highlights.scm b/misc/syntax-ts/queries/bash-highlights.scm deleted file mode 100644 index 0a0c17cdba..0000000000 --- a/misc/syntax-ts/queries/bash-highlights.scm +++ /dev/null @@ -1,192 +0,0 @@ -;; Tree-sitter highlight queries for Bash/Shell -;; Colors matched to MC's built-in sh.syntax - -;; ----------------------------------------------------------- -;; 1. Language keywords that are grammar tokens -> yellow -;; sh.syntax: break case continue declare do done echo elif -;; else esac exit export fi for getopts if in printf read -;; return select set shift source then trap umask unset -;; until wait while clear -;; ----------------------------------------------------------- - -;; Keywords that are anonymous grammar tokens -[ - "if" - "then" - "else" - "elif" - "fi" - "case" - "esac" - "for" - "while" - "until" - "do" - "done" - "in" - "select" - "declare" - "local" - "export" - "readonly" - "typeset" - "unset" -] @keyword - -;; "function" keyword -> brightmagenta -"function" @keyword.function - -;; ----------------------------------------------------------- -;; 2. 
Function definitions -> brightmagenta -;; sh.syntax: function...() and name() at line start -;; ----------------------------------------------------------- - -;; function foo() { ... } -(function_definition name: (word) @function.definition) -(function_definition "(" @function.definition) -(function_definition ")" @function.definition) - -;; ----------------------------------------------------------- -;; 3. Commands -> cyan (external) or yellow (builtins) -;; The builtin list matches sh.syntax yellow keywords. -;; Security commands (gpg, ssh, etc.) -> red. -;; Captures are on command_name (same node width) so -;; pattern order determines priority: builtins first. -;; ----------------------------------------------------------- - -;; Shell builtins -> yellow (must be before generic @function) -((command_name) @keyword - (#any-of? @keyword - "echo" "printf" "read" "set" "eval" "exec" "exit" - "return" "source" "trap" "wait" "getopts" "umask" - "shift" "break" "continue" "clear")) - -;; Security commands -> red -((command_name) @function.security - (#any-of? @function.security - "gpg" "md5sum" "openssl" "ssh" "scp")) - -;; All other commands -> cyan -(command_name) @function - -;; ----------------------------------------------------------- -;; 5. Comments -> brown -;; sh.syntax: context # \n brown -;; ----------------------------------------------------------- - -;; Shebang -> brightcyan (distinct from plain comments) -;; sh.syntax: context exclusive #! \n brightcyan/black -((comment) @comment.shebang - (#match? @comment.shebang "^#!")) - -(comment) @comment - -;; ----------------------------------------------------------- -;; 6. Strings -> green -;; sh.syntax: context " " green / context ' ' green -;; ----------------------------------------------------------- - -(string) @string -(raw_string) @string -(ansi_c_string) @string -(heredoc_body) @string -(heredoc_start) @string -(heredoc_end) @string - -;; ----------------------------------------------------------- -;; 7. 
Variable expansions -;; ----------------------------------------------------------- - -;; $VAR -> brightgreen -;; sh.syntax: keyword wholeright $+ brightgreen / keyword $ brightgreen -(simple_expansion) @variable.special - -;; Special variables $?, $#, $@, $*, $-, $$, $!, $_ -> brightred -;; sh.syntax: keyword $? brightred etc. -;; Capture both the $ and the special name inside simple_expansion -(simple_expansion "$" @variable.builtin (special_variable_name) @variable.builtin) - -;; Positional parameters $0-$9 -> brightred -;; sh.syntax: keyword wholeright $[0123456789] brightred -(simple_expansion "$" @variable.builtin - ((variable_name) @variable.positional - (#match? @variable.positional "^[0-9]$"))) - -;; ${VAR} -> brightgreen -;; sh.syntax: keyword ${*} brightgreen -(expansion) @variable.special - -;; $() command substitution delimiters -> brightgreen -;; sh.syntax: keyword $(*) brightgreen -;; Only color the $( and ) delimiters, not the whole content, -;; so commands inside get their normal colors (same as standalone). -"$(" @variable.special - -;; Backticks ` -> brightred -;; sh.syntax: keyword ` brightred -"`" @punctuation.backtick - -;; Variable names in assignments (left side of =) -> default (white) -;; sh.syntax does not color assignment targets -(variable_assignment - name: (variable_name) @variable) - -;; ----------------------------------------------------------- -;; 8. 
Punctuation and operators -;; ----------------------------------------------------------- - -;; ;; -> brightred -;; sh.syntax: keyword ;; brightred -";;" @punctuation.special - -;; ; -> brightcyan -;; sh.syntax: keyword ; brightcyan -";" @delimiter - -;; { } in compound statements -> brightcyan -;; sh.syntax: keyword { brightcyan / keyword } brightcyan -;; NOTE: not capturing bare "{" "}" globally to avoid overriding -;; the green color inside ${VAR} expansions (narrower wins) -(compound_statement "{" @delimiter) -(compound_statement "}" @delimiter) - -;; ( ) -> brightcyan -[ - "(" - ")" -] @delimiter - -;; Heredoc operators -> brightcyan -"<<" @operator -"<<-" @operator - -;; File descriptors (2>&1) -> brightred -;; sh.syntax: keyword whole 2>&1 brightred etc. -(file_descriptor) @variable.builtin - -;; Redirections -> brightcyan -[ - ">" - ">>" - "<" - ">&" - "&>" -] @operator - -;; Logical/pipe operators -> brightcyan -[ - "&&" - "||" - "|" - "&" -] @operator - -;; ----------------------------------------------------------- -;; 9. Miscellaneous -;; ----------------------------------------------------------- - -;; Regex patterns -> brightmagenta -(regex) @string.special - -;; Test operators (-f, -d, -z, -eq, -ne, -lt, -gt, etc.) -(test_operator) @operator diff --git a/misc/syntax-ts/queries/bison-highlights.scm b/misc/syntax-ts/queries/bison-highlights.scm deleted file mode 100644 index f7828dc37d..0000000000 --- a/misc/syntax-ts/queries/bison-highlights.scm +++ /dev/null @@ -1,36 +0,0 @@ -;; Tree-sitter highlight queries for Yacc/Bison -;; Colors aligned with MC's yxx.syntax -;; MC: keywords=yellow, preprocessor=brightred, comments=brown, strings=green - -;; Bison declaration names (%token, %type, %left, etc.) 
-> yellow (@keyword) -(declaration_name) @keyword - -;; Section markers -> brightcyan (@delimiter) -[ - "%{" - "%}" - "%%" -] @delimiter - -;; Comments -> brown -(comment) @comment - -;; Strings -> green -(string_literal) @string -(char_literal) @string.special - -;; Number literals -> lightgray -(number_literal) @number - -;; Grammar rule identifiers -> brightcyan (@tag) -(grammar_rule_identifier) @tag - -;; Type tags -> yellow (@type) -(type_tag) @type - -;; Punctuation -[ - ":" - "|" - ";" -] @delimiter diff --git a/misc/syntax-ts/queries/c-highlights.scm b/misc/syntax-ts/queries/c-highlights.scm deleted file mode 100644 index 1d804137ff..0000000000 --- a/misc/syntax-ts/queries/c-highlights.scm +++ /dev/null @@ -1,160 +0,0 @@ -;; Tree-sitter highlight queries for C language -;; Colors aligned with MC's default c.syntax - -;; Keywords -> yellow -[ - "break" - "case" - "const" - "continue" - "default" - "do" - "else" - "enum" - "for" - "goto" - "if" - "inline" - "return" - "sizeof" - "struct" - "switch" - "typedef" - "union" - "volatile" - "while" -] @keyword - -;; Storage class specifiers -> yellow (same as keyword in MC) -[ - "auto" - "extern" - "register" - "static" -] @keyword - -;; C11/C23 keywords that are grammar tokens -> yellow -[ - "_Atomic" - "_Noreturn" - "alignas" - "constexpr" -] @keyword - -;; C11/C23 keywords recognized as type identifiers by the grammar -((type_identifier) @keyword - (#any-of? @keyword - "_Bool" "_Complex" "_Imaginary" "wchar_t")) - -;; C11/C23 keywords recognized as identifiers by the grammar -((identifier) @keyword - (#any-of? @keyword - "static_assert" "_Static_assert" - "_Alignas" "_Alignof" - "_Noreturn" "_Generic" - "thread_local" "_Thread_local" - "typeof" "typeof_unqual" - "alignof" "restrict" "asm")) - -;; Ellipsis -> yellow -"..." 
@keyword - -;; Preprocessor -> brightred -[ - "#define" - "#elif" - "#else" - "#endif" - "#if" - "#ifdef" - "#ifndef" - "#include" -] @function.special -(preproc_directive) @function.special - -;; Arithmetic/comparison/logical operators -> yellow -[ - "!" - "!=" - "%" - "&&" - "*" - "+" - "++" - "+=" - "-" - "--" - "-=" - "/" - "<" - "<<" - "<=" - "=" - "==" - ">" - ">=" - ">>" - "||" -] @operator.word - -;; Field operators -> yellow -[ - "." - "->" -] @operator.word - -;; Bitwise operators -> brightmagenta -[ - "&" - "|" - "^" - "~" -] @delimiter.special - -;; Semicolons -> brightmagenta -";" @delimiter.special - -;; Ternary operators -> brightcyan -[ - "?" - ":" -] @delimiter - -;; Brackets -> brightcyan -[ - "," - "(" - ")" - "[" - "]" - "{" - "}" -] @delimiter - -;; Strings -> green -(string_literal) @string -(system_lib_string) @string - -;; Escape sequences inside strings -> brightgreen -(escape_sequence) @string.special - -;; Constants -> lightgray -(null) @constant -(true) @constant -(false) @constant - -;; Numbers -> lightgray -(number_literal) @number - -;; Character literals -> brightgreen -(char_literal) @string.special - -;; Labels -> cyan -(statement_identifier) @label - -;; Types -> yellow -(primitive_type) @type -(sized_type_specifier) @type - -;; Comments -> brown -(comment) @comment diff --git a/misc/syntax-ts/queries/c_sharp-highlights.scm b/misc/syntax-ts/queries/c_sharp-highlights.scm deleted file mode 100644 index b16fa87d58..0000000000 --- a/misc/syntax-ts/queries/c_sharp-highlights.scm +++ /dev/null @@ -1,145 +0,0 @@ -;; Tree-sitter highlight queries for C# language -;; Colors aligned with MC's cs.syntax - -;; Most keywords -> yellow (@keyword) -- MC uses yellow for majority -[ - "abstract" - "as" - "async" - "await" - "base" - "break" - "case" - "catch" - "checked" - "const" - "continue" - "default" - "do" - "event" - "explicit" - "extern" - "finally" - "fixed" - "for" - "foreach" - "goto" - "if" - "implicit" - "in" - "is" - "lock" - "new" 
- "operator" - "out" - "override" - "params" - "partial" - "protected" - "readonly" - "ref" - "return" - "sealed" - "sizeof" - "stackalloc" - "static" - "switch" - "this" - "throw" - "try" - "typeof" - "unchecked" - "unsafe" - "var" - "virtual" - "volatile" - "where" - "while" - "yield" -] @keyword - -;; Type declaration keywords -> white (@keyword.other) -- MC uses white -[ - "class" - "delegate" - "enum" - "interface" - "namespace" - "struct" - "record" -] @keyword.other - -;; Access modifiers -> brightred (@function.special) -- MC uses brightred -[ - "internal" - "private" - "public" -] @function.special - -;; using -> brightcyan (@function) -- MC uses brightcyan -"using" @function - -(predefined_type) @type - -(comment) @comment -(string_literal) @string -(verbatim_string_literal) @string -(interpolated_string_expression) @string -(character_literal) @string.special - -;; null, true, false -> yellow (@keyword) -- MC uses yellow -(null_literal) @keyword -(boolean_literal) @keyword - -(labeled_statement - (identifier) @label) - -;; Operators -> yellow (@operator.word) -- MC uses yellow -[ - "+" - "-" - "*" - "/" - "%" - "=" - "+=" - "-=" - "*=" - "/=" - "%=" - "==" - "!=" - "<" - ">" - "<=" - ">=" - "&&" - "||" - "!" - "&" - "|" - "^" - "~" - "<<" - ">>" - "++" - "--" - "??" - "=>" -] @operator.word - -;; Delimiters -> brightcyan (@delimiter) -- MC uses brightcyan -[ - "." 
- "," - ":" - "(" - ")" - "[" - "]" - "{" - "}" -] @delimiter - -;; Semicolons -> brightmagenta (@delimiter.special) -- MC uses brightmagenta -";" @delimiter.special diff --git a/misc/syntax-ts/queries/caddy-highlights.scm b/misc/syntax-ts/queries/caddy-highlights.scm deleted file mode 100644 index 475fdcaac4..0000000000 --- a/misc/syntax-ts/queries/caddy-highlights.scm +++ /dev/null @@ -1,37 +0,0 @@ -;; Tree-sitter highlight queries for Caddyfile -;; Colors aligned with MC's caddyfile.syntax -;; MC: directives=yellow, subdirectives=brightcyan, comments=brown, strings=green - -;; Directives -> yellow (@keyword) -(directive) @keyword - -;; Server addresses -> brightmagenta (@keyword.control) -(address) @keyword.control - -;; Snippet names -> brightmagenta (@keyword.control) -(snippet_name) @keyword.control - -;; Matchers -> brightmagenta (@keyword.control) -(matcher) @keyword.control - -;; Booleans -> brightgreen (@string.special) -(boolean) @string.special - -;; Placeholders -> brightred (@function.special) -(placeholder) @function.special - -;; Comments -> brown -(comment) @comment - -;; Strings -> green -(quoted_string_literal) @string -(string_literal) @constant - -;; Numeric literals -> brightgreen (@string.special) -(numeric_literal) @string.special - -;; Block delimiters -> yellow (@keyword) -[ - "{" - "}" -] @keyword diff --git a/misc/syntax-ts/queries/cmake-highlights.scm b/misc/syntax-ts/queries/cmake-highlights.scm deleted file mode 100644 index fe03f3bbcf..0000000000 --- a/misc/syntax-ts/queries/cmake-highlights.scm +++ /dev/null @@ -1,45 +0,0 @@ -;; Tree-sitter highlight queries for CMake language -;; Colors aligned with MC's default cmake.syntax - -;; Control flow keywords -> brightred (function.special, MC uses brightred for all commands) -(if) @function.special -(else) @function.special -(elseif) @function.special -(endif) @function.special -(foreach) @function.special -(endforeach) @function.special -(while) @function.special -(endwhile) 
@function.special -(function) @function.special -(endfunction) @function.special -(macro) @function.special -(endmacro) @function.special -(block) @function.special -(endblock) @function.special - -;; Commands -> brightred (function.special) -(normal_command - (identifier) @function.special) - -;; Comments -> brown (comment) -(line_comment) @comment -(bracket_comment) @comment - -;; Strings -> green (string) -(quoted_argument) @string - -;; Unquoted arguments -> default (variable) -(unquoted_argument) @variable - -;; All-caps arguments (properties/constants) -> white (tag.special) -((unquoted_argument) @tag.special - (#match? @tag.special "^[A-Z][A-Z\\d_]*$")) - -;; Variable references ${...} -> brightgreen (variable.special) -(variable_ref) @variable.special - -;; Parentheses -> brightcyan (delimiter) -[ - "(" - ")" -] @delimiter diff --git a/misc/syntax-ts/queries/cobol-highlights.scm b/misc/syntax-ts/queries/cobol-highlights.scm deleted file mode 100644 index 1d2a5dff04..0000000000 --- a/misc/syntax-ts/queries/cobol-highlights.scm +++ /dev/null @@ -1,40 +0,0 @@ -;; Tree-sitter highlight queries for COBOL -;; Colors aligned with MC's cobol.syntax -;; MC: keywords=yellow, division headings=cyan, I/O=brightred, comments=brown, strings=green - -;; Division/section headings -> cyan (@label) -(identification_division) @label -(environment_division) @label -(data_division) @label -(procedure_division) @label -(section_header) @label -(paragraph_header) @label - -;; Comments -> brown -(comment) @comment -(comment_entry) @comment - -;; Strings -> green -(string) @string -(x_string) @string -(h_string) @string -(n_string) @string - -;; Numbers -(integer) @constant -(decimal) @constant -(number) @constant - -;; Level numbers -> brightgreen (@string.special) -(level_number) @string.special -(level_number_88) @string.special - -;; PIC patterns -> brightgreen (@string.special) -(picture_x) @string.special -(picture_9) @string.special -(picture_a) @string.special -(picture_n) 
@string.special -(picture_edit) @string.special - -;; Identifiers -(qualified_word) @constant diff --git a/misc/syntax-ts/queries/commonlisp-highlights.scm b/misc/syntax-ts/queries/commonlisp-highlights.scm deleted file mode 100644 index d403aa35be..0000000000 --- a/misc/syntax-ts/queries/commonlisp-highlights.scm +++ /dev/null @@ -1,70 +0,0 @@ -;; Tree-sitter highlight queries for Common Lisp -;; Colors aligned with MC's lisp.syntax -;; Note: grammar directory is "lisp" but grammar name is "commonlisp" - -;; Core keywords/functions -> yellow (@keyword) -;; MC colors: apply, car, cdr, cons, defun, if, let, setq, etc. = yellow -[ - "defun" - "defmacro" - "defgeneric" - "defmethod" - "if" - "when" - "unless" - "do" - "loop" - "for" - "and" - "as" - "with" - "while" - "until" - "repeat" - "return" - "else" - "finally" - "in" - "then" - "into" -] @keyword - -(defun_keyword) @keyword - -(comment) @comment -(block_comment) @comment - -(str_lit) @string - -(num_lit) @number - -;; nil -> brightred (@variable.builtin) -- MC uses brightred for nil, t, lambda -(nil_lit) @variable.builtin - -;; :keyword-args -> white (@keyword.other) -- MC uses white for :keywords -(kwd_lit) @keyword.other - -(sym_lit) @variable - -(defun_header - function_name: (sym_lit) @function) -(defun_header - lambda_list: (list_lit - (sym_lit) @variable)) - -;; Function calls (first element of list) -(list_lit - . 
- (sym_lit) @function) - -;; Parentheses -> brightcyan (@delimiter) -- MC uses brightcyan -[ - "(" - ")" -] @delimiter - -(char_lit) @string.special - -;; Quote/unquote -> brightmagenta (@constant.builtin) -- MC uses brightmagenta for #', ', , -(quoting_lit) @constant.builtin -(unquoting_lit) @constant.builtin diff --git a/misc/syntax-ts/queries/cpp-highlights.scm b/misc/syntax-ts/queries/cpp-highlights.scm deleted file mode 100644 index 96c1f915f4..0000000000 --- a/misc/syntax-ts/queries/cpp-highlights.scm +++ /dev/null @@ -1,163 +0,0 @@ -;; Tree-sitter highlight queries for C++ language -;; Colors aligned with MC's default cxx.syntax - -;; Keywords -> yellow -[ - "alignas" - "alignof" - "break" - "case" - "catch" - "class" - "co_await" - "co_return" - "co_yield" - "concept" - "const" - "consteval" - "constexpr" - "constinit" - "continue" - "decltype" - "default" - "delete" - "do" - "else" - "enum" - "explicit" - "extern" - "final" - "for" - "friend" - "goto" - "if" - "inline" - "mutable" - "namespace" - "new" - "noexcept" - "operator" - "override" - "private" - "protected" - "public" - "requires" - "return" - "sizeof" - "static" - "static_assert" - "struct" - "switch" - "template" - "throw" - "try" - "typedef" - "typename" - "union" - "using" - "virtual" - "volatile" - "while" -] @keyword - -;; true/false/nullptr/this -> yellow (keyword) in MC cxx.syntax -(true) @keyword -(false) @keyword -"nullptr" @keyword -(this) @keyword - -;; Preprocessor -> brightred -[ - "#define" - "#elif" - "#else" - "#endif" - "#if" - "#ifdef" - "#ifndef" - "#include" -] @function.special -(preproc_directive) @function.special - -;; Operators -> yellow -[ - "!" 
- "!=" - "%" - "&&" - "*" - "+" - "++" - "+=" - "-" - "--" - "-=" - "/" - "<" - "<<" - "<=" - "=" - "==" - ">" - ">=" - ">>" - "||" - "::" -] @operator.word - -;; Semicolons -> brightmagenta -";" @delimiter.special - -;; Brackets/parens -> brightcyan -[ - "," - "(" - ")" - "[" - "]" - "{" - "}" -] @delimiter - -;; Other delimiters -> brightcyan -[ - "." - ":" -] @delimiter - -(string_literal) @string -(system_lib_string) @string -(raw_string_literal) @string - -(null) @keyword -(char_literal) @string.special - -(statement_identifier) @label -(primitive_type) @type -(sized_type_specifier) @type -(auto) @type - -;; Bitwise operators -> brightmagenta -[ - "&" - "|" - "^" - "~" -] @delimiter.special - -;; Alternative operator tokens -> yellow (keyword) -[ - "and" - "or" - "not" - "xor" - "bitand" - "bitor" - "compl" - "and_eq" - "or_eq" - "xor_eq" - "not_eq" -] @keyword - -(comment) @comment diff --git a/misc/syntax-ts/queries/css-highlights.scm b/misc/syntax-ts/queries/css-highlights.scm deleted file mode 100644 index 672f571ff9..0000000000 --- a/misc/syntax-ts/queries/css-highlights.scm +++ /dev/null @@ -1,80 +0,0 @@ -;; Tree-sitter highlight queries for CSS language -;; Colors aligned with MC's default css.syntax - -(comment) @comment - -;; Tag selectors -> white -(tag_name) @tag.special -;; Class selectors -> green -(class_selector) @string -(id_selector) @string -;; Pseudo-classes -> brightmagenta -(pseudo_class_selector (class_name) @delimiter.special) -;; Pseudo-elements -> white -(pseudo_element_selector (tag_name) @tag.special) - -;; Properties -> lightgray -(property_name) @constant - -(string_value) @keyword.other -;; Color values like #AABBCC -> red -(color_value) @comment.error -;; Numbers -> brightgreen (MC uses brightgreen for values) -(integer_value) @string.special -(float_value) @string.special - -;; Plain values (keywords like bold, center, etc) -> brightgreen -(plain_value) @string.special - -;; !important -> brightred -(important) @function.special 
- -;; Function names like rgb(), url() -> magenta -(function_name) @keyword.directive - -;; At-rules -> brightred -[ - "@media" - "@import" - "@charset" - "@keyframes" - "@supports" - "@namespace" - "@scope" -] @function.special - -(at_keyword) @function.special - -[ - "and" - "not" - "or" - "only" -] @keyword - -[ - "~" - ">" - "+" - "=" - "^=" - "|=" - "~=" - "$=" - "*=" -] @operator - -;; Semicolons -> brightmagenta -[ - ";" -] @delimiter.special - -;; Other delimiters -> brightcyan -[ - "," - ":" - "::" -] @delimiter - -;; Units like px, em -> brightgreen (MC uses brightgreen for units) -(unit) @string.special diff --git a/misc/syntax-ts/queries/cuda-highlights.scm b/misc/syntax-ts/queries/cuda-highlights.scm deleted file mode 100644 index 307fdcb7e9..0000000000 --- a/misc/syntax-ts/queries/cuda-highlights.scm +++ /dev/null @@ -1,128 +0,0 @@ -;; Tree-sitter highlight queries for CUDA language -;; Colors aligned with MC's default cuda.syntax -;; CUDA uses the C++ tree-sitter grammar with additional CUDA-specific keywords - -;; CUDA-specific function/variable type qualifiers -> white (keyword.other) -((identifier) @keyword.other - (#match? @keyword.other "^__(global|device|host|shared|constant|managed|restrict|noinline|forceinline)__$")) - -;; CUDA built-in variables -> white (keyword.other) -((identifier) @keyword.other - (#match? @keyword.other "^(threadIdx|blockIdx|blockDim|gridDim|warpSize)$")) - -;; CUDA synchronization -> white (keyword.other) -((identifier) @keyword.other - (#match? 
@keyword.other "^__(syncthreads|threadfence)$")) - -[ - "break" - "case" - "catch" - "class" - "const" - "constexpr" - "continue" - "default" - "delete" - "do" - "else" - "enum" - "explicit" - "extern" - "final" - "for" - "friend" - "if" - "inline" - "namespace" - "new" - "noexcept" - "override" - "private" - "protected" - "public" - "return" - "sizeof" - "static" - "struct" - "switch" - "template" - "throw" - "try" - "typedef" - "typename" - "union" - "using" - "virtual" - "volatile" - "while" -] @keyword - -;; true/false/nullptr -> yellow (keyword) in MC cuda.syntax -(true) @keyword -(false) @keyword -"nullptr" @keyword -(this) @keyword - -;; Preprocessor -> brightred -[ - "#define" - "#elif" - "#else" - "#endif" - "#if" - "#ifdef" - "#ifndef" - "#include" -] @function.special -(preproc_directive) @function.special - -;; Operators -> yellow (@operator.word) -- MC uses yellow for all operators -[ - "--" - "-" - "-=" - "->" - "=" - "!=" - "*" - "&" - "&&" - "+" - "++" - "+=" - "<" - "==" - ">" - ">=" - "<=" - "||" - "!" - "~" - "<<" - ">>" - "%" - "/" - "|" - "^" - "::" -] @operator.word - -;; Separators -"." 
@delimiter -"," @delimiter -":" @delimiter -";" @delimiter.special - -(string_literal) @string -(system_lib_string) @string -(raw_string_literal) @string - -(null) @constant -(char_literal) @string.special - -(statement_identifier) @label -(primitive_type) @type -(sized_type_specifier) @type - -(comment) @comment diff --git a/misc/syntax-ts/queries/d-highlights.scm b/misc/syntax-ts/queries/d-highlights.scm deleted file mode 100644 index 84a677d9b9..0000000000 --- a/misc/syntax-ts/queries/d-highlights.scm +++ /dev/null @@ -1,173 +0,0 @@ -;; Tree-sitter highlight queries for D language -;; Colors aligned with MC's d.syntax -;; D keywords are named nodes (sym_abstract, sym_break, etc.), not anonymous tokens - -;; Keywords -> yellow (@keyword) -- MC uses yellow for most keywords -[ - (abstract) - (alias) - (assert) - (break) - (case) - (cast) - (catch) - (class) - (const) - (continue) - (debug) - (default) - (delegate) - (delete) - (deprecated) - (do) - (else) - (enum) - (export) - (extern) - (final) - (finally) - (for) - (foreach) - (foreach_reverse) - (function) - (goto) - (if) - (immutable) - (in) - (interface) - (invariant) - (is) - (lazy) - (mixin) - (module) - (new) - (nothrow) - (out) - (override) - (package) - (pragma) - (private) - (protected) - (public) - (pure) - (ref) - (return) - (scope) - (shared) - (static) - (struct) - (switch) - (synchronized) - (template) - (throw) - (try) - (typeid) - (typeof) - (union) - (unittest) - (version) - (while) - (with) -] @keyword - -;; import -> magenta (@keyword.directive) -- MC uses magenta -(import) @keyword.directive - -;; Types -> yellow (@type) -- MC uses yellow for types too -[ - (auto) - (bool) - (byte) - (ubyte) - (short) - (ushort) - (int) - (uint) - (long) - (ulong) - (float) - (double) - (real) - (char) - (wchar) - (dchar) - (string) - (wstring) - (dstring) - (size_t) - (ptrdiff_t) - (void) -] @type - -(comment) @comment -(string_literal) @string -(raw_string) @string -(hex_string) @string -(token_string) 
@string -(char_literal) @string.special - -;; true, false, null, super, this -> brightred (@variable.builtin) -;; MC uses brightred for these -(true) @variable.builtin -(false) @variable.builtin -(null) @variable.builtin -(super) @variable.builtin -(this) @variable.builtin - -;; Special keywords (__FILE__ etc.) -> red -- closest is @function.special (brightred) -(special_keyword) @function.special - -(module_declaration - (module_fqn) @type) - -;; Operators -> yellow (@operator.word) -- MC uses yellow for all operators -[ - "+" - "-" - "*" - "/" - "%" - "^^" - "~" - "&" - "|" - "^" - "<<" - ">>" - "=" - "+=" - "-=" - "*=" - "/=" - "%=" - "&=" - "|=" - "^=" - "<<=" - ">>=" - "==" - "!=" - "<" - ">" - "<=" - ">=" - "&&" - "||" - "!" - "++" - "--" - ".." - "=>" - ":" -] @operator.word - -;; Delimiters -> brightcyan (@delimiter) -- MC uses brightcyan for parens, braces, etc. -[ - "." - "," -] @delimiter - -;; Semicolons -> brightmagenta (@delimiter.special) -- MC uses brightmagenta -";" @delimiter.special - -(label (identifier) @label) diff --git a/misc/syntax-ts/queries/diff-highlights.scm b/misc/syntax-ts/queries/diff-highlights.scm deleted file mode 100644 index 5dce3fa23c..0000000000 --- a/misc/syntax-ts/queries/diff-highlights.scm +++ /dev/null @@ -1,35 +0,0 @@ -;; Tree-sitter highlight queries for Diff/Patch output -;; Colors aligned with MC's default diff.syntax - -;; # comments -> brightcyan (tag, MC uses brightcyan for # lines) -(comment) @tag - -;; diff command line -> brightred (function.special, MC uses white+red) -(command) @function.special - -;; file change headers (new file, deleted file, etc) -> brightmagenta -(file_change) @markup.heading - -;; --- +++ *** filenames -> brightmagenta (markup.heading) -(filename) @markup.heading - -;; similarity index -> brown (comment) -(similarity) @comment - -;; index abc..def -> brown (comment) -(index) @comment - -;; mode changes -> brown (comment) -(mode) @comment - -;; @@ location -> brightcyan (delimiter) 
-(location) @delimiter - -;; Additions -> brightgreen -(addition) @markup.addition - -;; Deletions -> brightred -(deletion) @markup.deletion - -;; Context lines -> yellow (keyword, MC default context is yellow) -(context) @keyword diff --git a/misc/syntax-ts/queries/dockerfile-highlights.scm b/misc/syntax-ts/queries/dockerfile-highlights.scm deleted file mode 100644 index c1a184ae30..0000000000 --- a/misc/syntax-ts/queries/dockerfile-highlights.scm +++ /dev/null @@ -1,59 +0,0 @@ -;; Tree-sitter highlight queries for Dockerfile -;; Colors aligned with MC's default dockerfile.syntax - -;; Instructions -> yellow (@keyword) -- MC uses yellow for all instructions -[ - "FROM" - "RUN" - "CMD" - "COPY" - "ADD" - "EXPOSE" - "ENV" - "WORKDIR" - "ENTRYPOINT" - "VOLUME" - "USER" - "ARG" - "LABEL" - "HEALTHCHECK" - "SHELL" - "AS" - "ONBUILD" - "STOPSIGNAL" -] @keyword - -;; MAINTAINER -> brightred (@function.special) -- MC uses brightred (deprecated) -"MAINTAINER" @function.special - -(comment) @comment - -(image_spec - name: (image_name) @function) -(image_spec - tag: (image_tag) @string.special) -(image_spec - digest: (image_digest) @string.special) - -(double_quoted_string) @string -(single_quoted_string) @string -(unquoted_string) @string - -;; Variable expansions -> brightgreen (@string.special) -- MC uses brightgreen for ${*} -(expansion) @string.special - -;; Port numbers -> brightgreen (@number.builtin) -- MC uses brightgreen for numbers -(expose_port) @number.builtin - -(label_pair - key: (unquoted_string) @property) - -(env_pair - name: (unquoted_string) @variable) - -(arg_instruction - name: (unquoted_string) @variable) - -;; Params (--mount, --from, etc.) 
-> brightmagenta (@delimiter.special) -;; MC uses brightmagenta for options -(param) @delimiter.special diff --git a/misc/syntax-ts/queries/dot-highlights.scm b/misc/syntax-ts/queries/dot-highlights.scm deleted file mode 100644 index 0fbd06ffb0..0000000000 --- a/misc/syntax-ts/queries/dot-highlights.scm +++ /dev/null @@ -1,58 +0,0 @@ -;; Tree-sitter highlight queries for Graphviz DOT -;; Colors aligned with MC's dot.syntax -;; MC: graph keywords=brightred, node/edge=yellow, attributes=white, comments=brown, strings=green - -;; Graph keywords -> brightred (@function.special) -[ - "strict" - "graph" - "digraph" -] @function.special - -;; Subgraph keyword -> brightred (@function.special) -"subgraph" @function.special - -;; Node/edge keywords -> yellow (@keyword) -[ - "node" - "edge" -] @keyword - -;; Edge operators -> brightred (@function.special) -[ - "->" - "--" -] @function.special - -;; Attribute names -> white (@keyword.other) -(attribute - name: (id) @keyword.other) - -;; Operators (=, :, +) -> yellow (@operator.word) -(operator) @operator.word - -;; Comments -> brown -(comment) @comment - -;; Strings -> green -(string_literal) @string - -;; HTML strings -> green -(html_string) @string - -;; Preprocessor -> brightred (@function.special) -(preproc) @function.special - -;; Brackets -> brightcyan (@delimiter) -[ - "{" - "}" - "[" - "]" -] @delimiter - -;; Punctuation -> brightcyan (@delimiter) -"," @delimiter - -;; Semicolons -> brightmagenta (@delimiter.special) -";" @delimiter.special diff --git a/misc/syntax-ts/queries/erlang-highlights.scm b/misc/syntax-ts/queries/erlang-highlights.scm deleted file mode 100644 index 7e1b6c4b7c..0000000000 --- a/misc/syntax-ts/queries/erlang-highlights.scm +++ /dev/null @@ -1,112 +0,0 @@ -;; Tree-sitter highlight queries for Erlang -;; Colors aligned with MC's erlang.syntax - -;; Control flow keywords -> yellow (@keyword) -;; MC uses yellow for after, begin, case, catch, cond, end, fun, if, etc. 
-[ - "fun" - "end" - "if" - "case" - "of" - "receive" - "after" - "when" - "begin" - "try" - "catch" - "throw" -] @keyword - -;; Module directives -> brightmagenta (@delimiter.special) -;; MC uses brightmagenta for -module, -export, -compile, etc. -(module_attribute) @delimiter.special -(module_export) @delimiter.special - -;; Word operators -> brown (@comment) -- MC uses brown for and, or, band, etc. -;; This is a deliberate choice: MC colors these same as comments (brown) -[ - "and" - "andalso" - "band" - "bnot" - "bor" - "bsl" - "bsr" - "bxor" - "div" - "not" - "or" - "orelse" - "rem" - "xor" -] @comment - -(comment) @comment - -(string) @string -(char) @string.special - -;; Atoms -> lightgray (@constant) -(atom) @constant - -;; Variables -> white (@keyword.other) -- MC uses white for Variables -(variable) @keyword.other - -(function_clause - name: (atom) @function) -(expr_function_call - (atom) @function) - -;; Operators -> yellow (@keyword) -- MC uses yellow for -> <- -[ - "->" - "=>" - ":=" - "=" - "!" - "|" - "::" - "<-" - "+" - "-" - "*" - "++" - "--" - "<<" - ">>" -] @keyword - -;; Comparison operators -> brown (@comment) -- MC uses brown -[ - "==" - "/=" - "=:=" - "=/=" - "<" - ">" - "=<" - ">=" -] @comment - -;; Delimiters -> brightcyan (@delimiter) -- MC uses brightcyan -[ - "," - "." 
- ";" - "(" - ")" - "[" - "]" -] @delimiter - -;; Curly braces -> cyan (@label) -- MC uses cyan for { } -[ - "{" - "}" -] @label - -;; Pipe operators -> brightcyan (@delimiter) -- MC uses brightcyan -[ - "||" -] @delimiter diff --git a/misc/syntax-ts/queries/fortran-highlights.scm b/misc/syntax-ts/queries/fortran-highlights.scm deleted file mode 100644 index 5e935b1d38..0000000000 --- a/misc/syntax-ts/queries/fortran-highlights.scm +++ /dev/null @@ -1,150 +0,0 @@ -;; Tree-sitter highlight queries for Fortran -;; Colors aligned with MC's fortran.syntax - -;; Type declarations -> brightcyan (@property) -- MC uses brightcyan for types -[ - "integer" - "real" - "character" - "logical" - "complex" - "double" -] @property - -;; Program structure keywords -> yellow (@keyword) -- MC uses yellow -[ - "program" - "end" - "subroutine" - "function" - "module" - "use" - "implicit" - "none" - "only" - "result" -] @keyword - -;; Declaration keywords -> brightcyan (@property) -- MC uses brightcyan -[ - "dimension" - "allocatable" - "intent" - "in" - "out" - "inout" - "parameter" - "data" - "block" - "common" - "external" - "format" - "type" - "class" - "interface" - "contains" - "public" - "private" - "abstract" - "extends" - "procedure" - "intrinsic" - "save" - "target" - "pointer" - "optional" - "value" - "volatile" - "sequence" - "generic" -] @property - -;; Control flow -> brightgreen (@number.builtin) -- MC uses brightgreen -[ - "do" - "if" - "then" - "else" - "elseif" - "endif" - "enddo" - "call" - "return" - "stop" - "continue" - "cycle" - "exit" - "goto" - "while" - "where" - "forall" - "select" - "case" - "default" - "associate" - "critical" - "concurrent" - "assign" - "entry" - "pause" - "allocate" -] @number.builtin - -;; I/O keywords -> brightmagenta (@constant.builtin) -- MC uses brightmagenta -[ - "read" - "write" - "print" - "open" - "close" - "inquire" - "rewind" - "backspace" - "endfile" -] @constant.builtin - -(comment) @comment - -(string_literal) @string - -;; 
Boolean literals -> brightred (@function.special) -- MC uses brightred -(boolean_literal) @function.special - -;; Operators -> yellow (@operator.word) -- MC uses yellow for arithmetic/comparison -[ - "=" - "+" - "-" - "*" - "/" - "**" - "==" - "/=" - "<" - ">" - "<=" - ">=" -] @operator.word - -;; Logical/relational operators -> brightred (@function.special) -- MC uses brightred -(logical_expression - operator: _ @function.special) -(relational_expression - operator: [ - ".eq." - ".ne." - ".lt." - ".gt." - ".le." - ".ge." - ] @function.special) -(unary_expression - operator: ".not." @function.special) - -;; Delimiters -[ - "," - "::" - "(" - ")" -] @delimiter diff --git a/misc/syntax-ts/queries/glsl-highlights.scm b/misc/syntax-ts/queries/glsl-highlights.scm deleted file mode 100644 index 278736219d..0000000000 --- a/misc/syntax-ts/queries/glsl-highlights.scm +++ /dev/null @@ -1,130 +0,0 @@ -;; Tree-sitter highlight queries for GLSL (OpenGL Shading Language) -;; Colors aligned with MC's default glsl.syntax - -;; Keywords -> yellow -[ - "break" - "case" - "const" - "continue" - "default" - "do" - "else" - "for" - "if" - "return" - "struct" - "switch" - "while" - "uniform" - "varying" - "attribute" - "in" - "out" - "inout" - "layout" - "flat" - "smooth" - "noperspective" - "centroid" - "sample" - "patch" - "buffer" - "shared" - "coherent" - "volatile" - "restrict" - "readonly" - "writeonly" - "precision" - "highp" - "mediump" - "lowp" -] @keyword - -;; Preprocessor directives -> brightred -"#define" @function.special -"#elif" @function.special -"#else" @function.special -"#endif" @function.special -"#if" @function.special -"#ifdef" @function.special -"#ifndef" @function.special -"#include" @function.special -(preproc_directive) @function.special - -;; Types -> yellow -(primitive_type) @type -(type_identifier) @type -(sized_type_specifier) @type - -;; Comments -> brown -(comment) @comment - -;; Strings -> green, chars -> brightgreen -(string_literal) @string 
-(char_literal) @string.special - -;; Built-in functions -> brightmagenta (MC uses brightmagenta for builtins) -(call_expression - function: (identifier) @function.macro - (#any-of? @function.macro - "length" "normalize" "dot" "cross" "reflect" "refract" - "mix" "clamp" "smoothstep" "step" "min" "max" - "abs" "sign" "floor" "ceil" "fract" "mod" - "pow" "exp" "log" "sqrt" "inversesqrt" - "sin" "cos" "tan" "asin" "acos" "atan" - "radians" "degrees" - "texture" "texture2D" "texture3D" "textureCube" - "texelFetch" "textureSize" - "dFdx" "dFdy" "fwidth" - "transpose" "inverse" "determinant" "distance" - "lessThan" "greaterThan" "equal" "notEqual" - "any" "all" "not")) - -;; Operators -> white (MC uses white for arithmetic/comparison operators) -[ - "=" - "+=" - "-=" - "*=" - "/=" - "%=" - "==" - "!=" - "<" - ">" - "<=" - ">=" - "&&" - "||" - "!" - "&" - "|" - "^" - "~" - "+" - "-" - "*" - "/" - "%" - "++" - "--" -] @keyword.other - -;; Brackets -> brightcyan -[ - "{" - "}" - "(" - ")" - "[" - "]" -] @delimiter - -;; Delimiters -> brightcyan for comma/dot, brightmagenta for semicolons -[ - "," -] @delimiter - -";" @delimiter.special diff --git a/misc/syntax-ts/queries/go-highlights.scm b/misc/syntax-ts/queries/go-highlights.scm deleted file mode 100644 index 7e578fca09..0000000000 --- a/misc/syntax-ts/queries/go-highlights.scm +++ /dev/null @@ -1,110 +0,0 @@ -;; Tree-sitter highlight queries for Go -;; Colors aligned with MC's default go.syntax - -;; Keywords -> yellow -[ - "break" - "case" - "chan" - "const" - "continue" - "default" - "defer" - "else" - "fallthrough" - "for" - "func" - "go" - "goto" - "if" - "import" - "interface" - "map" - "package" - "range" - "return" - "select" - "struct" - "switch" - "type" - "var" -] @keyword - -(comment) @comment - -(interpreted_string_literal) @string -(raw_string_literal) @string -(rune_literal) @constant - -;; nil/true/false/iota -> brown (function.builtin) in MC go.syntax -[ - (true) - (false) - (nil) - (iota) -] 
@function.builtin - -(label_name) @label - -;; Channel operator <- -> brightmagenta -"<-" @delimiter.special - -;; Operators -> brightcyan -[ - "=" - ":=" - "+=" - "-=" - "*=" - "/=" - "%=" - "&=" - "|=" - "^=" - "<<=" - ">>=" - "==" - "!=" - "<" - ">" - "<=" - ">=" - "&&" - "||" - "!" - "+" - "-" - "*" - "/" - "%" - "&" - "|" - "^" - "<<" - ">>" -] @operator - -;; Builtin types -> brightgreen -((type_identifier) @type.builtin - (#match? @type.builtin "^(int|int8|int16|int32|int64|uint|uint8|uint16|uint32|uint64|uintptr|float32|float64|complex64|complex128|byte|rune|string|bool|error)$")) - -;; Builtin functions -> brown -((identifier) @function.builtin - (#match? @function.builtin "^(append|cap|close|complex|copy|delete|imag|len|make|new|panic|print|println|real|recover)$")) - -;; Brackets/parens -> brightcyan -[ - "(" - ")" - "[" - "]" - "{" - "}" -] @delimiter - -;; Delimiters -> brightcyan -[ - "." - ";" - "," -] @delimiter diff --git a/misc/syntax-ts/queries/gotmpl-highlights.scm b/misc/syntax-ts/queries/gotmpl-highlights.scm deleted file mode 100644 index f8383d948d..0000000000 --- a/misc/syntax-ts/queries/gotmpl-highlights.scm +++ /dev/null @@ -1,145 +0,0 @@ -;; Tree-sitter highlight queries for Go Template language -;; Colors chosen to match Go conventions where applicable - -;; Keywords -> yellow (keyword) -[ - "if" - "else" - "range" - "with" - "end" - "template" - "define" - "block" - "break" - "continue" -] @keyword - -;; Comments -> brown (comment) -(comment) @comment - -;; Builtin functions (Go template + Sprig) -> brown (function.builtin) -;; Must be before @function: for equal ranges, first match wins -(function_call - function: (identifier) @function.builtin - (#any-of? 
@function.builtin - ;; Go template builtins - "and" "call" "eq" "ge" "gt" "html" "index" "js" "le" "len" "lt" - "ne" "not" "or" "print" "printf" "println" "urlquery" - ;; Sprig: strings - "abbrev" "abbrevboth" "camelcase" "cat" "contains" "hasPrefix" - "hasSuffix" "indent" "initials" "kebabcase" "lower" "nindent" - "nospace" "plural" "quote" "repeat" "replace" "shuffle" - "snakecase" "squote" "substr" "swapcase" "title" "trim" "trimAll" - "trimPrefix" "trimSuffix" "trimall" "trunc" "untitle" "upper" - "wrap" "wrapWith" - ;; Sprig: strings conversion - "atoi" "float64" "int" "int64" "toString" "toStrings" "toDecimal" - ;; Sprig: defaults - "coalesce" "compact" "default" "empty" "fail" "fromJson" - "mustFromJson" "mustToDate" "mustToJson" "mustToPrettyJson" - "mustToRawJson" "ternary" "toDate" "toJson" "toPrettyJson" - "toRawJson" - ;; Sprig: lists - "append" "chunk" "concat" "first" "has" "initial" "last" "list" - "mustAppend" "mustChunk" "mustCompact" "mustFirst" "mustHas" - "mustInitial" "mustLast" "mustPrepend" "mustPush" "mustRest" - "mustReverse" "mustSlice" "mustUniq" "mustWithout" "prepend" - "push" "rest" "reverse" "seq" "slice" "sortAlpha" "uniq" - "until" "untilStep" "without" - ;; Sprig: dicts - "deepCopy" "deepEqual" "dict" "dig" "get" "hasKey" "keys" - "merge" "mergeOverwrite" "mustDeepCopy" "mustMerge" - "mustMergeOverwrite" "omit" "pick" "pluck" "set" "unset" "values" - ;; Sprig: math - "add" "add1" "add1f" "addf" "biggest" "ceil" "div" "divf" - "floor" "max" "maxf" "min" "minf" "mod" "mul" "mulf" "round" - "sub" "subf" - ;; Sprig: dates - "ago" "date" "dateInZone" "dateModify" "date_in_zone" - "date_modify" "duration" "durationRound" "htmlDate" - "htmlDateInZone" "must_date_modify" "mustDateModify" "now" - "unixEpoch" - ;; Sprig: crypto - "adler32sum" "bcrypt" "buildCustomCert" "decryptAES" - "derivePassword" "encryptAES" "genCA" "genCAWithKey" - "genPrivateKey" "genSelfSignedCert" "genSelfSignedCertWithKey" - "genSignedCert" "genSignedCertWithKey" 
"htpasswd" "sha1sum" - "sha256sum" "sha512sum" - ;; Sprig: encoding - "b32dec" "b32enc" "b64dec" "b64enc" - ;; Sprig: paths - "base" "clean" "dir" "ext" "isAbs" "osBase" "osClean" "osDir" - "osExt" "osIsAbs" - ;; Sprig: regex - "regexFind" "regexFindAll" "regexMatch" "regexQuoteMeta" - "regexReplaceAll" "regexReplaceAllLiteral" "regexSplit" - "mustRegexFind" "mustRegexFindAll" "mustRegexMatch" - "mustRegexReplaceAll" "mustRegexReplaceAllLiteral" "mustRegexSplit" - ;; Sprig: reflection - "kindIs" "kindOf" "typeIs" "typeIsLike" "typeOf" - ;; Sprig: misc - "all" "any" "env" "expandenv" "getHostByName" "hello" "join" - "randAlpha" "randAlphaNum" "randAscii" "randBytes" "randInt" - "randNumeric" "semver" "semverCompare" "split" "splitList" - "splitn" "tuple" "urlJoin" "urlParse" "uuidv4" - ;; Helm-specific functions - "fromJsonArray" "fromYaml" "fromYamlArray" "include" "lookup" - "mustToToml" "required" "toToml" "toYaml" "toYamlPretty" "tpl")) - -;; Function calls -> brightcyan (function) -(function_call - function: (identifier) @function) - -;; Method calls -> brightcyan (function) -(method_call - method: (selector_expression - field: (field_identifier) @function)) - -;; Fields (.Values, .Release, .Name) -> white (property) -[ - (field) - (field_identifier) -] @property - -;; Variables ($name, $pool) -> brightred (variable.builtin) -(variable) @variable.builtin - -;; Strings -> green (string) -[ - (interpreted_string_literal) - (raw_string_literal) - (rune_literal) -] @string - -;; Escape sequences -> brightgreen (string.special) -(escape_sequence) @string.special - -;; Numbers -> lightgray (number) -[ - (int_literal) - (float_literal) - (imaginary_literal) -] @number - -;; Constants -> brightmagenta (constant.builtin) -[ - (true) - (false) - (nil) -] @constant.builtin - -;; Operators -> brightcyan (operator) -"|" @operator -":=" @operator -"=" @operator - -;; Delimiters -> brightcyan (delimiter) -"." 
@delimiter -"," @delimiter -"{{" @delimiter -"}}" @delimiter -"{{-" @delimiter -"-}}" @delimiter -"(" @delimiter -")" @delimiter diff --git a/misc/syntax-ts/queries/haskell-highlights.scm b/misc/syntax-ts/queries/haskell-highlights.scm deleted file mode 100644 index efeb479f92..0000000000 --- a/misc/syntax-ts/queries/haskell-highlights.scm +++ /dev/null @@ -1,79 +0,0 @@ -;; Tree-sitter highlight queries for Haskell -;; Colors aligned with MC's default haskell.syntax - -;; Keywords -> yellow -[ - "module" - "where" - "import" - "qualified" - "as" - "hiding" - "data" - "newtype" - "type" - "class" - "instance" - "deriving" - "do" - "let" - "in" - "case" - "of" - "if" - "then" - "else" - "infixl" - "infixr" - "infix" - "forall" -] @keyword - -;; Comments -> brown -(comment) @comment -(haddock) @comment - -;; Pragmas -> green (MC: {-# ... #-} context is green) -(pragma) @comment.special - -;; Strings -> green -(string) @string - -;; Chars -> brightgreen -(char) @string.special - -;; Numbers -> brightgreen (MC uses brightgreen for digits) -(integer) @number.builtin -(float) @number.builtin - -;; Constructors / Type names -> white (MC uses white for uppercase identifiers) -(constructor) @keyword.other - -;; Function signatures -(signature - name: (variable) @function) - -;; Named operators -> white (MC uses white for &&, ||, |, ^, ~, and infix ops) -(operator) @keyword.other - -;; Symbol operators -> yellow for $, *, +, /, <, >, -, =; white for |, \, @, ~ -[ - "=" - "->" - "<-" -] @operator.word - -[ - "::" - "=>" - "|" - "\\" - "@" - "~" -] @keyword.other - -;; Delimiters -> brightcyan -[ - "," - ";" -] @delimiter diff --git a/misc/syntax-ts/queries/hcl-highlights.scm b/misc/syntax-ts/queries/hcl-highlights.scm deleted file mode 100644 index b269246d56..0000000000 --- a/misc/syntax-ts/queries/hcl-highlights.scm +++ /dev/null @@ -1,90 +0,0 @@ -;; Tree-sitter highlight queries for generic HCL files -;; For Terraform-specific highlighting (.tf/.tfvars), see 
terraform-highlights.scm -;; HCL is a generic language where block names are arbitrary identifiers. - -[ - "if" - "else" - "endif" - "for" - "endfor" - "in" -] @keyword - -[ - (ellipsis) - "?" - "=>" -] @operator - -[ - "!" - "*" - "/" - "%" - "+" - "-" - ">" - ">=" - "<" - "<=" - "==" - "!=" - "&&" - "||" -] @operator - -[ - "." - ".*" - "," - "[*]" -] @delimiter - -[ - "{" - "}" - "[" - "]" - "(" - ")" -] @delimiter - -[ - ":" - "=" -] @operator - -((identifier) @type - (#match? @type "^(bool|string|number|object|tuple|list|map|set|any)$")) - -;; Top-level block names -> brightmagenta (keyword.directive) -(config_file - (body - (block - (identifier) @keyword.directive))) - - -(comment) @comment -(null_lit) @constant -(numeric_lit) @number -(bool_lit) @constant - -[ - (template_interpolation_start) - (template_interpolation_end) - (template_directive_start) - (template_directive_end) - (strip_marker) -] @operator - -[ - (heredoc_identifier) - (heredoc_start) -] @string - -[ - (quoted_template_start) - (quoted_template_end) - (template_literal) -] @string diff --git a/misc/syntax-ts/queries/html-highlights.scm b/misc/syntax-ts/queries/html-highlights.scm deleted file mode 100644 index 42e76f0694..0000000000 --- a/misc/syntax-ts/queries/html-highlights.scm +++ /dev/null @@ -1,35 +0,0 @@ -;; Tree-sitter highlight queries for HTML -;; Colors aligned with MC's default html.syntax - -;; Tags -> brightcyan -(tag_name) @tag - -;; Attributes -> yellow -(attribute_name) @property.key - -;; Attribute values -> cyan (MC uses cyan for quoted values) -(attribute_value) @label -(quoted_attribute_value) @label - -;; Entities like & -> brightgreen -(entity) @string.special - -;; Comments -> brown -(comment) @comment - -;; -> brightred -(doctype) @function.special -(erroneous_end_tag_name) @tag - -;; = -> brightred -[ - "=" -] @variable.builtin - -;; Angle brackets -> brightcyan -[ - "<" - ">" - "" -] @delimiter diff --git a/misc/syntax-ts/queries/idl-highlights.scm 
b/misc/syntax-ts/queries/idl-highlights.scm deleted file mode 100644 index 2cb1fbcf1a..0000000000 --- a/misc/syntax-ts/queries/idl-highlights.scm +++ /dev/null @@ -1,139 +0,0 @@ -;; Tree-sitter highlight queries for IDL (Interface Definition Language) -;; Colors aligned with MC's idl.syntax -;; MC: keywords=yellow, operators=yellow, comments=brown, strings=green - -;; Keywords -> yellow (@keyword) -[ - "module" - "interface" - "struct" - "union" - "enum" - "typedef" - "const" - "exception" - "valuetype" - "eventtype" - "component" - "home" - "factory" - "finder" - "native" - "local" - "abstract" - "custom" - "truncatable" - "supports" - "public" - "private" - "switch" - "case" - "default" - "void" - "in" - "out" - "inout" - "raises" - "readonly" - "attribute" - "oneway" - "context" - "import" - "provides" - "uses" - "emits" - "publishes" - "consumes" - "manages" - "primarykey" - "porttype" - "port" - "mirrorport" - "connector" - "multiple" - "typeid" - "typeprefix" -] @keyword - -;; Type keywords -> yellow (@keyword) -[ - "short" - "long" - "unsigned" - "char" - "octet" - "wchar" - "float" - "double" - "string" - "wstring" - "any" - "sequence" - "map" - "fixed" - "native" - "Object" - "ValueBase" -] @keyword - -;; Boolean type -> yellow (@keyword) -(boolean_type) @keyword - -;; Boolean literals -> yellow (@keyword) -[ - "TRUE" - "FALSE" -] @keyword - -;; Operators -> yellow (@operator.word) -[ - "+" - "-" - "*" - "/" - "%" - "=" - "::" - "<<" - ">>" - "~" -] @operator.word - -;; Preprocessor -> brightmagenta (@keyword.control) -(preproc_define) @keyword.control -(preproc_call) @keyword.control -(preproc_include) @keyword.control - -;; Comments -> brown -(comment) @comment - -;; Strings -> green -(string_literal) @string -(system_lib_string) @string - -;; Char literals -> brightgreen (@string.special) -(char_literal) @string.special - -;; Escape sequences -> brightgreen -(escape_sequence) @string.special - -;; Brackets -> brightcyan (@delimiter) -[ - "{" - "}" - "(" 
- ")" - "[" - "]" -] @delimiter - -;; Punctuation -> brightcyan (@delimiter) -[ - "," - ":" - "<" - ">" -] @delimiter - -;; Semicolons -> brightmagenta (@delimiter.special) -";" @delimiter.special diff --git a/misc/syntax-ts/queries/ini-highlights.scm b/misc/syntax-ts/queries/ini-highlights.scm deleted file mode 100644 index d08647ba07..0000000000 --- a/misc/syntax-ts/queries/ini-highlights.scm +++ /dev/null @@ -1,25 +0,0 @@ -;; Tree-sitter highlight queries for INI/Config files -;; Colors aligned with MC's default ini.syntax - -;; Section names [] -> yellow (keyword) -(section_name) @keyword - -;; Keys -> cyan (label, MC default context is cyan for keys) -(setting_name) @label - -;; Values -> brightcyan (delimiter, MC uses brightcyan for values after =) -(setting_value) @delimiter - -;; Comments -> brown (comment) -(comment) @comment - -;; = -> brightred (variable.builtin) -[ - "=" -] @variable.builtin - -;; Section brackets -> yellow (keyword) -[ - "[" - "]" -] @keyword diff --git a/misc/syntax-ts/queries/java-highlights.scm b/misc/syntax-ts/queries/java-highlights.scm deleted file mode 100644 index 1c7e05b6ca..0000000000 --- a/misc/syntax-ts/queries/java-highlights.scm +++ /dev/null @@ -1,141 +0,0 @@ -;; Tree-sitter highlight queries for Java -;; Colors aligned with MC's default java.syntax - -;; Keywords -> yellow -[ - "abstract" - "assert" - "break" - "case" - "catch" - "class" - "continue" - "default" - "do" - "else" - "enum" - "extends" - "final" - "finally" - "for" - "if" - "implements" - "import" - "instanceof" - "interface" - "native" - "new" - "package" - "private" - "protected" - "public" - "return" - "static" - "strictfp" - "switch" - "synchronized" - "throw" - "throws" - "transient" - "try" - "volatile" - "while" - "yield" - "record" - "sealed" - "permits" - "non-sealed" -] @keyword - -(this) @keyword -(super) @keyword - -;; Primitive types -> yellow (keyword) -[ - "byte" - "char" - "double" - "float" - "int" - "long" - "short" -] @keyword - 
-(boolean_type) @keyword -(void_type) @keyword - -;; true/false/null -> yellow (keyword) in MC's java.syntax -[ - (true) - (false) - (null_literal) -] @keyword - -(line_comment) @comment -(block_comment) @comment - -(string_literal) @string -(character_literal) @string.special - -;; Annotations -> brightred -(marker_annotation - name: (identifier) @function.special) -(annotation - name: (identifier) @function.special) - -(labeled_statement - (identifier) @label) - -;; Operators -> yellow -[ - "!" - "!=" - "%" - "&" - "&&" - "*" - "+" - "++" - "+=" - "-" - "--" - "-=" - "/" - "<" - "<<" - "<=" - "=" - "==" - ">" - ">=" - ">>" - ">>>" - "->" - "*=" - "/=" - "%=" - "|" - "||" - "^" - "~" -] @operator.word - -;; Semicolons -> brightmagenta -";" @delimiter.special - -;; Brackets/parens -> brightcyan -[ - "," - "(" - ")" - "[" - "]" - "{" - "}" -] @delimiter - -;; Ternary/colon -> brightcyan -[ - "." - ":" -] @delimiter diff --git a/misc/syntax-ts/queries/javascript-highlights.scm b/misc/syntax-ts/queries/javascript-highlights.scm deleted file mode 100644 index 16871d98f0..0000000000 --- a/misc/syntax-ts/queries/javascript-highlights.scm +++ /dev/null @@ -1,133 +0,0 @@ -;; Tree-sitter highlight queries for JavaScript -;; Colors aligned with MC's default js.syntax - -;; Keywords -> yellow -[ - "async" - "await" - "break" - "case" - "catch" - "class" - "const" - "continue" - "debugger" - "default" - "delete" - "do" - "else" - "export" - "extends" - "finally" - "for" - "function" - "if" - "import" - "in" - "instanceof" - "let" - "new" - "of" - "return" - "static" - "switch" - "throw" - "try" - "typeof" - "var" - "void" - "while" - "with" - "yield" - "from" - "as" - "get" - "set" -] @keyword - -(this) @keyword -(super) @keyword - -;; undefined -> yellow (keyword) -(undefined) @keyword - -(comment) @comment - -(string) @string -(template_string) @string -(regex) @string.special - -;; Numbers -> brightgreen in MC js.syntax -(number) @number.builtin - -;; true/false/null -> 
brightgreen in MC js.syntax -[ - (true) - (false) - (null) -] @number.builtin - -;; Operators -> yellow -[ - "!" - "!=" - "!==" - "%" - "&" - "&&" - "*" - "**" - "+" - "++" - "+=" - "-" - "--" - "-=" - "." - "/" - "<" - "<<" - "<=" - "=" - "==" - "===" - ">" - ">=" - ">>" - ">>>" - "^" - "|" - "||" - "~" - "*=" - "/=" - "%=" - "**=" - "&&=" - "||=" -] @operator.word - -;; Arrow -> brightcyan -[ - "=>" - "..." -] @operator - -;; Semicolons -> brightmagenta -";" @delimiter.special - -;; Brackets/parens -> brightcyan -[ - "," - "(" - ")" - "[" - "]" - "{" - "}" -] @delimiter - -;; Colon -> brightcyan -":" @delimiter - -(statement_identifier) @label diff --git a/misc/syntax-ts/queries/json-highlights.scm b/misc/syntax-ts/queries/json-highlights.scm deleted file mode 100644 index 8d749c0d43..0000000000 --- a/misc/syntax-ts/queries/json-highlights.scm +++ /dev/null @@ -1,39 +0,0 @@ -;; Tree-sitter highlight queries for JSON -;; Colors aligned with MC's default json.syntax - -;; Keys (first string in pair) -> green (string) -(pair - key: (string) @string) - -;; Value strings -> green (string) -(pair - value: (string) @string) - -;; Top-level / array strings -> green (string) -(array (string) @string) - -;; Escape sequences -> brightgreen (string.special) -(escape_sequence) @string.special - -;; Numbers -> brightgreen (string.special, MC uses brightgreen) -(number) @string.special - -;; Constants -> brightgreen (string.special, MC uses brightgreen) -[ - (true) - (false) - (null) -] @string.special - -;; Comments (JSON5 / jsonc) -(comment) @comment - -;; Delimiters -> brightcyan -[ - "," - ":" - "{" - "}" - "[" - "]" -] @delimiter diff --git a/misc/syntax-ts/queries/kotlin-highlights.scm b/misc/syntax-ts/queries/kotlin-highlights.scm deleted file mode 100644 index 0ffe6573f9..0000000000 --- a/misc/syntax-ts/queries/kotlin-highlights.scm +++ /dev/null @@ -1,134 +0,0 @@ -;; Tree-sitter highlight queries for Kotlin -;; Colors aligned with MC's default kotlin.syntax - -;; Hard 
keywords -> yellow (keyword) -[ - "as" - "class" - "do" - "else" - "for" - "fun" - "if" - "in" - "interface" - "is" - "object" - "return" - "super" - "this" - "throw" - "try" - "typealias" - "val" - "var" - "when" - "while" -] @keyword - -;; package/import -> brown (function.builtin) -[ - "package" - "import" -] @function.builtin - -;; Soft keywords -> brightgreen (type.builtin) -[ - "by" - "catch" - "constructor" - "finally" - "get" - "init" - "set" - "where" -] @type.builtin - -;; Modifier keywords -> brightmagenta (keyword.control) -[ - "abstract" - "annotation" - "companion" - "const" - "crossinline" - "data" - "enum" - "expect" - "external" - "final" - "infix" - "inline" - "inner" - "internal" - "lateinit" - "noinline" - "open" - "operator" - "out" - "override" - "private" - "protected" - "public" - "sealed" - "suspend" - "tailrec" - "vararg" -] @keyword.control - -;; Comments -> brown (comment) -(line_comment) @comment -(block_comment) @comment - -(string_literal) @string -(multiline_string_literal) @string -(character_literal) @string.special - -;; Annotations -> brightcyan (delimiter) -(annotation) @delimiter - -;; Built-in types -> brightred (variable.builtin) -;; MC colors Double/Float/Long/Int/Short/Byte/Char/Boolean/Array/String as brightred -((identifier) @variable.builtin - (#match? @variable.builtin "^(Double|Float|Long|Int|Short|Byte|Char|Boolean|Array|String|ByteArray|ByteSequence)$")) - -(label) @label - -;; Operators -> brightcyan (operator) -[ - "=" - "+=" - "-=" - "*=" - "/=" - "%=" - "==" - "!=" - "<" - ">" - "<=" - ">=" - "&&" - "||" - "!" - "+" - "-" - "*" - "/" - "%" - "++" - "--" - "->" - "?:" - ".." - "::" - "!!" - "as?" -] @operator - -[ - "." 
- "," - ":" -] @delimiter - -";" @delimiter \ No newline at end of file diff --git a/misc/syntax-ts/queries/latex-highlights.scm b/misc/syntax-ts/queries/latex-highlights.scm deleted file mode 100644 index cd8e50a4b2..0000000000 --- a/misc/syntax-ts/queries/latex-highlights.scm +++ /dev/null @@ -1,401 +0,0 @@ -; General syntax -(command_name) @function @nospell - -(caption - command: _ @function) - -; Turn spelling on for text -(text) @spell - -; \text, \intertext, \shortintertext, ... -(text_mode - command: _ @function @nospell - content: (curly_group - (_) @none @spell)) - -; Variables, parameters -(placeholder) @variable - -(key_value_pair - key: (_) @variable.parameter @nospell - value: (_)) - -(curly_group_spec - (text) @variable.parameter) - -(curly_group_value - (value_literal) @constant) - -(brack_group_argc) @variable.parameter - -[ - (operator) - "=" - "_" - "^" -] @operator - -"\\item" @punctuation.special - -(delimiter) @punctuation.delimiter - -(math_delimiter - left_command: _ @punctuation.delimiter - left_delimiter: _ @punctuation.delimiter - right_command: _ @punctuation.delimiter - right_delimiter: _ @punctuation.delimiter) - -[ - "[" - "]" - "{" - "}" -] @punctuation.bracket ; "(" ")" has no syntactical meaning in LaTeX - -; General environments -(begin - command: _ @module - name: (curly_group_text - (text) @label @nospell)) - -(end - command: _ @module - name: (curly_group_text - (text) @label @nospell)) - -; Definitions and references -(new_command_definition - command: _ @function.macro @nospell) - -(old_command_definition - command: _ @function.macro @nospell) - -(let_command_definition - command: _ @function.macro @nospell) - -(environment_definition - command: _ @function.macro @nospell - name: (curly_group_text - (_) @label @nospell)) - -(theorem_definition - command: _ @function.macro @nospell - name: (curly_group_text_list - (_) @label @nospell)) - -(paired_delimiter_definition - command: _ @function.macro @nospell - declaration: 
(curly_group_command_name - (_) @function)) - -(counter_declaration - command: _ @function.macro @nospell - counter: (curly_group_word - (word) @variable) - supercounter: (brack_group_word - (word) @variable)?) - -(counter_within_declaration - command: _ @function.macro @nospell - counter: (curly_group_word - (word) @variable) - supercounter: (curly_group_word - (word) @variable)) - -(counter_without_declaration - command: _ @function.macro @nospell - counter: (curly_group_word - (word) @variable) - supercounter: (curly_group_word - (word) @variable)) - -(counter_value - command: _ @function.macro @nospell - counter: (curly_group_word - (word) @variable)) - -; The 'value' fields for the two following highlights -; are handled by counter_value and curly_group_value. -(counter_definition - command: _ @function.macro @nospell - counter: (curly_group_word - (word) @variable)) - -(counter_addition - command: _ @function.macro @nospell - counter: (curly_group_word - (word) @variable)) - -(counter_increment - command: _ @function.macro @nospell - counter: (curly_group_word - (word) @variable)) - -(counter_typesetting - command: _ @function.macro @nospell - counter: (curly_group_word - (word) @variable)) - -(label_definition - command: _ @function.macro - name: (curly_group_label - (_) @markup.link @nospell)) - -(label_reference_range - command: _ @function.macro - from: (curly_group_label - (_) @markup.link) - to: (curly_group_label - (_) @markup.link)) - -(label_reference - command: _ @function.macro - names: (curly_group_label_list - (_) @markup.link)) - -(label_number - command: _ @function.macro - name: (curly_group_label - (_) @markup.link) - number: (_) @markup.link) - -(citation - command: _ @function.macro @nospell - keys: (curly_group_text_list) @markup.link @nospell) - -((hyperlink - command: _ @function @nospell - uri: (curly_group_uri - (_) @markup.link.url @nospell)) @_hyperlink - (#set! 
@_hyperlink url @markup.link.url)) - -(glossary_entry_definition - command: _ @function.macro @nospell - name: (curly_group_text - (_) @markup.link @nospell)) - -(glossary_entry_reference - command: _ @function.macro - name: (curly_group_text - (_) @markup.link)) - -(acronym_definition - command: _ @function.macro @nospell - name: (curly_group_text - (_) @markup.link @nospell)) - -(acronym_reference - command: _ @function.macro - name: (curly_group_text - (_) @markup.link)) - -(color_definition - command: _ @function.macro - name: (curly_group_text - (_) @markup.link)) - -(color_reference - command: _ @function.macro - name: (curly_group_text - (_) @markup.link)?) - -; Sectioning -(title_declaration - command: _ @module - options: (brack_group - (_) @markup.heading.1)? - text: (curly_group - (_) @markup.heading.1)) - -(author_declaration - command: _ @module - authors: (curly_group_author_list - (author)+ @markup.heading.1)) - -(chapter - command: _ @module - toc: (brack_group - (_) @markup.heading.2)? - text: (curly_group - (_) @markup.heading.2)) - -(part - command: _ @module - toc: (brack_group - (_) @markup.heading.2)? - text: (curly_group - (_) @markup.heading.2)) - -(section - command: _ @module - toc: (brack_group - (_) @markup.heading.3)? - text: (curly_group - (_) @markup.heading.3)) - -(subsection - command: _ @module - toc: (brack_group - (_) @markup.heading.4)? - text: (curly_group - (_) @markup.heading.4)) - -(subsubsection - command: _ @module - toc: (brack_group - (_) @markup.heading.5)? - text: (curly_group - (_) @markup.heading.5)) - -(paragraph - command: _ @module - toc: (brack_group - (_) @markup.heading.6)? - text: (curly_group - (_) @markup.heading.6)) - -(subparagraph - command: _ @module - toc: (brack_group - (_) @markup.heading.6)? - text: (curly_group - (_) @markup.heading.6)) - -; Beamer frames -(generic_environment - (begin - name: (curly_group_text - (text) @label) - (#eq? @label "frame")) - . 
- (curly_group - (_) @markup.heading)) - -((generic_command - command: (command_name) @_name - arg: (curly_group - (_) @markup.heading)) - (#eq? @_name "\\frametitle")) - -((generic_command - command: (command_name) @_name - arg: (curly_group - (_) @markup.italic)) - (#any-of? @_name "\\emph" "\\textit" "\\mathit")) - -((generic_command - command: (command_name) @_name - arg: (curly_group - (_) @markup.strong)) - (#any-of? @_name "\\textbf" "\\mathbf")) - -(generic_command - (command_name) @keyword.conditional - (#match? @keyword.conditional "^\\if[a-zA-Z@]+$")) - -(generic_command - (command_name) @keyword.conditional - (#any-of? @keyword.conditional "\\fi" "\\else")) - -; File inclusion commands -(class_include - command: _ @keyword.import - path: (curly_group_path) @string) - -(package_include - command: _ @keyword.import - paths: (curly_group_path_list) @string) - -(latex_include - command: _ @keyword.import - path: (curly_group_path) @string.special.path) - -(verbatim_include - command: _ @keyword.import - path: (curly_group_path) @string.special.path) - -(import_include - command: _ @keyword.import - directory: (curly_group_path) @string.special.path - file: (curly_group_path) @string.special.path) - -(bibstyle_include - command: _ @keyword.import - path: (curly_group_path) @string) - -(bibtex_include - command: _ @keyword.import - paths: (curly_group_path_list) @string.special.path) - -(biblatex_include - "\\addbibresource" @keyword.import - glob: (curly_group_glob_pattern) @string.regexp) - -(graphics_include - command: _ @keyword.import - path: (curly_group_path) @string.special.path) - -(svg_include - command: _ @keyword.import - path: (curly_group_path) @string.special.path) - -(inkscape_include - command: _ @keyword.import - path: (curly_group_path) @string.special.path) - -(tikz_library_import - command: _ @keyword.import - paths: (curly_group_path_list) @string) - -; Turn spelling off for whole nodes -[ - (counter_declaration) - 
(counter_within_declaration) - (counter_without_declaration) - (counter_value) - (counter_definition) - (counter_addition) - (counter_increment) - (counter_typesetting) - (label_reference) - (label_reference_range) - (label_number) - (glossary_entry_reference) - (acronym_reference) - (color_definition) - (color_reference) - (class_include) - (package_include) - (latex_include) - (verbatim_include) - (import_include) - (bibstyle_include) - (bibtex_include) - (biblatex_include) - (graphics_include) - (svg_include) - (inkscape_include) - (tikz_library_import) -] @nospell - -; Math -[ - (displayed_equation) - (inline_formula) -] @markup.math @nospell - -(math_environment - (_) @markup.math) - -; Comments -[ - (line_comment) - (block_comment) - (comment_environment) -] @comment @spell - -((line_comment) @keyword.directive @nospell - (#match? @keyword.directive "^%% !TeX")) - -((line_comment) @keyword.directive @nospell - (#match? @keyword.directive "^%%&")) diff --git a/misc/syntax-ts/queries/lua-highlights.scm b/misc/syntax-ts/queries/lua-highlights.scm deleted file mode 100644 index d1bfa24248..0000000000 --- a/misc/syntax-ts/queries/lua-highlights.scm +++ /dev/null @@ -1,99 +0,0 @@ -;; Tree-sitter highlight queries for Lua language -;; Colors aligned with MC's default lua.syntax (keywords=white, operators=white) - -;; Keywords -> white -[ - "and" - "do" - "else" - "elseif" - "end" - "for" - "function" - "goto" - "if" - "in" - "local" - "not" - "or" - "repeat" - "return" - "then" - "until" - "while" -] @keyword.other - -(break_statement) @keyword.other - -;; Constants -> white -[ - (false) - (nil) - (true) -] @keyword.other - -;; Operators -> white -[ - "=" - "~=" - "==" - "<=" - ">=" - "<" - ">" - "+" - "-" - "*" - "/" - "//" - "%" - "^" - "#" - ".." -] @keyword.other - -;; Brackets/parens/braces -> white -[ - "(" - ")" - "{" - "}" - "[" - "]" -] @keyword.other - -;; Delimiters -> white -[ - "." 
- "," - ";" - ":" - "::" -] @keyword.other - -(string) @string - -(comment) @comment - -;; _VERSION, _G -> brightmagenta -((identifier) @constant.builtin - (#match? @constant.builtin "^_(VERSION|G)$")) - -;; Library functions -> yellow -(function_call - name: (identifier) @keyword) - -(function_call - name: (dot_index_expression - field: (identifier) @keyword)) - -(function_call - name: (method_index_expression - method: (identifier) @keyword)) - -(function_declaration - name: (identifier) @keyword) - -(label_statement (identifier) @label) - -(goto_statement (identifier) @label) diff --git a/misc/syntax-ts/queries/mail-highlights.scm b/misc/syntax-ts/queries/mail-highlights.scm deleted file mode 100644 index 86ca4abea7..0000000000 --- a/misc/syntax-ts/queries/mail-highlights.scm +++ /dev/null @@ -1,4 +0,0 @@ -(header_email) @type.builtin -(email) @string -(header_subject) @tag -(header_other) @markup.environment diff --git a/misc/syntax-ts/queries/make-highlights.scm b/misc/syntax-ts/queries/make-highlights.scm deleted file mode 100644 index 764858472a..0000000000 --- a/misc/syntax-ts/queries/make-highlights.scm +++ /dev/null @@ -1,84 +0,0 @@ -;; Tree-sitter highlight queries for Makefile language -;; Colors aligned with MC's default makefile.syntax - -;; Directives -> magenta (keyword.directive) -[ - "include" - "sinclude" - "-include" - "define" - "endef" - "ifdef" - "ifndef" - "ifeq" - "ifneq" - "else" - "endif" - "override" - "export" - "unexport" - "undefine" - "private" - "vpath" -] @keyword.directive - -;; Comments -> brown (comment) -(comment) @comment - -;; Variable assignment name -> yellow (property.key) -(variable_assignment - name: (word) @property.key) -(shell_assignment - name: (word) @property.key) - -;; Variable references $() ${} -> keyword (yellow) -(variable_reference - (word) @keyword) - -;; Automatic variables ($@, $<, $^, $*, $?, $%) -> brightred (function.special) -(automatic_variable) @function.special - -;; Escaped dollar $$ -> brightcyan 
(delimiter) -(escape) @delimiter - -;; Line continuation \ -> yellow (keyword) -"\\" @keyword - -;; Special targets -> white (tag.special) -(rule - (targets - (word) @tag.special - (#any-of? @tag.special - ".PHONY" ".SUFFIXES" ".DEFAULT" ".PRECIOUS" - ".INTERMEDIATE" ".SECONDARY" ".DELETE_ON_ERROR" - ".IGNORE" ".LOW_RESOLUTION_TIME" ".SILENT" - ".EXPORT_ALL_VARIABLES" ".NOTPARALLEL" ".NOEXPORT"))) - -;; Strings -> green (string) -(string) @string - -;; Assignment operators = := etc -> white (tag.special) -[ - "=" - ":=" - "::=" - "?=" - "+=" - "!=" -] @tag.special - -;; : -> yellow (keyword) -[ - ":" - "::" -] @keyword - -;; Semicolons, commas -> brightcyan (delimiter) -[ - ";" - "," -] @delimiter - -;; Function calls -> brightcyan (label) -(function_call - function: (_) @label) diff --git a/misc/syntax-ts/queries/markdown-highlights.scm b/misc/syntax-ts/queries/markdown-highlights.scm deleted file mode 100644 index 41c0cf2cd2..0000000000 --- a/misc/syntax-ts/queries/markdown-highlights.scm +++ /dev/null @@ -1,67 +0,0 @@ -;; Tree-sitter highlight queries for Markdown language -;; Block-level parser only (inline nodes are in a separate parser) -;; Colors aligned with MC's default markdown.syntax: -;; headings = brightred, heading #### = red (we use brightred for all) -;; blockquote > = green -;; code blocks = cyan -;; links = yellow - -;; Heading markers (#, ##, etc.) 
-> brightred (function.special) -(atx_heading - (atx_h1_marker) @function.special) - -(atx_heading - (atx_h2_marker) @function.special) - -(atx_heading - (atx_h3_marker) @function.special) - -(atx_heading - (atx_h4_marker) @function.special) - -(atx_heading - (atx_h5_marker) @function.special) - -(atx_heading - (atx_h6_marker) @function.special) - -;; Heading content -> brightred (function.special) -(atx_heading - heading_content: (_) @function.special) - -(setext_heading - heading_content: (_) @function.special) - -(setext_h1_underline) @function.special -(setext_h2_underline) @function.special - -;; Blockquote marker -> green (string) -(block_quote - (block_quote_marker) @string) - -;; Code block delimiters and info string -> label (cyan) -;; Content is left uncolored so injected language highlights show correctly. -(fenced_code_block - (fenced_code_block_delimiter) @label) -(fenced_code_block - (info_string) @label) - -(indented_code_block) @label - -;; Thematic breaks (---) -> delimiter -(thematic_break) @delimiter - -;; Links -> yellow (keyword) -(link_destination) @keyword -(link_label) @keyword -(link_title) @keyword - -;; List markers -> default -(list_marker_plus) @variable -(list_marker_minus) @variable -(list_marker_star) @variable -(list_marker_dot) @variable -(list_marker_parenthesis) @variable - -;; HTML blocks -> function.special (brightred, same as preprocessor) -(html_block) @function.special diff --git a/misc/syntax-ts/queries/markdown-injections.scm b/misc/syntax-ts/queries/markdown-injections.scm deleted file mode 100644 index 57f065c57a..0000000000 --- a/misc/syntax-ts/queries/markdown-injections.scm +++ /dev/null @@ -1,33 +0,0 @@ -; MC-specific markdown injections. -; Adds pipe_table_cell injection for markdown_inline (upstream only has inline). - -(fenced_code_block - (info_string - (language) @injection.language) - (code_fence_content) @injection.content) - -((html_block) @injection.content - (#set! 
injection.language "html")) - -(document - . - (section - . - (thematic_break) - (_) @injection.content - (thematic_break)) - (#set! injection.language "yaml")) - -((minus_metadata) @injection.content - (#set! injection.language "yaml")) - -((plus_metadata) @injection.content - (#set! injection.language "toml")) - -; Inline content (paragraphs, headings) -((inline) @injection.content - (#set! injection.language "markdown_inline")) - -; Table cells (not wrapped in inline nodes) -((pipe_table_cell) @injection.content - (#set! injection.language "markdown_inline")) diff --git a/misc/syntax-ts/queries/markdown_inline-highlights.scm b/misc/syntax-ts/queries/markdown_inline-highlights.scm deleted file mode 100644 index f3ecb5d93b..0000000000 --- a/misc/syntax-ts/queries/markdown_inline-highlights.scm +++ /dev/null @@ -1,48 +0,0 @@ -;; Tree-sitter highlight queries for Markdown inline parser -;; Used via language injection from the Markdown block parser. -;; Colors aligned with MC's default markdown.syntax: -;; bold = brightmagenta (markup.bold) -;; italic = magenta (markup.italic) -;; code spans = cyan (label) -;; links = yellow (keyword) -;; images = yellow (keyword) -;; HTML tags = brightred (function.special) -;; escape sequences = brightgreen (string.special) - -;; Emphasis (*italic* and _italic_) -> magenta (markup.italic) -(emphasis) @markup.italic - -;; Strong emphasis (**bold** and __bold__) -> brightmagenta (markup.bold) -(strong_emphasis) @markup.bold - -;; Strikethrough (~~text~~) -> brightcyan (delimiter) -(strikethrough) @delimiter - -;; Code spans (`code`) -> cyan (label) -(code_span) @label - -;; Links -> yellow (keyword) -(shortcut_link) @keyword -(full_reference_link) @keyword -(collapsed_reference_link) @keyword -(inline_link) @keyword - -;; Images -> yellow (keyword) -(image) @keyword - -;; Autolinks -> yellow (keyword) -(uri_autolink) @keyword -(email_autolink) @keyword - -;; Inline HTML tags -> brightred (function.special) -(html_tag) 
@function.special - -;; Escape sequences -> brightgreen (string.special) -(backslash_escape) @string.special - -;; Entity references -> brightgreen (string.special) -(entity_reference) @string.special -(numeric_character_reference) @string.special - -;; LaTeX spans -> cyan (label) -(latex_block) @label diff --git a/misc/syntax-ts/queries/matlab-highlights.scm b/misc/syntax-ts/queries/matlab-highlights.scm deleted file mode 100644 index 3afb434836..0000000000 --- a/misc/syntax-ts/queries/matlab-highlights.scm +++ /dev/null @@ -1,65 +0,0 @@ -;; Tree-sitter highlight queries for MATLAB/Octave -;; Colors aligned with MC's octave.syntax -;; MC: control keywords=white, functions=yellow, operators=brightcyan, comments=brown, strings=green - -;; Control flow keywords -> white (@keyword.other) -[ - "if" - "else" - "elseif" - "end" - "for" - "parfor" - "while" - "switch" - "case" - "otherwise" - "try" - "catch" - "return" - "continue" - "break" - "function" - "endfunction" - "classdef" - "properties" - "methods" - "events" - "enumeration" - "spmd" - "global" - "persistent" - "arguments" -] @keyword.other - -;; Comments -> brown -(comment) @comment - -;; Strings -> green -(string) @string - -;; Escape sequences -> brightgreen -(escape_sequence) @string.special - -;; Operators -> brightcyan (@operator) -(binary_operator) @operator -(comparison_operator) @operator -(boolean_operator) @operator -(unary_operator) @operator -(not_operator) @operator -(postfix_operator) @operator - -;; Assignment -"=" @operator - -;; Delimiters -> brightcyan (@delimiter) -[ - "," - ";" - "(" - ")" - "[" - "]" - "{" - "}" -] @delimiter diff --git a/misc/syntax-ts/queries/meson-highlights.scm b/misc/syntax-ts/queries/meson-highlights.scm deleted file mode 100644 index 6338606cc4..0000000000 --- a/misc/syntax-ts/queries/meson-highlights.scm +++ /dev/null @@ -1,45 +0,0 @@ -;; Tree-sitter highlight queries for Meson build language -;; Colors aligned with MC's default meson.syntax -;; MC: 
functions=white, built-in objects=yellow, comments=brown, -;; single-quoted strings=green, double-quoted strings=brightred - -;; Keywords -> yellow -[ - "if" - "elif" - "else" - "endif" - "foreach" - "endforeach" - "and" - "or" - "not" -] @keyword - -(keyword_break) @keyword -(keyword_continue) @keyword - -;; Boolean literals -> yellow (MC: built-in objects are yellow) -[ - "true" - "false" -] @keyword - -;; Built-in objects -> yellow (MC: legacy colors these as keyword/yellow) -((identifier) @variable.builtin - (#any-of? @variable.builtin - "meson" "host_machine" "build_machine" "target_machine")) - -;; Strings -> green (MC uses green for single-quoted strings) -(string) @string - -;; Comments -> brown -(comment) @comment - -;; Function calls -> white (MC uses white for function names) -(normal_command - command: (identifier) @keyword.other) - -;; Dictionary keys -> yellow (property.key maps to yellow) -(pair - key: (identifier) @property.key) diff --git a/misc/syntax-ts/queries/muttrc-highlights.scm b/misc/syntax-ts/queries/muttrc-highlights.scm deleted file mode 100644 index 445ed93ca8..0000000000 --- a/misc/syntax-ts/queries/muttrc-highlights.scm +++ /dev/null @@ -1,48 +0,0 @@ -;; Tree-sitter highlight queries for Muttrc -;; Colors aligned with MC's muttrc.syntax -;; MC: primary commands=brightgreen, hooks=brightcyan, options=yellow, comments=brown - -;; Set directive options -> yellow (@keyword) -(set_directive - (option) @keyword) - -;; Comments -> brown -(comment) @comment - -;; Primary commands (set, color, macro, source, etc.) 
-> brightgreen (@string.special) -;; These are aliased to "command" in the grammar -(command) @string.special - -;; Hook keywords -> brightcyan (@delimiter) -[ - "account-hook" - "charset-hook" - "iconv-hook" - "crypt-hook" - "fcc-hook" - "save-hook" - "folder-hook" - "mbox-hook" - "message-hook" - "open-hook" - "close-hook" - "append-hook" - "reply-hook" - "send-hook" - "send2-hook" - "timeout-hook" - "startup-hook" - "shutdown-hook" -] @delimiter - -;; Alias keyword -> brightcyan (@delimiter) -"alias" @delimiter - -;; String delimiters -> green (@string) -[ - "'" - "\"" -] @string - -;; Backtick (shell command) -> brightred (@function.special) -"`" @function.special diff --git a/misc/syntax-ts/queries/ocaml-highlights.scm b/misc/syntax-ts/queries/ocaml-highlights.scm deleted file mode 100644 index 9111a88ede..0000000000 --- a/misc/syntax-ts/queries/ocaml-highlights.scm +++ /dev/null @@ -1,119 +0,0 @@ -;; Tree-sitter highlight queries for OCaml language -;; Colors aligned with MC's default ml.syntax -;; MC: keywords=yellow, operators/delimiters=cyan, strings=brightcyan, comments=brown - -;; Keywords -> yellow -[ - "and" - "as" - "assert" - "begin" - "class" - "constraint" - "do" - "done" - "downto" - "else" - "end" - "exception" - "external" - "for" - "fun" - "function" - "functor" - "if" - "in" - "include" - "inherit" - "initializer" - "lazy" - "let" - "match" - "method" - "module" - "mutable" - "new" - "nonrec" - "object" - "of" - "open" - "private" - "rec" - "sig" - "struct" - "then" - "to" - "try" - "type" - "val" - "virtual" - "when" - "while" - "with" -] @keyword - -;; Boolean constants -> yellow (MC: true/false are keywords=yellow) -[ - "true" - "false" -] @keyword - -;; Operators -> cyan (MC uses cyan for operators) -[ - "=" - "<" - ">" - "|" - "->" - "::" - ";;" - "~" - "!" - ":=" - "+" - "-" - "*" -] @label - -;; Delimiters -> cyan (MC uses cyan for delimiters) -[ - "." 
- "," - ":" -] @label - -;; Brackets/parens/braces -> brightcyan (MC legacy colors these as cyan) -[ - "(" - ")" - "[" - "]" - "{" - "}" -] @delimiter - -;; Semicolons -> brightred (MC: ;; and ; are brightred) -";" @function.special - -;; Strings -> brightcyan (MC uses brightcyan for strings in "") -(string) @tag - -;; Characters -> brightcyan -(character) @tag - -;; Booleans (named node) -(boolean) @keyword - -;; Comments -> brown -(comment) @comment - -;; Types -> yellow -(type_constructor) @type -(constructor_name) @type -(module_name) @type - -;; Labels -> cyan -(label_name) @label - -;; Fields -> cyan -(field_name) @label diff --git a/misc/syntax-ts/queries/pascal-highlights.scm b/misc/syntax-ts/queries/pascal-highlights.scm deleted file mode 100644 index c46104ee12..0000000000 --- a/misc/syntax-ts/queries/pascal-highlights.scm +++ /dev/null @@ -1,105 +0,0 @@ -;; Tree-sitter highlight queries for Pascal language -;; Colors aligned with MC's default pascal.syntax -;; MC: keywords=white, logical-ops=cyan, default-context=yellow, comments=brightgreen, -;; strings=brightcyan, delimiters=lightgray - -;; Keywords -> white (keyword.other) -[ - (kProgram) - (kUnit) - (kUses) - (kInterface) - (kImplementation) - (kBegin) - (kEnd) - (kVar) - (kConst) - (kType) - (kArray) - (kOf) - (kRecord) - (kClass) - (kObject) - (kConstructor) - (kDestructor) - (kInherited) - (kProperty) - (kRead) - (kWrite) - (kIf) - (kThen) - (kElse) - (kCase) - (kFor) - (kTo) - (kDownto) - (kWhile) - (kRepeat) - (kUntil) - (kWith) - (kDo) - (kTry) - (kExcept) - (kFinally) - (kRaise) - (kSet) - (kFunction) - (kProcedure) -] @keyword.other - -;; Logical operators -> cyan (label) -[ - (kAnd) - (kOr) - (kNot) - (kXor) - (kDiv) - (kMod) - (kShl) - (kShr) - (kIn) - (kIs) - (kAs) -] @label - -;; Constants -[ - (kNil) - (kTrue) - (kFalse) -] @keyword.other - -;; String type keyword -[ - (kString) -] @type - -;; Operators -> cyan (MC uses cyan for operators) -[ - (kEq) - (kNeq) - (kLt) - (kGt) - (kLte) - 
(kGte) - (kAdd) - (kSub) - (kMul) - (kFdiv) - (kAssign) - (kAt) - (kHat) -] @label - -;; Delimiters -> lightgray (MC uses lightgray for ; : , . ( ) [ ]) -[ - ";" - ":" - "," -] @number - -;; Comments -> brightgreen (MC uses brightgreen for comments) -(comment) @comment.special - -;; Literal strings -> brightcyan (MC uses brightcyan for ' ' strings) -(literalString) @tag diff --git a/misc/syntax-ts/queries/perl-highlights.scm b/misc/syntax-ts/queries/perl-highlights.scm deleted file mode 100644 index 0c16d23a8d..0000000000 --- a/misc/syntax-ts/queries/perl-highlights.scm +++ /dev/null @@ -1,113 +0,0 @@ -;; Tree-sitter highlight queries for Perl language -;; Colors aligned with MC's default perl.syntax - -;; sub -> yellow -"sub" @keyword - -;; Control flow keywords -> magenta -[ - "my" - "our" - "local" - "use" - "require" - "package" - "return" - "if" - "elsif" - "else" - "unless" - "while" - "until" - "for" - "foreach" - "do" - "last" - "next" - "redo" - "and" - "or" - "goto" - "BEGIN" - "END" -] @keyword.directive - -;; __END__ / __DATA__ -> brown -(data_section) @comment - -;; Builtin functions -> yellow -[ - "chomp" - "chop" - "defined" - "undef" - "eval" -] @keyword - -;; Operators -> yellow -[ - "=" - "==" - "!=" - "eq" - "ne" - "lt" - "gt" - "le" - "ge" - "<=>" - "cmp" - "<" - ">" - "<=" - ">=" - "=~" - "!~" - "+" - "-" - "*" - "/" - "%" - "**" - "." - ".." - "&&" - "||" - "!" 
- "->" -] @operator.word - -;; Semicolons -> brightmagenta -";" @delimiter.special - -;; Comma -> brightcyan -"," @delimiter - -(string_literal) @string -(interpolated_string_literal) @string -(heredoc_content) @string -(heredoc_token) @string -(command_string) @string - -(match_regexp) @string.special -(quoted_regexp) @string.special - -(comment) @comment - -;; Variables -> brightgreen -(container_variable) @variable.special -(scalar) @variable.special -(array) @variable.special -(hash) @variable.special - -;; Brackets/parens -> brightcyan -[ - "(" - ")" - "[" - "]" - "{" - "}" -] @delimiter - -(label) @label diff --git a/misc/syntax-ts/queries/php-highlights.scm b/misc/syntax-ts/queries/php-highlights.scm deleted file mode 100644 index 1e35e63420..0000000000 --- a/misc/syntax-ts/queries/php-highlights.scm +++ /dev/null @@ -1,165 +0,0 @@ -;; Tree-sitter highlight queries for PHP language -;; Colors aligned with MC's default php.syntax (keywords=brightmagenta) - -;; Keywords -> brightmagenta -[ - "abstract" - "and" - "array" - "as" - "break" - "case" - "catch" - "class" - "clone" - "const" - "continue" - "declare" - "default" - "do" - "echo" - "else" - "elseif" - "enddeclare" - "endfor" - "endforeach" - "endif" - "endswitch" - "endwhile" - "exit" - "extends" - "final" - "finally" - "fn" - "for" - "foreach" - "function" - "global" - "goto" - "if" - "implements" - "include" - "include_once" - "instanceof" - "insteadof" - "interface" - "list" - "match" - "namespace" - "new" - "or" - "print" - "private" - "protected" - "public" - "readonly" - "require" - "require_once" - "return" - "static" - "switch" - "throw" - "trait" - "try" - "unset" - "use" - "while" - "xor" - "yield" -] @keyword.control - -;; null/true/false -> brightmagenta -"null" @keyword.control -(boolean) @keyword.control - -;; UPPERCASE constants -> white -((name) @keyword.other - (#match? 
@keyword.other "^[A-Z][A-Z\\d_]+$")) - -;; Operators -> white -[ - "=" - "==" - "===" - "!=" - "!==" - "<>" - "<" - ">" - "<=" - ">=" - "<=>" - "+" - "-" - "*" - "/" - "%" - "**" - "." - ".=" - "+=" - "-=" - "*=" - "/=" - "&&" - "||" - "!" - "&" - "|" - "^" - "~" - "<<" - ">>" - "->" - "=>" - "??" - "::" -] @keyword.other - -;; Semicolons -> brightmagenta -";" @delimiter.special - -;; Brackets/parens -> brightcyan -[ - "," - ":" -] @delimiter - -(string) @string -(encapsed_string) @string -(heredoc_body) @string -(heredoc) @string -(nowdoc_body) @string - -(comment) @comment - -;; $variables -> brightgreen -(variable_name) @variable.special - -;; Functions -> yellow -(function_definition name: (name) @keyword) -(method_declaration name: (name) @keyword) - -(function_call_expression - function: (name) @keyword) - -(function_call_expression - function: (qualified_name) @keyword) - -(member_call_expression - name: (name) @keyword) - -(scoped_call_expression - name: (name) @keyword) - -;; Brackets/parens -> brightcyan -[ - "(" - ")" - "[" - "]" - "{" - "}" -] @delimiter - -(named_label_statement (name) @label) diff --git a/misc/syntax-ts/queries/po-highlights.scm b/misc/syntax-ts/queries/po-highlights.scm deleted file mode 100644 index bc2bfbf40b..0000000000 --- a/misc/syntax-ts/queries/po-highlights.scm +++ /dev/null @@ -1,30 +0,0 @@ -;; Tree-sitter highlight queries for PO (gettext) files -;; Colors aligned with MC's default po.syntax -;; MC: msgid/msgstr=brightcyan, # comments=brown, #:/,=white, strings inherit context - -;; Keywords -> brightcyan (tag) -[ - "msgid" - "msgstr" - "msgctxt" - "msgid_plural" -] @tag - -;; Strings -> green (MC: msgstr context is green, msgid context is cyan) -(string) @string - -;; Translator comments (# ...) -> brown -(comment) @comment -(translator_comment) @comment - -;; Extracted comments (#. ...) -> white -(extracted_comment) @tag.special - -;; Reference comments (#: ...) 
-> white -(reference) @tag.special - -;; Flag comments (#, ...) -> white -(flag) @tag.special - -;; Previous untranslated strings (#~ ...) -> white -(previous_untranslated_string) @tag.special diff --git a/misc/syntax-ts/queries/properties-highlights.scm b/misc/syntax-ts/queries/properties-highlights.scm deleted file mode 100644 index b82780a179..0000000000 --- a/misc/syntax-ts/queries/properties-highlights.scm +++ /dev/null @@ -1,25 +0,0 @@ -;; Tree-sitter highlight queries for Java properties files -;; Colors aligned with MC's default properties.syntax - -;; Keys -> yellow (property.key) -(property - (key) @property.key) - -;; Values -> default (variable) -(property - (value) @variable) - -;; Substitutions ${...} -> brightgreen (variable.special) -(substitution) @variable.special - -;; Escape sequences -> keyword.directive (magenta) -(escape) @keyword.directive - -;; Comments -> brown (comment) -(comment) @comment - -;; Separators = : -> brightcyan (delimiter) -[ - "=" - ":" -] @delimiter diff --git a/misc/syntax-ts/queries/proto-highlights.scm b/misc/syntax-ts/queries/proto-highlights.scm deleted file mode 100644 index 535c2ff618..0000000000 --- a/misc/syntax-ts/queries/proto-highlights.scm +++ /dev/null @@ -1,94 +0,0 @@ -;; Tree-sitter highlight queries for Protocol Buffers -;; Colors aligned with MC's protobuf.syntax -;; MC: keywords=yellow, comments=brown, strings=green - -;; Keywords -> yellow (@keyword) -[ - "syntax" - "edition" - "import" - "weak" - "public" - "package" - "option" - "message" - "enum" - "service" - "rpc" - "returns" - "stream" - "extend" - "oneof" - "map" - "reserved" - "extensions" - "to" - "max" - "optional" - "required" - "repeated" - "group" -] @keyword - -;; Type keywords -> yellow (@keyword) -[ - "int32" - "int64" - "uint32" - "uint64" - "sint32" - "sint64" - "fixed32" - "fixed64" - "sfixed32" - "sfixed64" - "bool" - "string" - "double" - "float" - "bytes" -] @keyword - -;; Boolean literals -> yellow -(true) @keyword -(false) 
@keyword - -;; Assignment -> yellow (@operator.word) -"=" @operator.word - -;; Comments -> brown -(comment) @comment - -;; Strings -> green -(string) @string - -;; Escape sequences -> brightgreen -(escape_sequence) @string.special - -;; Message/enum/service names -> brightcyan (@tag) -(message_name) @tag -(enum_name) @tag -(service_name) @tag -(rpc_name) @tag - -;; Brackets -> brightcyan (@delimiter) -[ - "{" - "}" - "(" - ")" - "[" - "]" -] @delimiter - -;; Punctuation -> brightcyan (@delimiter) -[ - "," - ":" - "." - "<" - ">" -] @delimiter - -;; Semicolons -> brightmagenta (@delimiter.special) -";" @delimiter.special diff --git a/misc/syntax-ts/queries/puppet-highlights.scm b/misc/syntax-ts/queries/puppet-highlights.scm deleted file mode 100644 index 40bb75305e..0000000000 --- a/misc/syntax-ts/queries/puppet-highlights.scm +++ /dev/null @@ -1,193 +0,0 @@ -; Variables - -(identifier) @variable - -; Includes - -"include" @include - -(include_statement (identifier) @type) - -(include_statement (class_identifier (identifier) @type . )) - -; Keywords - -[ - "class" - "inherits" - "node" - "type" - "tag" -] @keyword - -[ - "define" - "function" -] @keyword.function - -[ - "if" - "elsif" - "else" - "unless" - "case" -] @conditional - -(default_case "default" @conditional) - -; Properties - -(attribute name: (identifier) @property) -(attribute name: (variable (identifier) @property)) - -; Parameters - -(lambda (variable (identifier) @parameter)) - -(parameter (variable (identifier) @parameter)) - -(function_call (identifier) @parameter) - -; Functions - -(function_declaration - "function" . (identifier) @function) - -(function_call - (identifier) @function.call "(") - -(function_call - (field_expression "." (identifier) @method.call) "(") - -(defined_resource_type - "define" . (identifier) @function) - -; Methods - -(function_declaration - "function" . (class_identifier (identifier) @method . )) - -(function_call - (class_identifier (identifier) @method.call . 
)) - -(defined_resource_type - "define" . (class_identifier (identifier) @method . )) - -; Types - -(type) @type - -(builtin_type) @type.builtin - -(class_definition - (identifier) @type) -(class_definition - (class_identifier (identifier) @type . )) - -(class_inherits (identifier) @type) -(class_inherits (class_identifier (identifier) @type . )) - -(resource_declaration - (identifier) @type) -(resource_declaration - (class_identifier (identifier) @type . )) - -(node_definition (node_name (identifier) @type)) - -((identifier) @type - (#lua-match? @type "^[A-Z]")) - -((identifier) @type.builtin - (#any-of? @type.builtin "Boolean" "Integer" "Float" "String" "Array" "Hash" "Regexp" "Variant" "Data" "Undef" "Default" "File")) - -; "Namespaces" - -(class_identifier . (identifier) @namespace) - -; Operators - -[ - "or" - "and" - "in" -] @keyword.operator - -[ - "=" - "+=" - "->" - "~>" - "<<|" - "<|" - "|>" - "|>>" - "?" - ">" - ">=" - "<=" - "<" - "==" - "!=" - "<<" - ">>" - "+" - "-" - "*" - "/" - "%" - "=~" - "!~" -] @operator - -; Punctuation - -[ - "|" - "." 
- "," - ";" - ":" - "::" - "=>" -] @punctuation.delimiter - -[ "{" "}" ] @punctuation.bracket - -[ "[" "]" ] @punctuation.bracket - -[ "(" ")" ] @punctuation.bracket - -(interpolation [ "${" "}" ] @punctuation.special) - -[ - "$" - "@" - "@@" -] @punctuation.special - -; Literals - -(number) @number - -(float) @float - -(string) @string - -(escape_sequence) @string.escape - -(regex) @string.regex - -(boolean) @boolean - -[ - (undef) - (default) -] @variable.builtin - -; Comments - -(comment) @comment @spell - -; Errors - -(ERROR) @error diff --git a/misc/syntax-ts/queries/python-highlights.scm b/misc/syntax-ts/queries/python-highlights.scm deleted file mode 100644 index 743c3fd187..0000000000 --- a/misc/syntax-ts/queries/python-highlights.scm +++ /dev/null @@ -1,122 +0,0 @@ -;; Tree-sitter highlight queries for Python language -;; Colors aligned with MC's default python.syntax - -;; Keywords -> yellow (matching MC: keyword whole ... yellow) -[ - "and" - "as" - "assert" - "async" - "await" - "break" - "class" - "continue" - "def" - "del" - "elif" - "else" - "except" - "finally" - "for" - "from" - "global" - "if" - "import" - "in" - "is" - "lambda" - "nonlocal" - "not" - "or" - "pass" - "raise" - "return" - "try" - "while" - "with" - "yield" -] @keyword - -;; Operators -> yellow (matching MC: keyword +,-,*,/,%,=,!=,==,<,> yellow) -[ - "=" - "==" - "!=" - "<" - ">" - "<=" - ">=" - "+" - "-" - "*" - "**" - "/" - "//" - "%" - "@" - "|" - "&" - "^" - "~" - "<<" - ">>" - "+=" - "-=" - "*=" - "/=" - "//=" - "%=" - "**=" - "<<=" - ">>=" - "&=" - "|=" - "^=" - "->" - ":=" -] @operator.word - -;; Colon -> brightred (matching MC: keyword : brightred) -":" @variable.builtin - -;; Brackets/parens -> brightcyan (matching MC: keyword {,},(,),[,] brightcyan) -[ - "(" - ")" - "[" - "]" - "{" - "}" -] @delimiter - -;; Comma -> brightcyan (matching MC: keyword , brightcyan) -"," @delimiter - -;; Semicolons -> brightmagenta (matching MC: keyword ; brightmagenta) -";" @delimiter.special 
- -;; Strings -> green (matching MC: context " " green, etc.) -(string) @string -(concatenated_string) @string - -;; Escape sequences in strings -> brightgreen (matching MC: keyword \\" brightgreen) -(escape_sequence) @string.special - -;; Comments -> brown (matching MC: context # \n brown) -(comment) @comment - -;; self -> brightred (matching MC: keyword whole self brightred) -((identifier) @variable.builtin - (#match? @variable.builtin "^self$")) - -;; Dunder methods -> lightgray (matching MC: keyword whole __init__ etc.) -((identifier) @constant - (#match? @constant "^__.*__$")) - -;; Decorators -> brightred (matching MC: keyword whole __+__ brightred) -(decorator - (identifier) @function.special) - -(decorator - (attribute - attribute: (identifier) @function.special)) diff --git a/misc/syntax-ts/queries/qmljs-highlights.scm b/misc/syntax-ts/queries/qmljs-highlights.scm deleted file mode 100644 index eb8a099f16..0000000000 --- a/misc/syntax-ts/queries/qmljs-highlights.scm +++ /dev/null @@ -1,124 +0,0 @@ -;; Tree-sitter highlight queries for QML (Qt Modeling Language) -;; Based on JavaScript/TypeScript syntax with QML extensions. 
- -;; QML keywords -> yellow -[ - "import" - "pragma" - "component" - "property" - "signal" - "required" - "readonly" - "default" - "on" -] @keyword - -;; JS/TS keywords -> yellow -[ - "if" - "else" - "for" - "while" - "switch" - "case" - "break" - "continue" - "return" - "function" - "var" - "let" - "const" - "class" - "enum" - "new" - "delete" - "typeof" - "instanceof" - "in" - "of" - "try" - "catch" - "finally" - "throw" - "async" - "await" - "yield" - "export" - "void" - "with" -] @keyword - -;; Named keyword nodes -> yellow -(this) @keyword -(super) @keyword - -;; Boolean and null literals -> brightgreen -[ - (true) - (false) - (null) - (undefined) -] @number.builtin - -;; Number literals -> brightgreen -(number) @number.builtin - -;; Strings -> green -(string) @string -(template_string) @string - -;; Regex -> brightgreen -(regex) @string.special - -;; Comments -> brown -(comment) @comment - -;; Type annotations -> yellow -(type_identifier) @type - -;; Operators -> yellow -[ - "=" - "+=" - "-=" - "*=" - "/=" - "%=" - "+" - "-" - "*" - "/" - "%" - "==" - "===" - "!=" - "!==" - "<" - ">" - "<=" - ">=" - "&&" - "||" - "!" - "?" - ":" - "." 
-] @operator.word - -;; Arrow -> brightcyan -"=>" @operator - -;; Semicolons -> brightmagenta -";" @delimiter.special - -;; Delimiters -> brightcyan -[ - "," - "(" - ")" - "[" - "]" - "{" - "}" -] @delimiter diff --git a/misc/syntax-ts/queries/r-highlights.scm b/misc/syntax-ts/queries/r-highlights.scm deleted file mode 100644 index 67a62fca70..0000000000 --- a/misc/syntax-ts/queries/r-highlights.scm +++ /dev/null @@ -1,90 +0,0 @@ -;; Tree-sitter highlight queries for R language -;; Colors aligned with MC's default r.syntax - -;; Control flow keywords -> brightmagenta (keyword.control) -[ - "if" - "for" - "while" - "repeat" - "in" -] @keyword.control - -;; return/next/break -> brightmagenta (keyword.control) -(return) @keyword.control -(next) @keyword.control -(break) @keyword.control - -;; function keyword -> yellow (keyword) -"function" @keyword - -;; Boolean/null/special constants -(true) @constant -(false) @constant -(null) @constant -(inf) @constant -(nan) @constant -(na) @constant - -;; Assignment operators -> brightred (function.special) -[ - "<-" - "<<-" - "->" - "->>" -] @function.special - -;; Equals sign (assignment) -> brightred (function.special) -"=" @function.special - -;; Comparison and arithmetic operators -> yellow (operator.word) -[ - "==" - "!=" - "<" - ">" - "<=" - ">=" - "+" - "-" - "*" - "/" - "^" - "|" - "||" - "&" - "&&" - "!" - "~" - "|>" - "$" - "@" - ":" - "::" - ":::" - "**" -] @operator.word - -(comma) @delimiter - -;; Strings -> brightgreen (string.special) to match MC -(string) @string.special - -(comment) @comment - -;; Function calls -> yellow (keyword) to match MC -(call - function: (identifier) @keyword) - -(call - function: (namespace_operator - rhs: (identifier) @keyword)) - -(namespace_operator - lhs: (identifier) @type) - -(parameter - name: (identifier) @property) - -(extract_operator - (identifier) @property .) 
\ No newline at end of file diff --git a/misc/syntax-ts/queries/ruby-highlights.scm b/misc/syntax-ts/queries/ruby-highlights.scm deleted file mode 100644 index a3fe4ae32f..0000000000 --- a/misc/syntax-ts/queries/ruby-highlights.scm +++ /dev/null @@ -1,132 +0,0 @@ -;; Tree-sitter highlight queries for Ruby language -;; Colors aligned with MC's default ruby.syntax (keywords=magenta, operators=yellow) - -;; Keywords -> magenta -[ - "BEGIN" - "END" - "alias" - "and" - "begin" - "break" - "case" - "class" - "def" - "do" - "else" - "elsif" - "end" - "ensure" - "for" - "if" - "in" - "module" - "next" - "not" - "or" - "redo" - "rescue" - "retry" - "return" - "then" - "undef" - "unless" - "until" - "when" - "while" - "yield" -] @keyword.directive - -;; self/super -> magenta -(self) @keyword.directive -(super) @keyword.directive - -;; nil/true/false -> brightred -[ - "nil" - (true) - (false) -] @function.special - -;; Built-in class methods -> magenta (same as keywords) -((identifier) @keyword.directive - (#match? @keyword.directive "^(require|require_relative|include|extend|attr_reader|attr_writer|attr_accessor|public|private|protected|raise)$")) - -;; Operators -> yellow -[ - "=" - "==" - "===" - "!=" - "<=>" - "<" - ">" - "<=" - ">=" - "+" - "-" - "*" - "/" - "%" - "**" - "&&" - "||" - "!" - "&" - "|" - "^" - "~" - "<<" - ">>" - ".." - "..." 
- "=>" - "+=" - "-=" - "*=" - "/=" - "||=" - "&&=" - "=~" - "!~" -] @operator.word - -;; Brackets/parens -> brightcyan -[ - "(" - ")" - "[" - "]" - "{" - "}" -] @delimiter - -;; Other delimiters -> brightcyan -[ - "," - ":" - "::" -] @delimiter - -(string) @string -(bare_string) @string -(string_array) @string -(heredoc_body) @string -(heredoc_beginning) @string - -(escape_sequence) @string.special - -(regex) @string.special - -;; Symbols -> white -(simple_symbol) @keyword.other -(hash_key_symbol) @keyword.other -(bare_symbol) @keyword.other - -;; Global variables -> brightgreen -(global_variable) @variable.special - -;; Instance variables -> white -(instance_variable) @keyword.other - -(comment) @comment diff --git a/misc/syntax-ts/queries/rust-highlights.scm b/misc/syntax-ts/queries/rust-highlights.scm deleted file mode 100644 index 76efcddc36..0000000000 --- a/misc/syntax-ts/queries/rust-highlights.scm +++ /dev/null @@ -1,83 +0,0 @@ -;; Tree-sitter highlight queries for Rust -;; Colors aligned with MC's default rust.syntax - -;; Keywords -> yellow -[ - "as" - "async" - "await" - "break" - "const" - "continue" - "dyn" - "else" - "enum" - "extern" - "fn" - "for" - "if" - "impl" - "in" - "let" - "loop" - "match" - "mod" - "move" - "pub" - "ref" - "return" - "static" - "struct" - "trait" - "type" - "unsafe" - "use" - "where" - "while" - "yield" -] @keyword - -(crate) @keyword -(super) @keyword -(mutable_specifier) @keyword - -;; self -> brightgreen -(self) @string.special - -;; true/false -> brightgreen -(boolean_literal) @string.special - -;; Macros -> brightmagenta -(macro_invocation - macro: (identifier) @function.macro) -(macro_invocation - macro: (scoped_identifier - name: (identifier) @function.macro)) -(macro_definition - name: (identifier) @function.macro) - -;; Types -> brightcyan (MC colors builtin types as brightcyan) -(type_identifier) @function -(primitive_type) @function - -(line_comment) @comment -(block_comment) @comment - -(string_literal) @string 
-(raw_string_literal) @string -(char_literal) @string.special - -;; Numbers -> brightgreen -(integer_literal) @number.builtin -(float_literal) @number.builtin - -;; Attributes -> white -[ - "#" -] @tag.special - -;; Enum variants Some/None/Ok/Err -> brightgreen -((identifier) @string.special - (#match? @string.special "^(Some|None|Ok|Err)$")) - -(label (identifier) @label) diff --git a/misc/syntax-ts/queries/scala-highlights.scm b/misc/syntax-ts/queries/scala-highlights.scm deleted file mode 100644 index af5710d578..0000000000 --- a/misc/syntax-ts/queries/scala-highlights.scm +++ /dev/null @@ -1,80 +0,0 @@ -;; Tree-sitter highlight queries for Scala -;; No MC syntax file — using reasonable defaults: -;; keywords=yellow, strings=green, comments=brown - -;; Keywords -> yellow -[ - "abstract" - "case" - "catch" - "class" - "def" - "do" - "else" - "extends" - "final" - "finally" - "for" - "if" - "implicit" - "import" - "lazy" - "match" - "new" - "object" - "override" - "package" - "private" - "protected" - "return" - "sealed" - "this" - "throw" - "trait" - "try" - "type" - "val" - "var" - "while" - "with" - "yield" -] @keyword - -;; Null/boolean literals -> brightmagenta (constant.builtin) -(null_literal) @constant.builtin -(boolean_literal) @constant.builtin - -;; Comments -> brown -(comment) @comment -(block_comment) @comment - -;; Strings -> green -(string) @string -(interpolated_string_expression) @string - -;; Chars -> brightgreen -(character_literal) @string.special - -;; Operators -> brightcyan -[ - "=" - "=>" - "<-" - "+" - "-" - "*" - "!" - ">" - "|" - "~" -] @operator - -;; Delimiters -> brightcyan -[ - "." 
- "," - ":" -] @delimiter - -;; Semicolons -> brightmagenta -";" @delimiter.special diff --git a/misc/syntax-ts/queries/slang-highlights.scm b/misc/syntax-ts/queries/slang-highlights.scm deleted file mode 100644 index 70144c2ee5..0000000000 --- a/misc/syntax-ts/queries/slang-highlights.scm +++ /dev/null @@ -1,350 +0,0 @@ -; inherits: c - -; cpp -((identifier) @variable.member - (#match? @variable.member "^m_.*$")) - -(parameter_declaration - declarator: (reference_declarator) @variable.parameter) - -; function(Foo ...foo) -(variadic_parameter_declaration - declarator: (variadic_declarator - (_) @variable.parameter)) - -; int foo = 0 -(optional_parameter_declaration - declarator: (_) @variable.parameter) - -;(field_expression) @variable.parameter ;; How to highlight this? -((field_expression - (field_identifier) @function.method) @_parent - (#has-parent? @_parent template_method function_declarator)) - -(field_declaration - (field_identifier) @variable.member) - -(field_initializer - (field_identifier) @property) - -(function_declarator - declarator: (field_identifier) @function.method) - -(concept_definition - name: (identifier) @type.definition) - -(alias_declaration - name: (type_identifier) @type.definition) - -(namespace_identifier) @module - -((namespace_identifier) @type - (#match? @type "^[%u]")) - -(case_statement - value: (qualified_identifier - (identifier) @constant)) - -(using_declaration - . - "using" - . - "namespace" - . 
- [ - (qualified_identifier) - (identifier) - ] @module) - -(destructor_name - (identifier) @function.method) - -; functions -(function_declarator - (qualified_identifier - (identifier) @function)) - -(function_declarator - (qualified_identifier - (qualified_identifier - (identifier) @function))) - -(function_declarator - (qualified_identifier - (qualified_identifier - (qualified_identifier - (identifier) @function)))) - -((qualified_identifier - (qualified_identifier - (qualified_identifier - (qualified_identifier - (identifier) @function)))) @_parent - (#has-ancestor? @_parent function_declarator)) - -(function_declarator - (template_function - (identifier) @function)) - -(operator_name) @function - -"operator" @function - -"static_assert" @function.builtin - -(call_expression - (qualified_identifier - (identifier) @function.call)) - -(call_expression - (qualified_identifier - (qualified_identifier - (identifier) @function.call))) - -(call_expression - (qualified_identifier - (qualified_identifier - (qualified_identifier - (identifier) @function.call)))) - -((qualified_identifier - (qualified_identifier - (qualified_identifier - (qualified_identifier - (identifier) @function.call)))) @_parent - (#has-ancestor? @_parent call_expression)) - -(call_expression - (template_function - (identifier) @function.call)) - -(call_expression - (qualified_identifier - (template_function - (identifier) @function.call))) - -(call_expression - (qualified_identifier - (qualified_identifier - (template_function - (identifier) @function.call)))) - -(call_expression - (qualified_identifier - (qualified_identifier - (qualified_identifier - (template_function - (identifier) @function.call))))) - -((qualified_identifier - (qualified_identifier - (qualified_identifier - (qualified_identifier - (template_function - (identifier) @function.call))))) @_parent - (#has-ancestor? 
@_parent call_expression)) - -; methods -(function_declarator - (template_method - (field_identifier) @function.method)) - -(call_expression - (field_expression - (field_identifier) @function.method.call)) - -; constructors -((function_declarator - (qualified_identifier - (identifier) @constructor)) - (#match? @constructor "^%u")) - -((call_expression - function: (identifier) @constructor) - (#match? @constructor "^%u")) - -((call_expression - function: (qualified_identifier - name: (identifier) @constructor)) - (#match? @constructor "^%u")) - -((call_expression - function: (field_expression - field: (field_identifier) @constructor)) - (#match? @constructor "^%u")) - -; constructing a type in an initializer list: Constructor (): **SuperType (1)** -((field_initializer - (field_identifier) @constructor - (argument_list)) - (#match? @constructor "^%u")) - -; Constants -(this) @variable.builtin - -(null - "nullptr" @constant.builtin) - -(true) @boolean - -(false) @boolean - -; Literals -(raw_string_literal) @string - -; Keywords -[ - "try" - "catch" - "noexcept" - "throw" -] @keyword.exception - -[ - "decltype" - "explicit" - "friend" - "override" - "using" - "requires" - "constexpr" -] @keyword - -[ - "class" - "namespace" - "template" - "typename" - "concept" -] @keyword.type - -[ - "co_await" - "co_yield" - "co_return" -] @keyword.coroutine - -[ - "public" - "private" - "protected" - "final" - "virtual" -] @keyword.modifier - -[ - "new" - "delete" - "xor" - "bitand" - "bitor" - "compl" - "not" - "xor_eq" - "and_eq" - "or_eq" - "not_eq" - "and" - "or" -] @keyword.operator - -"<=>" @operator - -"::" @punctuation.delimiter - -(template_argument_list - [ - "<" - ">" - ] @punctuation.bracket) - -(template_parameter_list - [ - "<" - ">" - ] @punctuation.bracket) - -(literal_suffix) @operator - -; hlsl -[ - "in" - "out" - "inout" - "uniform" - "shared" - "groupshared" - "discard" - "cbuffer" - "row_major" - "column_major" - "globallycoherent" - "centroid" - "noperspective" 
- "nointerpolation" - "sample" - "linear" - "snorm" - "unorm" - "point" - "line" - "triangleadj" - "lineadj" - "triangle" -] @keyword.modifier - -((identifier) @variable.builtin - (#match? @variable.builtin "^SV_")) - -(hlsl_attribute) @attribute - -(hlsl_attribute - [ - "[" - "]" - ] @attribute) - -[ - "var" - "let" - "This" -] @type.builtin - -[ - "interface" - "extension" - "property" - "associatedtype" - "where" -] @keyword - -"__init" @constructor - -[ - "__subscript" - "get" - "set" -] @function.builtin - -(interface_requirements - (identifier) @type) - -(binary_expression - [ - "is" - "as" - ] - right: (identifier) @type) - -[ - "as" - "is" -] @keyword.operator - -[ - "__exported" - "import" -] @keyword.import - -(property_declaration - (identifier) @property) diff --git a/misc/syntax-ts/queries/smalltalk-highlights.scm b/misc/syntax-ts/queries/smalltalk-highlights.scm deleted file mode 100644 index 5bc6935f54..0000000000 --- a/misc/syntax-ts/queries/smalltalk-highlights.scm +++ /dev/null @@ -1,63 +0,0 @@ -;; Tree-sitter highlight queries for Smalltalk -;; Colors aligned with MC's smalltalk.syntax -;; MC: class/meta keywords=yellow, message keywords=brightmagenta, comments=brown, strings=brightcyan - -;; Special variables -> yellow (@keyword) -(self) @keyword -(super) @keyword -(nil) @keyword -(thisContext) @keyword - -;; Boolean literals -> brightmagenta (@keyword.control) -(true) @keyword.control -(false) @keyword.control - -;; Keywords in keyword messages -> brightmagenta (@keyword.control) -(keyword) @keyword.control - -;; Unary selectors -> yellow (@keyword) -(unary_selector) @keyword - -;; Binary operators -> cyan (@label) -(binary_operator) @label -(binary_selector) @label - -;; Return -> brightred (@function.special) -"^" @function.special - -;; Statement separators -> brightred (@function.special) -"." 
@function.special - -;; Temporaries delimiters -> brightred (@function.special) -"|" @function.special - -;; Assignment -> cyan (@label) -":=" @label - -;; Comments -> brown -(comment) @comment - -;; Strings -> brightcyan (@tag) -(string) @tag - -;; Symbols -> yellow (@keyword) -(symbol) @keyword - -;; Characters -> brightcyan (@tag) -(character) @tag - -;; Block arguments -(block_argument) @constant - -;; Cascade separator -";" @delimiter - -;; Brackets -[ - "(" - ")" - "[" - "]" - "{" - "}" -] @delimiter diff --git a/misc/syntax-ts/queries/sql-highlights.scm b/misc/syntax-ts/queries/sql-highlights.scm deleted file mode 100644 index 76fc503f47..0000000000 --- a/misc/syntax-ts/queries/sql-highlights.scm +++ /dev/null @@ -1,44 +0,0 @@ -;; Tree-sitter highlight queries for SQL -;; Colors aligned with MC's default sql.syntax -;; MC: keywords=yellow, comments=brown, strings=green, operators=brightcyan - -;; Types -> yellow (same as keywords in MC) -(type) @type - -;; Comments -> brown -(comment) @comment - -;; Strings -> green -(string) @string - -;; Operators -> brightcyan -[ - "=" - "!=" - "<>" - "<" - ">" - "<=" - ">=" - "+" - "-" - "*" - "/" - "%" - "^" - "&" - "|" - "~" - "<<" - ">>" - "&&" - "||" -] @operator - -;; Delimiters -> brightcyan -[ - ";" - "," - "(" - ")" -] @delimiter diff --git a/misc/syntax-ts/queries/strace-highlights.scm b/misc/syntax-ts/queries/strace-highlights.scm deleted file mode 100644 index d99520b2e5..0000000000 --- a/misc/syntax-ts/queries/strace-highlights.scm +++ /dev/null @@ -1,44 +0,0 @@ -;; Tree-sitter highlight queries for strace output -;; Colors aligned with MC's strace.syntax -;; MC colors syscalls by category: file I/O=cyan, read/write=magenta, memory=red, etc. 
- -;; Syscall names -> brightred (@function.special) -- most common default -(syscall) @function.special - -;; Strings -> green (@string) -(string) @string - -;; Return values -> yellow (@keyword) -(returnValue) @keyword - -;; Error names -> brightred (@function.special) -(errorName) @function.special - -;; Error descriptions -> brown (@comment) -(errorDescription) @comment - -;; Signal names -> brightred (@function.special) -(signal) @function.special - -;; PIDs -> brightcyan (@tag) -(pid) @tag - -;; Pointers -> lightgray (@constant) -(pointer) @constant - -;; Integers -> lightgray (@constant) -(integer) @constant - -;; Macros/flags -> yellow (@keyword) -(macro) @keyword - -;; Comments -> brown -(comment) @comment - -;; Punctuation -[ - "(" - ")" - "," - "=" -] @delimiter diff --git a/misc/syntax-ts/queries/swift-highlights.scm b/misc/syntax-ts/queries/swift-highlights.scm deleted file mode 100644 index fc1ffb630b..0000000000 --- a/misc/syntax-ts/queries/swift-highlights.scm +++ /dev/null @@ -1 +0,0 @@ -;; Swift — empty query (grammar ABI version 10 is too old for tree-sitter 0.25) diff --git a/misc/syntax-ts/queries/tcl-highlights.scm b/misc/syntax-ts/queries/tcl-highlights.scm deleted file mode 100644 index 7962505271..0000000000 --- a/misc/syntax-ts/queries/tcl-highlights.scm +++ /dev/null @@ -1,62 +0,0 @@ -;; Tree-sitter highlight queries for Tcl -;; Colors aligned with MC's default tcl.syntax -;; MC: keywords=yellow, comments=brown, strings=green, brackets=brightcyan, ;=brightmagenta - -;; Keywords that are anonymous string literals in the grammar -[ - "if" - "else" - "elseif" - "while" - "foreach" - "proc" - "set" - "regexp" - "try" - "catch" - "finally" - "error" - "on" - "namespace" - "global" - "expr" -] @keyword - -;; Named keyword nodes -(while) @keyword -(foreach) @keyword -(global) @keyword -(namespace) @keyword -(try) @keyword -(expr_cmd) @keyword -(regexp) @keyword - -;; Tcl built-in command names matched by regex -((simple_word) @keyword - (#match? 
@keyword "^(return|break|continue|puts|gets|open|close|read|eval|exec|source|package|require|variable|upvar|uplevel|array|list|lindex|lappend|llength|lrange|lsearch|lsort|lreplace|string|regsub|incr|append|format|scan|info|rename|trace|after|vwait|update|interp|switch)$")) - -;; Comments -> brown -(comment) @comment - -;; Quoted strings -> green -(quoted_word) @string - -;; Braced words -> green -(braced_word) @string - -;; Variables -> brightgreen (MC uses brightgreen for $vars) -(variable_substitution) @string.special - -;; Escape sequences -> brightgreen -(escaped_character) @string.special - -;; Brackets -> brightcyan (MC legacy colors these as brightcyan) -[ - "[" - "]" - "{" - "}" -] @delimiter - -;; Semicolons -> brightmagenta (MC legacy colors ; as brightmagenta) -";" @delimiter.special diff --git a/misc/syntax-ts/queries/terraform-highlights.scm b/misc/syntax-ts/queries/terraform-highlights.scm deleted file mode 100644 index 5a629a3809..0000000000 --- a/misc/syntax-ts/queries/terraform-highlights.scm +++ /dev/null @@ -1,95 +0,0 @@ -;; Tree-sitter highlight queries for Terraform (.tf/.tfvars) files -;; Uses the HCL grammar with Terraform-specific variable reference prefixes. -;; Block name coloring follows the same generic principle as HCL. - -[ - "if" - "else" - "endif" - "for" - "endfor" - "in" -] @keyword - -[ - (ellipsis) - "?" - "=>" -] @operator - -[ - "!" - "*" - "/" - "%" - "+" - "-" - ">" - ">=" - "<" - "<=" - "==" - "!=" - "&&" - "||" -] @operator - -[ - "." - ".*" - "," - "[*]" -] @delimiter - -[ - "{" - "}" - "[" - "]" - "(" - ")" -] @delimiter - -[ - ":" - "=" -] @operator - -((identifier) @type - (#match? @type "^(bool|string|number|object|tuple|list|map|set|any)$")) - -;; Top-level block names -> brightmagenta (keyword.directive) -(config_file - (body - (block - (identifier) @keyword.directive))) - - -;; Terraform variable reference prefixes -(variable_expr - (identifier) @keyword - (#match? 
@keyword "^(var|local|data|module|path|terraform|count|each|self)$")) - -(comment) @comment -(null_lit) @constant -(numeric_lit) @number -(bool_lit) @constant - -[ - (template_interpolation_start) - (template_interpolation_end) - (template_directive_start) - (template_directive_end) - (strip_marker) -] @operator - -[ - (heredoc_identifier) - (heredoc_start) -] @string - -[ - (quoted_template_start) - (quoted_template_end) - (template_literal) -] @string diff --git a/misc/syntax-ts/queries/toml-highlights.scm b/misc/syntax-ts/queries/toml-highlights.scm deleted file mode 100644 index 985fbb5b1f..0000000000 --- a/misc/syntax-ts/queries/toml-highlights.scm +++ /dev/null @@ -1,43 +0,0 @@ -;; Tree-sitter highlight queries for TOML -;; Colors aligned with MC's default toml.syntax - -;; Booleans -> brightcyan (tag, MC uses brightcyan for true/false) -(boolean) @tag - -;; Comments -> brown (comment) -(comment) @comment - -;; Bare keys -> white/default (variable) -(bare_key) @variable -(dotted_key) @variable -(quoted_key) @variable - -;; Strings -> brightgreen (string.special) -(string) @string.special - -;; Numbers -> brightcyan (tag, MC uses brightcyan for numbers) -(integer) @tag -(float) @tag - -;; Date/time values -> brightgreen (string.special) -(offset_date_time) @string.special -(local_date_time) @string.special -(local_date) @string.special -(local_time) @string.special - -;; Table brackets -> keyword (yellow, MC uses yellow for [ ]) -[ - "[" - "]" - "[[" - "]]" -] @keyword - -[ - "=" -] @operator - -[ - "." 
- "," -] @delimiter diff --git a/misc/syntax-ts/queries/turtle-highlights.scm b/misc/syntax-ts/queries/turtle-highlights.scm deleted file mode 100644 index fc0d2cc283..0000000000 --- a/misc/syntax-ts/queries/turtle-highlights.scm +++ /dev/null @@ -1,64 +0,0 @@ -;; Tree-sitter highlight queries for RDF Turtle -;; Colors aligned with MC's turtle.syntax -;; MC: declarations=magenta, keyword 'a'=yellow, comments=brown, strings=green, URIs=brightred - -;; Directives -> magenta (@keyword.directive) -[ - "@prefix" - "@base" - "BASE" - "PREFIX" -] @keyword.directive - -;; Keyword 'a' (rdf:type) -> yellow (@keyword) -"a" @keyword - -;; Boolean literals -> yellow (@keyword) -[ - "true" - "false" -] @keyword - -;; IRI references -> brightred (@function.special) -(iri_reference) @function.special - -;; Prefixed names -> cyan (@label) -(prefixed_name) @label -(namespace) @label - -;; Blank nodes -> cyan (@label) -(blank_node_label) @label - -;; Type annotation -> brightmagenta (@keyword.control) -"^^" @keyword.control - -;; Language tags -> brightmagenta (@keyword.control) -(lang_tag) @keyword.control - -;; Comments -> brown -(comment) @comment - -;; Strings -> green -(string) @string - -;; Escape sequences -> brightgreen -(echar) @string.special - -;; Punctuation -> white (@keyword.other) -[ - "." 
- "," - ";" -] @keyword.other - -;; Collection parens -> brightmagenta (@keyword.control) -[ - "(" - ")" -] @keyword.control - -;; Brackets -> cyan (@label) -[ - "[" - "]" -] @label diff --git a/misc/syntax-ts/queries/typescript-highlights.scm b/misc/syntax-ts/queries/typescript-highlights.scm deleted file mode 100644 index 97ea41ddcc..0000000000 --- a/misc/syntax-ts/queries/typescript-highlights.scm +++ /dev/null @@ -1,137 +0,0 @@ -;; Tree-sitter highlight queries for TypeScript -;; Colors aligned with MC's default ts.syntax - -;; Keywords -> yellow -[ - "as" - "async" - "await" - "break" - "case" - "catch" - "class" - "const" - "continue" - "debugger" - "default" - "delete" - "do" - "else" - "export" - "extends" - "finally" - "for" - "from" - "function" - "get" - "if" - "import" - "in" - "instanceof" - "let" - "new" - "of" - "return" - "set" - "static" - "switch" - "throw" - "try" - "typeof" - "var" - "void" - "while" - "with" - "yield" - "type" - "interface" - "enum" - "implements" - "declare" - "namespace" - "abstract" - "keyof" - "readonly" - "infer" - "is" - "asserts" - "override" - "satisfies" -] @keyword - -(this) @keyword -(super) @keyword - -;; true/false -> brightgreen -(true) @number.builtin -(false) @number.builtin - -;; null/undefined -> cyan (basic types in MC ts.syntax) -(null) @label -(undefined) @label - -;; Predefined types -> cyan (basic types in MC ts.syntax) -(predefined_type) @label - -(comment) @comment - -(string) @string -(template_string) @string -(regex) @string.special - -;; Numbers -> brightgreen in MC ts.syntax -(number) @number.builtin - -;; Operators -> yellow -[ - "!" - "!=" - "!==" - "%" - "&" - "&&" - "*" - "**" - "+" - "++" - "-" - "--" - "." 
- "/" - "<" - "<<" - "<=" - "=" - "==" - "===" - ">" - ">=" - ">>" - ">>>" - "^" - "|" - "||" - "~" -] @operator.word - -;; Arrow -> brightcyan -"=>" @operator - -;; Semicolons -> brightmagenta -";" @delimiter.special - -;; Brackets/parens -> brightcyan -[ - "," - "(" - ")" - "[" - "]" - "{" - "}" -] @delimiter - -;; Other delimiters -> brightcyan -[ - ":" -] @delimiter diff --git a/misc/syntax-ts/queries/verilog-highlights.scm b/misc/syntax-ts/queries/verilog-highlights.scm deleted file mode 100644 index 1624b9c702..0000000000 --- a/misc/syntax-ts/queries/verilog-highlights.scm +++ /dev/null @@ -1,124 +0,0 @@ -;; Tree-sitter highlight queries for Verilog -;; Colors aligned with MC's default verilog.syntax -;; MC: keywords=yellow, comments=brown, strings=green, operators=yellow, -;; brackets=brightcyan, ;=brightmagenta, bitwise=brightmagenta - -;; Keywords -> yellow -[ - "module" - "endmodule" - "input" - "output" - "inout" - "wire" - "reg" - "integer" - "real" - "parameter" - "localparam" - "assign" - "always" - "initial" - "begin" - "end" - "if" - "else" - "case" - "casex" - "casez" - "endcase" - "for" - "while" - "repeat" - "forever" - "generate" - "endgenerate" - "function" - "endfunction" - "task" - "endtask" - "posedge" - "negedge" - "specify" - "endspecify" - "default" - "signed" - "unsigned" - "supply0" - "supply1" - "tri" - "triand" - "trior" - "tri0" - "tri1" - "wand" - "wor" - "genvar" - "defparam" - "disable" - "event" - "force" - "release" - "wait" -] @keyword - -;; Gate primitives -> yellow -[ - "and" - "or" - "not" - "nand" - "nor" - "xor" - "xnor" - "buf" -] @keyword - -;; Comments -> brown -(comment) @comment - -;; Strings -> green -(string_literal) @string -(double_quoted_string) @string - -;; Arithmetic/comparison operators -> yellow (MC uses yellow for these) -[ - "=" - "==" - "!=" - "===" - "!==" - "<" - ">" - "<=" - ">=" - "+" - "-" - "*" - "/" - "%" - "&&" - "||" - "!" - "<<" - ">>" - "?" 
- ":" -] @keyword - -;; Bitwise operators -> brightmagenta (MC uses brightmagenta for & | ^ ~) -[ - "&" - "|" - "^" - "~" -] @delimiter.special - -;; Delimiters -> brightcyan (MC uses brightcyan for brackets, comma, dot) -[ - "." - "," -] @delimiter - -;; Semicolons -> brightmagenta -";" @delimiter.special diff --git a/misc/syntax-ts/queries/vhdl-highlights.scm b/misc/syntax-ts/queries/vhdl-highlights.scm deleted file mode 100644 index 4b70730fb3..0000000000 --- a/misc/syntax-ts/queries/vhdl-highlights.scm +++ /dev/null @@ -1,152 +0,0 @@ -;; Tree-sitter highlight queries for VHDL -;; Colors aligned with MC's default vhdl.syntax -;; MC: keywords=yellow, word-operators=green, symbol-operators=brightgreen, -;; comments=magenta, strings=green, types=cyan, booleans=brightred, -;; type/subtype decl=brightcyan, ports(in/out/etc)=white - -;; Core keywords -> yellow -[ - "library" - "use" - "entity" - "is" - "port" - "architecture" - "of" - "begin" - "end" - "signal" - "variable" - "constant" - "array" - "range" - "to" - "downto" - "process" - "if" - "then" - "else" - "elsif" - "case" - "when" - "others" - "for" - "while" - "loop" - "generate" - "component" - "generic" - "map" - "wait" - "until" - "assert" - "return" - "function" - "procedure" - "package" - "body" - "attribute" - "file" - "access" - "alias" - "record" - "with" - "select" - "after" - "transport" - "inertial" - "reject" - "block" - "configuration" - "impure" - "pure" - "shared" - "open" - "null" - "new" -] @keyword - -;; Word operators -> green (MC uses green for and/or/not/nand/etc) -[ - "not" - "and" - "or" - "nand" - "nor" - "xor" - "xnor" -] @string - -;; Port direction keywords -> white (MC uses white for in/out/inout/buffer) -[ - "in" - "out" - "inout" - "buffer" -] @keyword.other - -;; Type/subtype declarations -> brightcyan -[ - "type" - "subtype" -] @tag - -;; Entity/architecture/component names -> cyan (type) -(entity_declaration - name: (identifier) @label) -(architecture_body - (identifier) 
@label) -(component_declaration - name: (identifier) @label) -(full_type_declaration - (identifier) @label) -(subtype_declaration - (identifier) @label) - -;; Function/procedure calls and declarations -> brightcyan -(procedure_call_statement - procedure: (simple_name) @function) -(function_call - function: (simple_name) @function) -(function_declaration - designator: (identifier) @function) -(procedure_declaration - designator: (identifier) @function) - -;; Comments -> magenta (MC uses magenta for comments) -(comment) @keyword.directive - -;; Strings -> green -(string_literal) @string - -;; Character/bit literals -> brightgreen (string.special) -(character_literal) @string.special -(bit_string_literal) @string.special - -;; Symbol operators -> brightgreen (MC uses brightgreen for := . ; : , ' etc) -[ - "=>" - "<=" - ":=" - "=" - "/=" - "<" - ">" - "+" - "-" - "*" - "/" - "&" - "**" -] @string.special - -;; Delimiters -> brightgreen (MC uses brightgreen for . ; : , ( ) [ ] |) -[ - "." - ";" - "," - ":" -] @string.special - -;; Labels -(label (identifier) @label) diff --git a/misc/syntax-ts/queries/xml-highlights.scm b/misc/syntax-ts/queries/xml-highlights.scm deleted file mode 100644 index 45728e71e9..0000000000 --- a/misc/syntax-ts/queries/xml-highlights.scm +++ /dev/null @@ -1,43 +0,0 @@ -;; Tree-sitter highlight queries for XML -;; Colors aligned with MC's default xml.syntax - -;; Tag names -> white (tag.special) -(Name) @tag.special - -;; Attribute values -> brightcyan (delimiter) -(AttValue) @delimiter - -;; Comments -> brightgreen (comment.special) -(Comment) @comment.special - -;; prolog -> keyword (yellow) -(XMLDecl) @keyword - -;; -> keyword (yellow) -(doctypedecl) @keyword - -;; Processing instructions -> constant (lightgray) -(PI) @constant - -;; Character/entity references -> tag.special (white) -(EntityRef) @tag.special -(CharRef) @tag.special -(PEReference) @tag.special - -;; CharData (text content) -> default -(CharData) @variable - -;; = -> keyword 
(yellow, MC uses yellow for \s*=) -[ - "=" -] @keyword - -;; Angle brackets / delimiters -> tag.special (white) -[ - "<" - ">" - "" - "" -] @tag.special diff --git a/misc/syntax-ts/queries/yaml-highlights.scm b/misc/syntax-ts/queries/yaml-highlights.scm deleted file mode 100644 index 5a119b9af7..0000000000 --- a/misc/syntax-ts/queries/yaml-highlights.scm +++ /dev/null @@ -1,63 +0,0 @@ -;; Tree-sitter highlight queries for YAML -;; Colors aligned with MC's default yaml.syntax - -;; Keys in mappings -> yellow (property.key) -(block_mapping_pair - key: (_) @property.key) -(flow_pair - key: (_) @property.key) - -;; Booleans and null -> brightmagenta (constant.builtin) -(boolean_scalar) @constant.builtin -(null_scalar) @constant.builtin - -;; Comments -> brown -(comment) @comment - -;; Quoted strings -> green (unquoted string_scalar left as default, matching MC) -(double_quote_scalar) @string -(single_quote_scalar) @string - -;; Block scalars -> brown (comment) - MC uses brown context for | and > blocks -(block_scalar) @comment - -;; Escape sequences in strings -> brightgreen -(escape_sequence) @string.special - -;; Numbers -> lightgray -(integer_scalar) @number -(float_scalar) @number - -;; Timestamps -(timestamp_scalar) @string.special - -;; Anchors and aliases -> cyan -(anchor) @label -(alias) @label - -;; Tags -> yellow -(tag) @type - -;; Document markers -> brightcyan -[ - "---" - "..." -] @delimiter - -;; Operators -[ - ":" - "-" - ">" - "|" - "?" -] @operator - -;; Delimiters -> brightcyan -[ - "," - "[" - "]" - "{" - "}" -] @delimiter diff --git a/misc/syntax-ts/shebangs b/misc/syntax-ts/shebangs deleted file mode 100644 index dd96a117a5..0000000000 --- a/misc/syntax-ts/shebangs +++ /dev/null @@ -1,15 +0,0 @@ -# Tree-sitter grammar shebang mappings -# -# Format: ... -# MC extracts the interpreter from #!/path/to/interpreter (stripping path and env). 
- -awk awk gawk mawk -bash bash sh -javascript node -lua lua -perl perl -php php -python python python3 -ruby ruby -tcl tclsh -zsh zsh diff --git a/misc/syntax-ts/symbols b/misc/syntax-ts/symbols deleted file mode 100644 index 28e8324f88..0000000000 --- a/misc/syntax-ts/symbols +++ /dev/null @@ -1,15 +0,0 @@ -# Tree-sitter grammar symbol overrides -# -# Format: -# The default symbol is tree_sitter_(). -# Only grammars that don't follow this convention need entries here. - -cobol COBOL -cql cgsql -dtd xml -html_blade blade -lua nlua -robots robots_txt -terraform hcl -tsx typescript -vbnet tree_sitter_vb_dotnet diff --git a/misc/syntax-ts/wrappers b/misc/syntax-ts/wrappers deleted file mode 100644 index 2b4204e785..0000000000 --- a/misc/syntax-ts/wrappers +++ /dev/null @@ -1,31 +0,0 @@ -# Wrapper grammar configuration. -# -# Wrapper grammars are template languages that wrap a host language. -# Content outside the template syntax (the "host" content) lives in a -# specific AST node type and can be highlighted by injecting the host -# grammar into those nodes. -# -# This enables two features: -# -# 1. ERROR fallback: when a host grammar produces a catastrophic parse -# failure (ERROR root node), each wrapper that lists that host is -# tried as an alternative. If the wrapper parses successfully, the -# host grammar is injected into the wrapper's content node. -# Example: a .yaml file with {{ }} Go template syntax fails to -# parse as YAML, so gotmpl is tried. The YAML portions are then -# highlighted via injection into gotmpl's "text" nodes. -# -# 2. Compound extensions: for files like README.md.gotmpl, the inner -# extension (.md) identifies the host grammar, which is injected -# into the wrapper's content node. -# Example: README.md.gotmpl -> gotmpl grammar with markdown -# injected into "text" nodes. -# -# Format: -# wrapper_grammar content_node host1 host2 host3 ... -# -# wrapper_grammar Grammar name of the wrapper (must be in extensions). 
-# content_node AST node type that holds host language content. -# host1 host2 ... Grammar names that this wrapper can wrap. - -gotmpl text yaml json toml html xml markdown css diff --git a/scripts/ts-grammars-download.sh b/scripts/ts-grammars-download.sh deleted file mode 100755 index 244a7a0df1..0000000000 --- a/scripts/ts-grammars-download.sh +++ /dev/null @@ -1,312 +0,0 @@ -#!/bin/bash - -set -euo pipefail - -TS_GRAMMARS_VERSION='2026.04.11' -REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)" -BASE_URL='https://github.com/jtyr/tree-sitter-grammars/releases/download' - -usage() { - cat <<'USAGE' -Usage: ts-grammars-download.sh [OPTIONS] - -Download tree-sitter grammar files from GitHub releases. - -Options: - --source Download source tarball (for static builds from source) - --shared Download shared library tarball (.so/.dylib/.dll) - --static Download static library tarball (.a) - --latest Use latest release instead of pinned version - --platform= Override platform auto-detection - Supported: x86_64-linux, aarch64-linux, - aarch64-macos, x86_64-macos, - x86_64-windows - -h, --help Show this help message - -At least one of --source, --shared, or --static must be specified. 
-USAGE -} - -detect_platform() { - local arch os - - arch="$(uname -m)" - os="$(uname -s)" - - case "$arch/$os" in - x86_64/Linux) - echo 'x86_64-linux' - ;; - aarch64/Linux) - echo 'aarch64-linux' - ;; - arm64/Darwin) - echo 'aarch64-macos' - ;; - x86_64/Darwin) - echo 'x86_64-macos' - ;; - x86_64/MINGW*) - echo 'x86_64-windows' - ;; - *) - echo "Error: unsupported platform: $arch/$os" >&2 - exit 1 - ;; - esac -} - -fetch_latest_version() { - local version - - version="$(curl -s 'https://api.github.com/repos/jtyr/tree-sitter-grammars/releases/latest' \ - | grep -o '"tag_name": "[^"]*"' \ - | cut -d'"' -f4)" - - if [[ -z $version ]]; then - echo 'Error: failed to fetch latest version from GitHub API' >&2 - exit 1 - fi - - echo "$version" -} - -download_and_verify() { - local version=$1 - local filename=$2 - local dest_dir=$3 - - local tarball_url="$BASE_URL/$version/$filename" - local checksum_url="$BASE_URL/$version/tree-sitter-grammars.sha256" - local tarball_path="$dest_dir/$filename" - local checksum_path="$dest_dir/tree-sitter-grammars.sha256" - - echo "Downloading $tarball_url ..." - curl -fSL -o "$tarball_path" "$tarball_url" - - echo "Downloading $checksum_url ..." - curl -fSL -o "$checksum_path" "$checksum_url" - - echo 'Verifying checksum ...' - local expected - expected="$(grep "$filename" "$checksum_path" | awk '{print $1}')" - - if [[ -z $expected ]]; then - echo "Error: checksum not found for $filename" >&2 - exit 1 - fi - - local actual - actual="$(sha256sum "$tarball_path" | awk '{print $1}')" - - if [[ $actual != "$expected" ]]; then - echo "Error: checksum mismatch for $filename" >&2 - echo " Expected: $expected" >&2 - echo " Actual: $actual" >&2 - exit 1 - fi - - echo 'Checksum OK.' 
-} - -extract_source() { - local version=$1 - local tmp_dir - - tmp_dir="$(mktemp -d)" - trap "rm -rf '$tmp_dir'" EXIT - - local filename='tree-sitter-grammars-src.tar.gz' - - download_and_verify "$version" "$filename" "$tmp_dir" - - echo 'Extracting source tarball ...' - tar -xzf "$tmp_dir/$filename" -C "$tmp_dir" - - local grammars_dest="$REPO_ROOT/src/editor/ts-grammars" - local count=0 - - # Tarball contains a top-level directory; iterate grammar dirs inside it - for lang_dir in "$tmp_dir"/tree-sitter-grammars-*/*/; do - local lang - lang="$(basename "$lang_dir")" - - # Skip if not a grammar directory - [[ -f "$lang_dir/src/parser.c" ]] || continue - - # Skip C++ scanners - if [[ -f "$lang_dir/src/scanner.cc" ]]; then - echo " Skipping $lang (C++ scanner)" - continue - fi - - echo " Extracting source: $lang" - - mkdir -p "$grammars_dest/$lang" - cp "$lang_dir"/src/* "$grammars_dest/$lang/" - - count=$((count + 1)) - done - - echo "Source extraction complete: $count grammars." -} - -extract_shared() { - local version=$1 - local platform=$2 - local tmp_dir - - tmp_dir="$(mktemp -d)" - # Only set trap if not already set by extract_source - trap "rm -rf '$tmp_dir'" EXIT - - local filename="tree-sitter-grammars-$platform-shared.tar.gz" - - download_and_verify "$version" "$filename" "$tmp_dir" - - echo 'Extracting shared tarball ...' 
- tar -xzf "$tmp_dir/$filename" -C "$tmp_dir" - - local shared_dest="$REPO_ROOT/ts-grammars-shared" - local count=0 - - mkdir -p "$shared_dest" - - # Tarball contains a top-level directory; iterate grammar dirs inside it - for lang_dir in "$tmp_dir"/tree-sitter-grammars-*/*/; do - local lang - lang="$(basename "$lang_dir")" - - echo " Extracting shared: $lang" - - mkdir -p "$shared_dest/$lang" - - # Copy shared library files - for ext in so dylib dll; do - for lib in "$lang_dir"/*."$ext"; do - [[ -f $lib ]] && cp "$lib" "$shared_dest/$lang/" - done - done - - count=$((count + 1)) - done - - echo "Shared extraction complete: $count grammars." -} - -extract_static() { - local version=$1 - local platform=$2 - local tmp_dir - - tmp_dir="$(mktemp -d)" - trap "rm -rf '$tmp_dir'" EXIT - - local filename="tree-sitter-grammars-$platform-static.tar.gz" - - download_and_verify "$version" "$filename" "$tmp_dir" - - echo 'Extracting static tarball ...' - tar -xzf "$tmp_dir/$filename" -C "$tmp_dir" - - local grammars_dest="$REPO_ROOT/src/editor/ts-grammars" - local count=0 - - mkdir -p "$grammars_dest" - - # Tarball contains a top-level directory; iterate grammar dirs inside it - for lang_dir in "$tmp_dir"/tree-sitter-grammars-*/*/; do - local lang - lang="$(basename "$lang_dir")" - - # Skip if no .a file - [[ -f "$lang_dir/$lang.a" ]] || continue - - echo " Extracting static: $lang" - - mkdir -p "$grammars_dest/$lang" - cp "$lang_dir/$lang.a" "$grammars_dest/$lang/" - - count=$((count + 1)) - done - - echo "Static extraction complete: $count grammars." 
-} - -main() { - local do_source=0 - local do_shared=0 - local do_static=0 - local use_latest=0 - local platform='' - - while [[ $# -gt 0 ]]; do - case $1 in - --source) - do_source=1 - shift - ;; - --shared) - do_shared=1 - shift - ;; - --static) - do_static=1 - shift - ;; - --latest) - use_latest=1 - shift - ;; - --platform=*) - platform="${1#--platform=}" - shift - ;; - -h|--help) - usage - exit 0 - ;; - *) - echo "Error: unknown option: $1" >&2 - usage >&2 - exit 1 - ;; - esac - done - - if [[ $do_source -eq 0 && $do_shared -eq 0 && $do_static -eq 0 ]]; then - echo 'Error: at least one of --source, --shared, or --static must be specified' >&2 - usage >&2 - exit 1 - fi - - local version=$TS_GRAMMARS_VERSION - - if [[ $use_latest -eq 1 ]]; then - echo 'Fetching latest version ...' - version="$(fetch_latest_version)" - fi - - echo "Using version: $version" - - if [[ ($do_shared -eq 1 || $do_static -eq 1) && -z $platform ]]; then - platform="$(detect_platform)" - echo "Detected platform: $platform" - fi - - if [[ $do_source -eq 1 ]]; then - extract_source "$version" - fi - - if [[ $do_shared -eq 1 ]]; then - extract_shared "$version" "$platform" - fi - - if [[ $do_static -eq 1 ]]; then - extract_static "$version" "$platform" - fi - - echo 'Done.' 
-} - -main "$@" diff --git a/src/Makefile.am b/src/Makefile.am index 5fce3b04d5..dfd15456ce 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -59,10 +59,6 @@ mc_LDADD = \ libinternal.la \ $(TREE_SITTER_LIBS) -if TREE_SITTER_STATIC -mc_LDADD += editor/ts-grammars/libtsgrammars.a -endif - if ENABLE_MCLIB libinternal_la_LIBADD += \ $(top_builddir)/lib/libmc.la diff --git a/src/editor/Makefile.am b/src/editor/Makefile.am index 81f8fb5ae6..775c1c8fe4 100644 --- a/src/editor/Makefile.am +++ b/src/editor/Makefile.am @@ -1,11 +1,3 @@ -EXTRA_DIST = ts-grammar-loader.h - -if USE_TREE_SITTER -SUBDIRS = ts-grammars -else -SUBDIRS = -endif - if USE_INTERNAL_EDIT noinst_LTLIBRARIES = libedit.la else @@ -28,7 +20,8 @@ libedit_la_SOURCES = \ etags.c etags.h \ format.c \ syntax.c \ - syntax_ts.c syntax_ts.h + syntax_ts.c syntax_ts.h \ + ts-grammar-loader.h if USE_ASPELL if HAVE_GMODULE @@ -40,13 +33,8 @@ endif AM_CPPFLAGS = $(GLIB_CFLAGS) $(TREE_SITTER_CFLAGS) -I$(top_srcdir) if USE_TREE_SITTER -if TREE_SITTER_STATIC -AM_CPPFLAGS += @TREE_SITTER_GRAMMAR_DEFS@ -libedit_la_LIBADD = $(TREE_SITTER_LIBS) -else AM_CPPFLAGS += $(GMODULE_CFLAGS) -DTS_GRAMMAR_LIBDIR=\""$(libdir)/mc/ts-grammars"\" libedit_la_LIBADD = $(TREE_SITTER_LIBS) $(GMODULE_LIBS) -endif else libedit_la_LIBADD = endif diff --git a/src/editor/edit.c b/src/editor/edit.c index 90963f2548..fbb8d281b9 100644 --- a/src/editor/edit.c +++ b/src/editor/edit.c @@ -94,6 +94,7 @@ edit_options_t edit_options = { .save_position = TRUE, .syntax_highlighting = TRUE, #ifdef HAVE_TREE_SITTER + .use_tree_sitter = TRUE, .syntax_highlight_mode = SYNTAX_HIGHLIGHT_TS, .ts_available = TRUE, #endif diff --git a/src/editor/edit.h b/src/editor/edit.h index c2bafc9428..803d182443 100644 --- a/src/editor/edit.h +++ b/src/editor/edit.h @@ -62,8 +62,9 @@ typedef struct gboolean save_position; gboolean syntax_highlighting; #ifdef HAVE_TREE_SITTER + gboolean use_tree_sitter; /* persistent: prefer TS highlighting */ syntax_highlight_mode_t 
syntax_highlight_mode; - gboolean ts_available; /* FALSE if TS init failed for current file */ + gboolean ts_available; /* runtime: FALSE if TS init failed for current file */ #endif gboolean group_undo; char *backup_ext; diff --git a/src/editor/editcmd.c b/src/editor/editcmd.c index 34d1e19b64..ce75a0ef71 100644 --- a/src/editor/editcmd.c +++ b/src/editor/editcmd.c @@ -849,30 +849,33 @@ edit_syntax_onoff_cmd (WDialog *h) { #ifdef HAVE_TREE_SITTER /* Cycle through available modes. - If TS is available: TS -> Legacy -> None -> TS - If TS is not available: Legacy -> None -> Legacy */ - switch (edit_options.syntax_highlight_mode) + If TS enabled: TS -> Legacy -> None -> TS + If TS disabled: Legacy -> None -> Legacy */ + if (!edit_options.use_tree_sitter || mc_args__no_tree_sitter) { - case SYNTAX_HIGHLIGHT_TS: - edit_options.syntax_highlight_mode = SYNTAX_HIGHLIGHT_LEGACY; - edit_options.syntax_highlighting = TRUE; - break; - case SYNTAX_HIGHLIGHT_LEGACY: - edit_options.syntax_highlight_mode = SYNTAX_HIGHLIGHT_NONE; - edit_options.syntax_highlighting = FALSE; - break; - case SYNTAX_HIGHLIGHT_NONE: - default: - if (edit_options.ts_available && !mc_args__no_tree_sitter) - { - edit_options.syntax_highlight_mode = SYNTAX_HIGHLIGHT_TS; - } - else + /* No TS: just toggle Legacy <-> None */ + edit_options.syntax_highlighting = !edit_options.syntax_highlighting; + edit_options.syntax_highlight_mode = edit_options.syntax_highlighting + ? 
SYNTAX_HIGHLIGHT_LEGACY : SYNTAX_HIGHLIGHT_NONE; + } + else + { + switch (edit_options.syntax_highlight_mode) { + case SYNTAX_HIGHLIGHT_TS: edit_options.syntax_highlight_mode = SYNTAX_HIGHLIGHT_LEGACY; + edit_options.syntax_highlighting = TRUE; + break; + case SYNTAX_HIGHLIGHT_LEGACY: + edit_options.syntax_highlight_mode = SYNTAX_HIGHLIGHT_NONE; + edit_options.syntax_highlighting = FALSE; + break; + case SYNTAX_HIGHLIGHT_NONE: + default: + edit_options.syntax_highlight_mode = SYNTAX_HIGHLIGHT_TS; + edit_options.syntax_highlighting = TRUE; + break; } - edit_options.syntax_highlighting = TRUE; - break; } #else edit_options.syntax_highlighting = !edit_options.syntax_highlighting; @@ -888,8 +891,8 @@ edit_syntax_toggle_ts_cmd (WDialog *h) { #ifdef HAVE_TREE_SITTER /* Toggle between TS and Legacy (skip None). - If TS not available, do nothing. */ - if (!edit_options.ts_available || mc_args__no_tree_sitter) + Do nothing if TS is disabled. */ + if (!edit_options.use_tree_sitter || mc_args__no_tree_sitter) return; if (edit_options.syntax_highlight_mode == SYNTAX_HIGHLIGHT_TS) diff --git a/src/editor/editdraw.c b/src/editor/editdraw.c index ee142ff9a5..b661499977 100644 --- a/src/editor/editdraw.c +++ b/src/editor/editdraw.c @@ -155,70 +155,101 @@ status_string (WEdit *edit, char *s, int w) character_code = format_character_code (edit); #ifdef HAVE_TREE_SITTER - switch (edit_options.syntax_highlight_mode) + if (edit_options.use_tree_sitter) { - case SYNTAX_HIGHLIGHT_TS: - syntax_mode_label = edit->ts.active ? "TS" : "Legacy"; - break; - case SYNTAX_HIGHLIGHT_LEGACY: - syntax_mode_label = "Legacy"; - break; - case SYNTAX_HIGHLIGHT_NONE: - default: - syntax_mode_label = "None"; - break; + switch (edit_options.syntax_highlight_mode) + { + case SYNTAX_HIGHLIGHT_TS: + syntax_mode_label = edit->ts.active ? 
"TS" : "Legacy"; + break; + case SYNTAX_HIGHLIGHT_LEGACY: + syntax_mode_label = "Legacy"; + break; + case SYNTAX_HIGHLIGHT_NONE: + default: + syntax_mode_label = "None"; + break; + } } #endif // The field lengths just prevent the status line from shortening too much if (edit_options.simple_statusbar) - g_snprintf (s, w, -#ifdef HAVE_TREE_SITTER - "%c%c%c%c %3ld %5ld/%ld %6ld/%ld [%s] S:[%s] %s", -#else - "%c%c%c%c %3ld %5ld/%ld %6ld/%ld [%s] %s", -#endif - edit->mark1 != edit->mark2 ? (edit->column_highlight ? 'C' : 'B') : '-', // - edit->modified != 0 ? 'M' : '-', // - macro_index < 0 ? '-' : 'R', // - edit->overwrite == 0 ? '-' : 'O', // - edit->curs_col + edit->over_col, // - edit->buffer.curs_line + 1, // - edit->buffer.lines + 1, // - (long) edit->buffer.curs1, // - (long) edit->buffer.size, // - character_code, + { #ifdef HAVE_TREE_SITTER - syntax_mode_label, + if (edit_options.use_tree_sitter) + g_snprintf (s, w, + "%c%c%c%c %3ld %5ld/%ld %6ld/%ld [%s] S:[%s] %s", + edit->mark1 != edit->mark2 ? (edit->column_highlight ? 'C' : 'B') : '-', + edit->modified != 0 ? 'M' : '-', + macro_index < 0 ? '-' : 'R', + edit->overwrite == 0 ? '-' : 'O', + edit->curs_col + edit->over_col, + edit->buffer.curs_line + 1, + edit->buffer.lines + 1, + (long) edit->buffer.curs1, + (long) edit->buffer.size, + character_code, + syntax_mode_label, + mc_global.source_codepage >= 0 + ? get_codepage_id (mc_global.source_codepage) : ""); + else #endif - mc_global.source_codepage >= 0 ? get_codepage_id (mc_global.source_codepage) - : "" - ); + g_snprintf (s, w, + "%c%c%c%c %3ld %5ld/%ld %6ld/%ld [%s] %s", + edit->mark1 != edit->mark2 ? (edit->column_highlight ? 'C' : 'B') : '-', + edit->modified != 0 ? 'M' : '-', + macro_index < 0 ? '-' : 'R', + edit->overwrite == 0 ? '-' : 'O', + edit->curs_col + edit->over_col, + edit->buffer.curs_line + 1, + edit->buffer.lines + 1, + (long) edit->buffer.curs1, + (long) edit->buffer.size, + character_code, + mc_global.source_codepage >= 0 + ? 
get_codepage_id (mc_global.source_codepage) : ""); + } else - g_snprintf (s, w, -#ifdef HAVE_TREE_SITTER - "[%c%c%c%c] %2ld L:[%3ld+%2ld %3ld/%3ld] *(%-4ld/%4ldb) [%s] S:[%s] %s", -#else - "[%c%c%c%c] %2ld L:[%3ld+%2ld %3ld/%3ld] *(%-4ld/%4ldb) [%s] %s", -#endif - edit->mark1 != edit->mark2 ? (edit->column_highlight ? 'C' : 'B') : '-', // - edit->modified != 0 ? 'M' : '-', // - macro_index < 0 ? '-' : 'R', // - edit->overwrite == 0 ? '-' : 'O', // - edit->curs_col + edit->over_col, // - edit->start_line + 1, // - edit->curs_row, // - edit->buffer.curs_line + 1, // - edit->buffer.lines + 1, // - (long) edit->buffer.curs1, // - (long) edit->buffer.size, // - character_code, + { #ifdef HAVE_TREE_SITTER - syntax_mode_label, + if (edit_options.use_tree_sitter) + g_snprintf (s, w, + "[%c%c%c%c] %2ld L:[%3ld+%2ld %3ld/%3ld] *(%-4ld/%4ldb) [%s] S:[%s] %s", + edit->mark1 != edit->mark2 ? (edit->column_highlight ? 'C' : 'B') : '-', + edit->modified != 0 ? 'M' : '-', + macro_index < 0 ? '-' : 'R', + edit->overwrite == 0 ? '-' : 'O', + edit->curs_col + edit->over_col, + edit->start_line + 1, + edit->curs_row, + edit->buffer.curs_line + 1, + edit->buffer.lines + 1, + (long) edit->buffer.curs1, + (long) edit->buffer.size, + character_code, + syntax_mode_label, + mc_global.source_codepage >= 0 + ? get_codepage_id (mc_global.source_codepage) : ""); + else #endif - mc_global.source_codepage >= 0 ? get_codepage_id (mc_global.source_codepage) - : "" - ); + g_snprintf (s, w, + "[%c%c%c%c] %2ld L:[%3ld+%2ld %3ld/%3ld] *(%-4ld/%4ldb) [%s] %s", + edit->mark1 != edit->mark2 ? (edit->column_highlight ? 'C' : 'B') : '-', + edit->modified != 0 ? 'M' : '-', + macro_index < 0 ? '-' : 'R', + edit->overwrite == 0 ? '-' : 'O', + edit->curs_col + edit->over_col, + edit->start_line + 1, + edit->curs_row, + edit->buffer.curs_line + 1, + edit->buffer.lines + 1, + (long) edit->buffer.curs1, + (long) edit->buffer.size, + character_code, + mc_global.source_codepage >= 0 + ? 
get_codepage_id (mc_global.source_codepage) : ""); + } g_free (character_code); } diff --git a/src/editor/editoptions.c b/src/editor/editoptions.c index f60cb40daa..aad457ffab 100644 --- a/src/editor/editoptions.c +++ b/src/editor/editoptions.c @@ -124,6 +124,9 @@ edit_options_dialog (WDialog *h) char *p, *q; int wrap_mode = 0; gboolean old_syntax_hl; +#ifdef HAVE_TREE_SITTER + gboolean old_use_ts; +#endif #ifdef ENABLE_NLS static gboolean i18n_flag = FALSE; @@ -145,6 +148,11 @@ edit_options_dialog (WDialog *h) else wrap_mode = 0; + old_syntax_hl = edit_options.syntax_highlighting; +#ifdef HAVE_TREE_SITTER + old_use_ts = edit_options.use_tree_sitter; +#endif + { quick_widget_t quick_widgets[] = { // clang-format off @@ -175,6 +183,10 @@ edit_options_dialog (WDialog *h) QUICK_CHECKBOX (_ ("Visible &tabs"), &edit_options.visible_tabs, NULL), QUICK_CHECKBOX (_ ("Synta&x highlighting"), &edit_options.syntax_highlighting, NULL), +#ifdef HAVE_TREE_SITTER + QUICK_CHECKBOX (_ ("Tree-&sitter highlighting"), + &edit_options.use_tree_sitter, NULL), +#endif QUICK_CHECKBOX (_ ("C&ursor after inserted block"), &edit_options.cursor_after_inserted_block, NULL), QUICK_CHECKBOX (_ ("Pers&istent selection"), @@ -206,8 +218,6 @@ edit_options_dialog (WDialog *h) return; } - old_syntax_hl = edit_options.syntax_highlighting; - if (!edit_options.cursor_beyond_eol) g_list_foreach (GROUP (h)->widgets, edit_reset_over_col, NULL); @@ -243,8 +253,12 @@ edit_options_dialog (WDialog *h) edit_options.typewriter_wrap = FALSE; } - // Load or unload syntax rules if the option has changed - if (edit_options.syntax_highlighting != old_syntax_hl) + // Load or unload syntax rules if any highlighting option changed + if (edit_options.syntax_highlighting != old_syntax_hl +#ifdef HAVE_TREE_SITTER + || edit_options.use_tree_sitter != old_use_ts +#endif + ) g_list_foreach (GROUP (h)->widgets, edit_reload_syntax, NULL); } diff --git a/src/editor/syntax.c b/src/editor/syntax.c index f3a04bae3f..5def82d5ea 
100644 --- a/src/editor/syntax.c +++ b/src/editor/syntax.c @@ -1597,7 +1597,7 @@ edit_load_syntax (WEdit *edit, GPtrArray *pnames, const char *type) else if (edit_options.syntax_highlight_mode == SYNTAX_HIGHLIGHT_NONE) { edit_options.syntax_highlight_mode = - (edit_options.ts_available && !mc_args__no_tree_sitter) + (edit_options.use_tree_sitter && !mc_args__no_tree_sitter) ? SYNTAX_HIGHLIGHT_TS : SYNTAX_HIGHLIGHT_LEGACY; } diff --git a/src/editor/syntax_ts.c b/src/editor/syntax_ts.c index ca97c556e0..911cd158e7 100644 --- a/src/editor/syntax_ts.c +++ b/src/editor/syntax_ts.c @@ -33,11 +33,7 @@ #ifdef HAVE_TREE_SITTER #include -#ifdef TREE_SITTER_STATIC -#include "ts-grammars/ts-grammar-registry.h" -#else #include "ts-grammar-loader.h" -#endif #include "lib/global.h" #include "lib/skin.h" @@ -78,8 +74,31 @@ typedef struct /*** file scope variables ************************************************************************/ -/* Color mappings loaded from colors.ini. - Key: "section:capture_name" (e.g., "default:keyword", "markdown:comment") +/* Per-grammar config loaded from config.ini files */ +typedef struct +{ + char *grammar_name; + char *display_name; + char *symbol_override; + char *wrapper_content_node; /* first token of wrapper= (or NULL) */ + char **wrapper_hosts; /* remaining tokens of wrapper= (NULL-terminated, or NULL) */ + char **extensions; /* from extensions= (NULL-terminated, or NULL) */ + char **filenames; /* from filenames= (NULL-terminated, or NULL) */ + char **shebangs; /* from shebangs= (NULL-terminated, or NULL) */ +} ts_grammar_config_t; + +/* Grammar registry: populated on first use by scanning config.ini files */ +static GHashTable *ts_grammar_configs = NULL; /* grammar_name -> ts_grammar_config_t* */ +static GHashTable *ts_ext_map = NULL; /* ".py" -> "python" */ +static GHashTable *ts_filename_map = NULL; /* "Makefile" -> "make" */ +static GHashTable *ts_shebang_map = NULL; /* "python3" -> "python" */ +static GHashTable *ts_display_to_grammar 
= NULL; /* "Python Program" -> "python" */ +static GHashTable *ts_wrapper_host_map = NULL; /* "yaml" -> "gotmpl" (wrapper name) */ +static GHashTable *ts_wrapper_node_map = NULL; /* "gotmpl" -> "text" (content node) */ +static gboolean ts_registry_loaded = FALSE; + +/* Color mappings loaded from [colors] sections of config.ini files. + Key: "grammar_name:capture_name" (e.g., "python:keyword", "default:comment") Value: GINT_TO_POINTER(color_pair_id) */ static GHashTable *ts_color_map = NULL; @@ -159,81 +178,302 @@ ts_alloc_color_from_spec (const char *spec) } /** - * Load the colors.ini config file. Uses GKeyFile (INI format). - * Sections: [default] for global colors, [grammar_name] for overrides. - * User file takes precedence over system file (loaded first). + * Split a space-separated string into a NULL-terminated array. + * Returns newly allocated array (and strings within), or NULL if empty. + */ +static char ** +ts_split_values (const char *str) +{ + char **result; + int src, dst; + + if (str == NULL || *str == '\0') + return NULL; + + result = g_strsplit_set (str, " \t", -1); + + /* Remove empty strings from the array (from consecutive spaces) */ + src = 0; + dst = 0; + while (result[src] != NULL) + { + if (result[src][0] != '\0') + { + result[dst] = result[src]; + dst++; + } + else + g_free (result[src]); + src++; + } + result[dst] = NULL; + + if (dst == 0) + { + g_free (result); + return NULL; + } + + return result; +} + +/* --------------------------------------------------------------------------------------------- */ + +/** + * Load one config.ini file and populate the registry. + * grammar_name is the directory name (canonical grammar name). 
*/ static void -ts_load_color_config (void) +ts_load_one_config (const char *config_path, const char *grammar_name) +{ + GKeyFile *kf; + ts_grammar_config_t *cfg; + char *value; + gchar **keys; + gsize k_count, ki; + + kf = g_key_file_new (); + if (!g_key_file_load_from_file (kf, config_path, G_KEY_FILE_NONE, NULL)) + { + g_key_file_free (kf); + return; + } + + cfg = g_new0 (ts_grammar_config_t, 1); + cfg->grammar_name = g_strdup (grammar_name); + + /* [grammar] section */ + value = g_key_file_get_value (kf, "grammar", "display-name", NULL); + if (value != NULL) + { + g_strstrip (value); + cfg->display_name = value; + } + + value = g_key_file_get_value (kf, "grammar", "symbol", NULL); + if (value != NULL) + { + g_strstrip (value); + cfg->symbol_override = value; + } + + value = g_key_file_get_value (kf, "grammar", "extensions", NULL); + if (value != NULL) + { + g_strstrip (value); + cfg->extensions = ts_split_values (value); + g_free (value); + } + + value = g_key_file_get_value (kf, "grammar", "filenames", NULL); + if (value != NULL) + { + g_strstrip (value); + cfg->filenames = ts_split_values (value); + g_free (value); + } + + value = g_key_file_get_value (kf, "grammar", "shebangs", NULL); + if (value != NULL) + { + g_strstrip (value); + cfg->shebangs = ts_split_values (value); + g_free (value); + } + + value = g_key_file_get_value (kf, "grammar", "wrapper", NULL); + if (value != NULL) + { + char **parts; + + g_strstrip (value); + parts = ts_split_values (value); + g_free (value); + + if (parts != NULL && parts[0] != NULL) + { + int count, i; + char **hosts; + + cfg->wrapper_content_node = g_strdup (parts[0]); + + /* Remaining tokens are host grammar names */ + count = 0; + while (parts[count + 1] != NULL) + count++; + if (count > 0) + { + hosts = g_new0 (char *, count + 1); + for (i = 0; i < count; i++) + hosts[i] = g_strdup (parts[i + 1]); + hosts[count] = NULL; + cfg->wrapper_hosts = hosts; + } + } + g_strfreev (parts); + } + + /* Populate lookup maps */ + 
g_hash_table_insert (ts_grammar_configs, g_strdup (grammar_name), cfg); + + if (cfg->display_name != NULL) + g_hash_table_insert (ts_display_to_grammar, g_strdup (cfg->display_name), + g_strdup (grammar_name)); + + if (cfg->extensions != NULL) + { + int i; + + for (i = 0; cfg->extensions[i] != NULL; i++) + g_hash_table_insert (ts_ext_map, g_strdup (cfg->extensions[i]), + g_strdup (grammar_name)); + } + + if (cfg->filenames != NULL) + { + int i; + + for (i = 0; cfg->filenames[i] != NULL; i++) + g_hash_table_insert (ts_filename_map, g_strdup (cfg->filenames[i]), + g_strdup (grammar_name)); + } + + if (cfg->shebangs != NULL) + { + int i; + + for (i = 0; cfg->shebangs[i] != NULL; i++) + g_hash_table_insert (ts_shebang_map, g_strdup (cfg->shebangs[i]), + g_strdup (grammar_name)); + } + + if (cfg->wrapper_hosts != NULL) + { + int i; + + g_hash_table_insert (ts_wrapper_node_map, g_strdup (grammar_name), + g_strdup (cfg->wrapper_content_node)); + for (i = 0; cfg->wrapper_hosts[i] != NULL; i++) + g_hash_table_insert (ts_wrapper_host_map, g_strdup (cfg->wrapper_hosts[i]), + g_strdup (grammar_name)); + } + + /* [colors] section -> populate ts_color_map */ + keys = g_key_file_get_keys (kf, "colors", &k_count, NULL); + if (keys != NULL) + { + for (ki = 0; ki < k_count; ki++) + { + char *color_value; + char *map_key; + int color_pair; + + color_value = g_key_file_get_value (kf, "colors", keys[ki], NULL); + if (color_value == NULL) + continue; + + color_pair = ts_alloc_color_from_spec (color_value); + g_free (color_value); + + map_key = g_strdup_printf ("%s:%s", grammar_name, keys[ki]); + g_hash_table_insert (ts_color_map, map_key, GINT_TO_POINTER (color_pair)); + } + g_strfreev (keys); + } + + g_key_file_free (kf); +} + +/* --------------------------------------------------------------------------------------------- */ + +/** + * Scan per-grammar config.ini files from both search paths. + * User-local entries take precedence over system entries. 
+ */ +void +ts_load_grammar_registry (void) { const char *dirs[2]; int d; - if (ts_color_map != NULL) + if (ts_registry_loaded) return; + ts_registry_loaded = TRUE; + /* Allocate red color for ERROR nodes (parse failures) */ ts_error_color = ts_alloc_color_from_spec ("red;"); + ts_grammar_configs = g_hash_table_new (g_str_hash, g_str_equal); + ts_ext_map = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, g_free); + ts_filename_map = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, g_free); + ts_shebang_map = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, g_free); + ts_display_to_grammar = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, g_free); + ts_wrapper_host_map = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, g_free); + ts_wrapper_node_map = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, g_free); ts_color_map = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, NULL); dirs[0] = mc_config_get_data_path (); dirs[1] = mc_global.share_data_dir; - /* Load system file first, then user file overwrites */ + /* Scan system first, then user-local overwrites */ for (d = 1; d >= 0; d--) { - GKeyFile *kf; - char *path; - gchar **groups; - gsize g_count; - gsize gi; - - path = g_build_filename (dirs[d], EDIT_SYNTAX_TS_DIR, "colors.ini", (char *) NULL); - kf = g_key_file_new (); + char *ts_dir; + GDir *dir; + const char *entry; - if (!g_key_file_load_from_file (kf, path, G_KEY_FILE_NONE, NULL)) + ts_dir = g_build_filename (dirs[d], EDIT_SYNTAX_TS_DIR, (char *) NULL); + dir = g_dir_open (ts_dir, 0, NULL); + if (dir == NULL) { - g_key_file_free (kf); - g_free (path); + g_free (ts_dir); continue; } - g_free (path); - groups = g_key_file_get_groups (kf, &g_count); - for (gi = 0; gi < g_count; gi++) + while ((entry = g_dir_read_name (dir)) != NULL) { - gchar **keys; - gsize k_count; - gsize ki; + char *config_path; - keys = g_key_file_get_keys (kf, groups[gi], &k_count, NULL); - if (keys == NULL) - continue; + config_path = 
g_build_filename (ts_dir, entry, "config.ini", (char *) NULL); + if (g_file_test (config_path, G_FILE_TEST_IS_REGULAR)) + ts_load_one_config (config_path, entry); + g_free (config_path); + } - for (ki = 0; ki < k_count; ki++) - { - gchar *value; - char *map_key; - int color_pair; + g_dir_close (dir); + g_free (ts_dir); + } +} - value = g_key_file_get_value (kf, groups[gi], keys[ki], NULL); - if (value == NULL) - continue; +/* --------------------------------------------------------------------------------------------- */ - color_pair = ts_alloc_color_from_spec (value); +/** + * Get the config record for a grammar. Triggers registry load on first use. + */ +static const ts_grammar_config_t * +ts_get_grammar_config (const char *grammar_name) +{ + if (!ts_registry_loaded) + ts_load_grammar_registry (); - map_key = g_strdup_printf ("%s:%s", groups[gi], keys[ki]); - g_free (value); - g_hash_table_insert (ts_color_map, map_key, GINT_TO_POINTER (color_pair)); - } + return (const ts_grammar_config_t *) g_hash_table_lookup (ts_grammar_configs, grammar_name); +} - g_strfreev (keys); - } +/* --------------------------------------------------------------------------------------------- */ - g_strfreev (groups); - g_key_file_free (kf); - } +/** + * Get the symbol override for a grammar (for .so loading). + * Returns newly allocated string or NULL. + */ +char * +ts_get_symbol_override (const char *grammar_name) +{ + const ts_grammar_config_t *cfg = ts_get_grammar_config (grammar_name); + + if (cfg != NULL && cfg->symbol_override != NULL) + return g_strdup (cfg->symbol_override); + return NULL; } /* --------------------------------------------------------------------------------------------- */ @@ -244,8 +484,8 @@ ts_load_color_config (void) * the last ".suffix" and retries until a match is found or exhausted. * * Lookup order for each prefix: - * 1. [grammar_name]:capture (per-grammar override) - * 2. [default]:capture (global default) + * 1. 
[grammar_name]:capture (per-grammar color) + * 2. [default]:capture (global default, if a default/ config exists) */ static int ts_capture_name_to_color (const char *capture_name, const char *grammar_name) @@ -263,7 +503,7 @@ ts_capture_name_to_color (const char *capture_name, const char *grammar_name) gpointer value; char *key; - /* Try grammar-specific override first */ + /* Try grammar-specific color first */ if (grammar_name != NULL) { key = g_strdup_printf ("%s:%s", grammar_name, name); @@ -294,304 +534,27 @@ ts_capture_name_to_color (const char *capture_name, const char *grammar_name) } g_free (name); - - { - FILE *dbg = fopen ("/tmp/ts_color_debug.txt", "a"); - if (dbg != NULL) - { - fprintf (dbg, "UNRESOLVED: @%s grammar=%s hash_size=%u\n", - capture_name, grammar_name ? grammar_name : "NULL", - g_hash_table_size (ts_color_map)); - fclose (dbg); - } - } - return EDITOR_NORMAL_COLOR; } /* --------------------------------------------------------------------------------------------- */ /** - * Look up a config file by matching a value in the value list. - * Reads file where each line is: ... - * If value matches any val, returns g_strdup(key). Otherwise NULL. - * User file is searched first; if found there, system file is not searched. - * An empty value list means the grammar is disabled (skip it). 
- */ -static char * -ts_config_lookup_by_value (const char *config_name, const char *value) -{ - const char *dirs[2]; - int d; - - dirs[0] = mc_config_get_data_path (); - dirs[1] = mc_global.share_data_dir; - - for (d = 0; d < 2; d++) - { - char *path; - FILE *f; - char *line = NULL; - - path = g_build_filename (dirs[d], EDIT_SYNTAX_TS_DIR, config_name, (char *) NULL); - f = fopen (path, "r"); - g_free (path); - - if (f == NULL) - continue; - - while (read_one_line (&line, f) != 0) - { - char *p, *key; - - p = line; - - /* Skip whitespace */ - while (*p != '\0' && whiteness (*p)) - p++; - - /* Skip comments and empty lines */ - if (*p == '#' || *p == '\0') - { - g_free (line); - line = NULL; - continue; - } - - /* Extract key (first word) */ - key = p; - while (*p != '\0' && !whiteness (*p)) - p++; - - /* If no values follow the key, grammar is disabled -- skip */ - if (*p == '\0') - { - g_free (line); - line = NULL; - continue; - } - *p++ = '\0'; - - /* Check each value on the line */ - while (*p != '\0') - { - char *val; - - /* Skip whitespace */ - while (*p != '\0' && whiteness (*p)) - p++; - - if (*p == '\0') - break; - - val = p; - while (*p != '\0' && !whiteness (*p)) - p++; - if (*p != '\0') - *p++ = '\0'; - - if (strcmp (val, value) == 0) - { - char *result = g_strdup (key); - - g_free (line); - fclose (f); - return result; - } - } - - g_free (line); - line = NULL; - } - - g_free (line); - fclose (f); - } - - return NULL; -} - -/* --------------------------------------------------------------------------------------------- */ - -/** - * Look up a config file by matching the key (first field). - * Returns g_strdup of the rest of the line (trimmed). Otherwise NULL. - * User file takes precedence per-record. 
- */ -char * -ts_config_lookup_by_grammar (const char *config_name, const char *grammar_name) -{ - const char *dirs[2]; - int d; - - dirs[0] = mc_config_get_data_path (); - dirs[1] = mc_global.share_data_dir; - - for (d = 0; d < 2; d++) - { - char *path; - FILE *f; - char *line = NULL; - - path = g_build_filename (dirs[d], EDIT_SYNTAX_TS_DIR, config_name, (char *) NULL); - f = fopen (path, "r"); - g_free (path); - - if (f == NULL) - continue; - - while (read_one_line (&line, f) != 0) - { - char *p, *key; - - p = line; - - /* Skip whitespace */ - while (*p != '\0' && whiteness (*p)) - p++; - - /* Skip comments and empty lines */ - if (*p == '#' || *p == '\0') - { - g_free (line); - line = NULL; - continue; - } - - /* Extract key (first word) */ - key = p; - while (*p != '\0' && !whiteness (*p)) - p++; - if (*p == '\0') - { - /* Key only, no value */ - if (strcmp (key, grammar_name) == 0) - { - g_free (line); - fclose (f); - return g_strdup (""); - } - g_free (line); - line = NULL; - continue; - } - *p++ = '\0'; - - if (strcmp (key, grammar_name) == 0) - { - char *rest, *end; - - /* Skip whitespace before the rest */ - while (*p != '\0' && whiteness (*p)) - p++; - rest = p; - - /* Trim trailing whitespace */ - end = rest + strlen (rest) - 1; - while (end > rest && whiteness (*end)) - *end-- = '\0'; - - { - char *result = g_strdup (rest); - - g_free (line); - fclose (f); - return result; - } - } - - g_free (line); - line = NULL; - } - - g_free (line); - fclose (f); - } - - return NULL; -} - -/* --------------------------------------------------------------------------------------------- */ - -/** - * Reverse lookup: given a config file and a display value (rest of line), - * return the key (first field). For example, looking up "Go Template" in - * display-names returns "gotmpl". + * Reverse lookup: given a display name, return the grammar name. * Returns newly allocated string or NULL. 
*/ char * ts_config_reverse_lookup (const char *config_name, const char *display_value) { - const char *dirs[2]; - int d; - - dirs[0] = mc_config_get_data_path (); - dirs[1] = mc_global.share_data_dir; - - for (d = 0; d < 2; d++) - { - char *path; - FILE *f; - char *line = NULL; - - path = g_build_filename (dirs[d], EDIT_SYNTAX_TS_DIR, config_name, (char *) NULL); - f = fopen (path, "r"); - g_free (path); - - if (f == NULL) - continue; - - while (read_one_line (&line, f) != 0) - { - char *p, *key, *rest, *end; - - p = line; - while (*p != '\0' && whiteness (*p)) - p++; - if (*p == '#' || *p == '\0') - { - g_free (line); - line = NULL; - continue; - } - - key = p; - while (*p != '\0' && !whiteness (*p)) - p++; - if (*p == '\0') - { - g_free (line); - line = NULL; - continue; - } - *p++ = '\0'; - - while (*p != '\0' && whiteness (*p)) - p++; - rest = p; + const char *grammar; - end = rest + strlen (rest) - 1; - while (end > rest && whiteness (*end)) - *end-- = '\0'; + (void) config_name; /* kept for API compatibility */ - if (strcmp (rest, display_value) == 0) - { - char *result = g_strdup (key); - - g_free (line); - fclose (f); - return result; - } + if (!ts_registry_loaded) + ts_load_grammar_registry (); - g_free (line); - line = NULL; - } - - g_free (line); - fclose (f); - } - - return NULL; + grammar = (const char *) g_hash_table_lookup (ts_display_to_grammar, display_value); + return (grammar != NULL) ? 
g_strdup (grammar) : NULL; } /* --------------------------------------------------------------------------------------------- */ @@ -685,10 +648,15 @@ ts_find_grammar (const char *filename, const char *first_line, char **grammar_name, char **display_name) { const char *basename; + const char *match; + const ts_grammar_config_t *cfg; if (filename == NULL) return FALSE; + if (!ts_registry_loaded) + ts_load_grammar_registry (); + *grammar_name = NULL; *display_name = NULL; @@ -696,10 +664,13 @@ ts_find_grammar (const char *filename, const char *first_line, basename = (basename != NULL) ? basename + 1 : filename; /* 1. Exact filename match (most specific) */ - *grammar_name = ts_config_lookup_by_value ("filenames", basename); - if (*grammar_name != NULL) + match = (const char *) g_hash_table_lookup (ts_filename_map, basename); + if (match != NULL) { - *display_name = ts_config_lookup_by_grammar ("display-names", *grammar_name); + *grammar_name = g_strdup (match); + cfg = ts_get_grammar_config (match); + *display_name = (cfg != NULL && cfg->display_name != NULL) + ? g_strdup (cfg->display_name) : g_strdup (match); return TRUE; } @@ -710,11 +681,14 @@ ts_find_grammar (const char *filename, const char *first_line, if (interpreter != NULL) { - *grammar_name = ts_config_lookup_by_value ("shebangs", interpreter); + match = (const char *) g_hash_table_lookup (ts_shebang_map, interpreter); g_free (interpreter); - if (*grammar_name != NULL) + if (match != NULL) { - *display_name = ts_config_lookup_by_grammar ("display-names", *grammar_name); + *grammar_name = g_strdup (match); + cfg = ts_get_grammar_config (match); + *display_name = (cfg != NULL && cfg->display_name != NULL) + ? 
g_strdup (cfg->display_name) : g_strdup (match); return TRUE; } } @@ -726,10 +700,13 @@ ts_find_grammar (const char *filename, const char *first_line, if (dot != NULL) { - *grammar_name = ts_config_lookup_by_value ("extensions", dot); - if (*grammar_name != NULL) + match = (const char *) g_hash_table_lookup (ts_ext_map, dot); + if (match != NULL) { - *display_name = ts_config_lookup_by_grammar ("display-names", *grammar_name); + *grammar_name = g_strdup (match); + cfg = ts_get_grammar_config (match); + *display_name = (cfg != NULL && cfg->display_name != NULL) + ? g_strdup (cfg->display_name) : g_strdup (match); return TRUE; } } @@ -741,196 +718,49 @@ ts_find_grammar (const char *filename, const char *first_line, /* --------------------------------------------------------------------------------------------- */ /** - * Look up the wrappers config file to find a wrapper grammar for a given host. - * The wrappers file format is: - * wrapper_grammar content_node host1 host2 ... - * - * If host_grammar matches one of the hosts, returns the wrapper grammar name - * (newly allocated) and fills content_node (newly allocated). + * Look up the wrappers registry to find a wrapper grammar for a given host. + * Returns the wrapper grammar name (newly allocated) and fills content_node. * Returns NULL if no wrapper handles this host. 
*/ static char * ts_find_wrapper_for_host (const char *host_grammar, char **content_node) { - const char *dirs[2]; - int d; + const char *wrapper; + const char *node; *content_node = NULL; - dirs[0] = mc_config_get_data_path (); - dirs[1] = mc_global.share_data_dir; - for (d = 0; d < 2; d++) - { - char *path; - FILE *f; - char *line = NULL; + if (!ts_registry_loaded) + ts_load_grammar_registry (); - path = g_build_filename (dirs[d], EDIT_SYNTAX_TS_DIR, "wrappers", (char *) NULL); - f = fopen (path, "r"); - g_free (path); - - if (f == NULL) - continue; - - while (read_one_line (&line, f) != 0) - { - char *p, *wrapper, *node; - - p = line; - while (*p != '\0' && whiteness (*p)) - p++; - if (*p == '#' || *p == '\0') - { - g_free (line); - line = NULL; - continue; - } - - /* Field 1: wrapper grammar name */ - wrapper = p; - while (*p != '\0' && !whiteness (*p)) - p++; - if (*p == '\0') - { - g_free (line); - line = NULL; - continue; - } - *p++ = '\0'; - - /* Field 2: content node name */ - while (*p != '\0' && whiteness (*p)) - p++; - node = p; - while (*p != '\0' && !whiteness (*p)) - p++; - if (*p == '\0') - { - g_free (line); - line = NULL; - continue; - } - *p++ = '\0'; - - /* Remaining fields: host grammar names */ - while (*p != '\0') - { - char *host; - - while (*p != '\0' && whiteness (*p)) - p++; - if (*p == '\0') - break; - - host = p; - while (*p != '\0' && !whiteness (*p)) - p++; - if (*p != '\0') - *p++ = '\0'; - - if (strcmp (host, host_grammar) == 0) - { - char *result = g_strdup (wrapper); - - *content_node = g_strdup (node); - g_free (line); - fclose (f); - return result; - } - } - - g_free (line); - line = NULL; - } + wrapper = (const char *) g_hash_table_lookup (ts_wrapper_host_map, host_grammar); + if (wrapper == NULL) + return NULL; - g_free (line); - fclose (f); - } + node = (const char *) g_hash_table_lookup (ts_wrapper_node_map, wrapper); + if (node == NULL) + return NULL; - return NULL; + *content_node = g_strdup (node); + return g_strdup 
(wrapper); } /* --------------------------------------------------------------------------------------------- */ /** - * Look up the wrappers config file to find the content node for a wrapper grammar. + * Look up the wrappers registry to find the content node for a wrapper grammar. * Returns the content node name (newly allocated), or NULL if not a wrapper. */ static char * ts_find_wrapper_content_node (const char *wrapper_grammar) { - const char *dirs[2]; - int d; + const char *node; - dirs[0] = mc_config_get_data_path (); - dirs[1] = mc_global.share_data_dir; - - for (d = 0; d < 2; d++) - { - char *path; - FILE *f; - char *line = NULL; + if (!ts_registry_loaded) + ts_load_grammar_registry (); - path = g_build_filename (dirs[d], EDIT_SYNTAX_TS_DIR, "wrappers", (char *) NULL); - f = fopen (path, "r"); - g_free (path); - - if (f == NULL) - continue; - - while (read_one_line (&line, f) != 0) - { - char *p, *wrapper, *node; - - p = line; - while (*p != '\0' && whiteness (*p)) - p++; - if (*p == '#' || *p == '\0') - { - g_free (line); - line = NULL; - continue; - } - - /* Field 1: wrapper grammar name */ - wrapper = p; - while (*p != '\0' && !whiteness (*p)) - p++; - if (*p == '\0') - { - g_free (line); - line = NULL; - continue; - } - *p++ = '\0'; - - if (strcmp (wrapper, wrapper_grammar) == 0) - { - char *result; - - /* Field 2: content node name */ - while (*p != '\0' && whiteness (*p)) - p++; - node = p; - while (*p != '\0' && !whiteness (*p)) - p++; - *p = '\0'; - - result = g_strdup (node); - g_free (line); - fclose (f); - return result; - } - - g_free (line); - line = NULL; - } - - g_free (line); - fclose (f); - } - - return NULL; + node = (const char *) g_hash_table_lookup (ts_wrapper_node_map, wrapper_grammar); + return (node != NULL) ? 
g_strdup (node) : NULL; } /* --------------------------------------------------------------------------------------------- */ @@ -967,13 +797,13 @@ ts_setup_wrapper_injection (WEdit *edit, const TSLanguage *lang, /* --------------------------------------------------------------------------------------------- */ /** - * Load a query file. Search order: - * 1. User queries: ~/.local/share/mc/syntax-ts/queries/ - * 2. System queries: $(datadir)/mc/syntax-ts/queries/ + * Load a query file from per-grammar directory. Search order: + * 1. User: ~/.local/share/mc/syntax-ts// + * 2. System: $(datadir)/mc/syntax-ts// * Returns a newly allocated string with the file contents, or NULL on failure. */ static char * -ts_load_query_file (const char *query_filename, uint32_t *out_len) +ts_load_query_file (const char *grammar_name, const char *query_filename, uint32_t *out_len) { const char *base_dirs[2]; char *contents = NULL; @@ -987,7 +817,7 @@ ts_load_query_file (const char *query_filename, uint32_t *out_len) { char *path; - path = g_build_filename (base_dirs[b], EDIT_SYNTAX_TS_DIR, "queries", + path = g_build_filename (base_dirs[b], EDIT_SYNTAX_TS_DIR, grammar_name, query_filename, (char *) NULL); if (g_file_get_contents (path, &contents, &len, NULL)) { @@ -1276,16 +1106,13 @@ ts_evaluate_match_predicates (TSQuery *query, const TSQueryMatch *match, WEdit * static void ts_init_injections (WEdit *edit, const char *grammar_name, const TSLanguage *lang) { - char *query_filename; char *query_src; uint32_t query_len; TSQuery *inj_query; uint32_t error_offset; TSQueryError error_type; - query_filename = g_strdup_printf ("%s-injections.scm", grammar_name); - query_src = ts_load_query_file (query_filename, &query_len); - g_free (query_filename); + query_src = ts_load_query_file (grammar_name, "injections.scm", &query_len); if (query_src == NULL) return; /* no injections for this grammar */ @@ -1318,7 +1145,6 @@ ts_init_for_file (WEdit *edit, const char *forced_grammar) const char 
*filename; char *grammar_name = NULL; char *display_name = NULL; - char *query_filename = NULL; const TSLanguage *lang; TSParser *parser; TSTree *tree; @@ -1329,8 +1155,9 @@ ts_init_for_file (WEdit *edit, const char *forced_grammar) TSQueryError error_type; TSQuery *query; - /* Load color mappings from config on first use */ - ts_load_color_config (); + /* Ensure grammar registry (and colors) are loaded */ + if (!ts_registry_loaded) + ts_load_grammar_registry (); filename = vfs_path_as_str (edit->filename_vpath); @@ -1338,9 +1165,11 @@ ts_init_for_file (WEdit *edit, const char *forced_grammar) { /* Manual grammar selection — skip auto-detection */ grammar_name = g_strdup (forced_grammar); - display_name = ts_config_lookup_by_grammar ("display-names", forced_grammar); - if (display_name == NULL) - display_name = g_strdup (forced_grammar); + { + const ts_grammar_config_t *cfg = ts_get_grammar_config (forced_grammar); + display_name = (cfg != NULL && cfg->display_name != NULL) + ? g_strdup (cfg->display_name) : g_strdup (forced_grammar); + } } else if (!ts_find_grammar (filename, get_first_editor_line (edit), &grammar_name, &display_name)) @@ -1403,8 +1232,6 @@ ts_init_for_file (WEdit *edit, const char *forced_grammar) if (wrapper_tree != NULL && !ts_node_is_error (ts_tree_root_node (wrapper_tree))) { - char *wrapper_display; - /* Inject the original grammar into the wrapper's content nodes */ ts_setup_wrapper_injection (edit, wrapper_lang, content_node, grammar_name); @@ -1415,10 +1242,11 @@ ts_init_for_file (WEdit *edit, const char *forced_grammar) g_free (grammar_name); grammar_name = g_strdup (wrapper_name); g_free (display_name); - wrapper_display = - ts_config_lookup_by_grammar ("display-names", wrapper_name); - display_name = - (wrapper_display != NULL) ? wrapper_display : g_strdup (wrapper_name); + { + const ts_grammar_config_t *wcfg = ts_get_grammar_config (wrapper_name); + display_name = (wcfg != NULL && wcfg->display_name != NULL) + ? 
g_strdup (wcfg->display_name) : g_strdup (wrapper_name); + } } else { @@ -1435,10 +1263,8 @@ ts_init_for_file (WEdit *edit, const char *forced_grammar) g_free (content_node); } - // Load and compile highlight query: -highlights.scm - query_filename = g_strdup_printf ("%s-highlights.scm", grammar_name); - query_src = ts_load_query_file (query_filename, &query_len); - g_free (query_filename); + // Load and compile highlight query + query_src = ts_load_query_file (grammar_name, "highlights.scm", &query_len); if (query_src == NULL) { @@ -1514,7 +1340,9 @@ ts_init_for_file (WEdit *edit, const char *forced_grammar) memcpy (ext_buf, prev_dot, (size_t) ext_len); ext_buf[ext_len] = '\0'; - host_grammar = ts_config_lookup_by_value ("extensions", ext_buf); + host_grammar = g_hash_table_lookup (ts_ext_map, ext_buf) != NULL + ? g_strdup ((const char *) g_hash_table_lookup (ts_ext_map, ext_buf)) + : NULL; if (host_grammar != NULL) { ts_setup_wrapper_injection (edit, lang, content_node, @@ -1606,7 +1434,7 @@ ts_free (WEdit *edit) edit->ts.highlights_end = -1; edit->ts.active = FALSE; - /* Clear the global color map so it's reloaded on next init. + /* Clear the grammar registry and color map so they're reloaded on next init. This is needed because tty_color_free_temp() (called by edit_free_syntax_rules) invalidates all temporary color pairs, including the ones stored in ts_color_map. 
*/ @@ -1616,6 +1444,42 @@ ts_free (WEdit *edit) ts_color_map = NULL; ts_error_color = -1; } + if (ts_grammar_configs != NULL) + { + g_hash_table_destroy (ts_grammar_configs); + ts_grammar_configs = NULL; + } + if (ts_ext_map != NULL) + { + g_hash_table_destroy (ts_ext_map); + ts_ext_map = NULL; + } + if (ts_filename_map != NULL) + { + g_hash_table_destroy (ts_filename_map); + ts_filename_map = NULL; + } + if (ts_shebang_map != NULL) + { + g_hash_table_destroy (ts_shebang_map); + ts_shebang_map = NULL; + } + if (ts_display_to_grammar != NULL) + { + g_hash_table_destroy (ts_display_to_grammar); + ts_display_to_grammar = NULL; + } + if (ts_wrapper_host_map != NULL) + { + g_hash_table_destroy (ts_wrapper_host_map); + ts_wrapper_host_map = NULL; + } + if (ts_wrapper_node_map != NULL) + { + g_hash_table_destroy (ts_wrapper_node_map); + ts_wrapper_node_map = NULL; + } + ts_registry_loaded = FALSE; } /* --------------------------------------------------------------------------------------------- */ @@ -1682,7 +1546,6 @@ ts_get_dynamic_lang (GHashTable *lang_cache, const char *lang_name) ts_dynamic_lang_t *dl; const TSLanguage *lang; TSParser *parser; - char *query_filename; char *query_src; uint32_t query_len; uint32_t error_offset; @@ -1712,9 +1575,7 @@ ts_get_dynamic_lang (GHashTable *lang_cache, const char *lang_name) return NULL; } - query_filename = g_strdup_printf ("%s-highlights.scm", lang_name); - query_src = ts_load_query_file (query_filename, &query_len); - g_free (query_filename); + query_src = ts_load_query_file (lang_name, "highlights.scm", &query_len); if (query_src == NULL) { @@ -1826,13 +1687,10 @@ ts_inject_and_highlight (const char *lang_name, TSNode content_node, TSInput inp dynamic language entry to avoid recompiling on every call. 
*/ if (!dl->injection_query_loaded) { - char *nested_inj_filename; char *nested_inj_src; uint32_t nested_inj_len; - nested_inj_filename = g_strdup_printf ("%s-injections.scm", lang_name); - nested_inj_src = ts_load_query_file (nested_inj_filename, &nested_inj_len); - g_free (nested_inj_filename); + nested_inj_src = ts_load_query_file (lang_name, "injections.scm", &nested_inj_len); if (nested_inj_src != NULL) { diff --git a/src/editor/syntax_ts.h b/src/editor/syntax_ts.h index 2e61b82c7c..1ab20d58c4 100644 --- a/src/editor/syntax_ts.h +++ b/src/editor/syntax_ts.h @@ -17,6 +17,7 @@ gboolean ts_init_for_file (WEdit *edit, const char *forced_grammar); char *ts_config_reverse_lookup (const char *config_name, const char *display_value); +void ts_load_grammar_registry (void); /* exposed for mc-syntax-dump */ void ts_free (WEdit *edit); int ts_get_color_at (WEdit *edit, off_t byte_index); void ts_rebuild_highlight_cache (WEdit *edit, off_t range_start, off_t range_end); diff --git a/src/editor/ts-grammar-loader.h b/src/editor/ts-grammar-loader.h index e5853c7890..0ec46ff411 100644 --- a/src/editor/ts-grammar-loader.h +++ b/src/editor/ts-grammar-loader.h @@ -14,8 +14,8 @@ #include #include -/* Implemented in syntax_ts.c -- reads grammar config files */ -char *ts_config_lookup_by_grammar (const char *config_name, const char *grammar_name); +/* Implemented in syntax_ts.c -- reads grammar config */ +char *ts_get_symbol_override (const char *grammar_name); /*** Cached loaded modules ***/ @@ -80,10 +80,10 @@ ts_grammar_registry_lookup (const char *name) return NULL; } - /* Determine the symbol name from the symbols config file. - Default is tree_sitter_. Override if symbols config has an entry. */ + /* Determine the symbol name from config.ini. + Default is tree_sitter_. Override if config has symbol= entry. 
*/ { - char *override = ts_config_lookup_by_grammar ("symbols", name); + char *override = ts_get_symbol_override (name); if (override != NULL) { diff --git a/src/editor/ts-grammars/.gitignore b/src/editor/ts-grammars/.gitignore deleted file mode 100644 index 1190881ba0..0000000000 --- a/src/editor/ts-grammars/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -# Grammar directories and build artifacts are downloaded/generated at build time. -*/ -libtsgrammars.a diff --git a/src/editor/ts-grammars/Makefile.am b/src/editor/ts-grammars/Makefile.am deleted file mode 100644 index 259aa5e0e5..0000000000 --- a/src/editor/ts-grammars/Makefile.am +++ /dev/null @@ -1,60 +0,0 @@ -# Makefile.am for tree-sitter grammar libraries. -# In static mode, merges pre-built .a files into a convenience library. -# In shared mode, grammar .so files are loaded at runtime (nothing to build). - -EXTRA_DIST = \ - ts-grammar-registry.h - -# Variables substituted by configure -TREE_SITTER_GRAMMAR_ARCHIVES = @TREE_SITTER_GRAMMAR_ARCHIVES@ -TREE_SITTER_GRAMMARS = @TREE_SITTER_GRAMMARS@ -TREE_SITTER_BUILD_TARGET = @TREE_SITTER_BUILD_TARGET@ -TREE_SITTER_BUILD_MODE = @TREE_SITTER_BUILD_MODE@ -TREE_SITTER_GRAMMAR_LIBDIR = @TREE_SITTER_GRAMMAR_LIBDIR@ - -# ============================================================================ -# Build -# ============================================================================ - -all-local: $(TREE_SITTER_BUILD_TARGET) - -# Static mode: merge pre-built .a files into a single static library. -# Extract .o files from each grammar archive, then combine into one .a. 
-libtsgrammars.a: - @rm -rf _objs && mkdir -p _objs; \ - for archive in $(TREE_SITTER_GRAMMAR_ARCHIVES); do \ - lang=`echo $$archive | sed 's|/.*||'`; \ - mkdir -p _objs/$$lang; \ - (cd _objs/$$lang && $(AR) x $(abs_srcdir)/$$archive); \ - done; \ - all_objs=""; \ - for archive in $(TREE_SITTER_GRAMMAR_ARCHIVES); do \ - lang=`echo $$archive | sed 's|/.*||'`; \ - for o in _objs/$$lang/*.o; do \ - test -f "$$o" && all_objs="$$all_objs $$o"; \ - done; \ - done; \ - echo " AR libtsgrammars.a"; \ - $(AR) cr $@ $$all_objs; \ - $(RANLIB) $@; \ - rm -rf _objs - -# ============================================================================ -# Install -# ============================================================================ - -install-exec-local: install-exec-@TREE_SITTER_BUILD_MODE@ - -install-exec-static: - @true - -install-exec-shared: - @true - -# ============================================================================ -# Clean -# ============================================================================ - -clean-local: - rm -f libtsgrammars.a - rm -rf _objs diff --git a/src/editor/ts-grammars/ts-grammar-registry.h b/src/editor/ts-grammars/ts-grammar-registry.h deleted file mode 100644 index fb5d689ac4..0000000000 --- a/src/editor/ts-grammars/ts-grammar-registry.h +++ /dev/null @@ -1,979 +0,0 @@ -/* - Static registry of tree-sitter grammars. - - This file declares all tree_sitter_*() functions from the grammar sources - and provides a lookup table mapping grammar names to their corresponding - language function pointers. - - Each grammar is guarded by HAVE_GRAMMAR_ defines, which are set by - configure based on --with-tree-sitter-grammars=LIST. Only selected grammars - are compiled and registered. - - Used by syntax.c to find grammars at compile time instead of dlopen(). 
- */ - -#ifndef MC__TS_GRAMMAR_REGISTRY_H -#define MC__TS_GRAMMAR_REGISTRY_H - -#include - -/*** Forward declarations of grammar functions (conditional) ***/ - -#ifdef HAVE_GRAMMAR_ADA -extern const TSLanguage *tree_sitter_ada (void); -#endif -#ifdef HAVE_GRAMMAR_AGDA -extern const TSLanguage *tree_sitter_agda (void); -#endif -#ifdef HAVE_GRAMMAR_ASM -extern const TSLanguage *tree_sitter_asm (void); -#endif -#ifdef HAVE_GRAMMAR_ASTRO -extern const TSLanguage *tree_sitter_astro (void); -#endif -#ifdef HAVE_GRAMMAR_AWK -extern const TSLanguage *tree_sitter_awk (void); -#endif -#ifdef HAVE_GRAMMAR_BASH -extern const TSLanguage *tree_sitter_bash (void); -#endif -#ifdef HAVE_GRAMMAR_BIBTEX -extern const TSLanguage *tree_sitter_bibtex (void); -#endif -#ifdef HAVE_GRAMMAR_BICEP -extern const TSLanguage *tree_sitter_bicep (void); -#endif -#ifdef HAVE_GRAMMAR_BISON -extern const TSLanguage *tree_sitter_bison (void); -#endif -#ifdef HAVE_GRAMMAR_BLUEPRINT -extern const TSLanguage *tree_sitter_blueprint (void); -#endif -#ifdef HAVE_GRAMMAR_C -extern const TSLanguage *tree_sitter_c (void); -#endif -#ifdef HAVE_GRAMMAR_CADDY -extern const TSLanguage *tree_sitter_caddy (void); -#endif -#ifdef HAVE_GRAMMAR_CAIRO -extern const TSLanguage *tree_sitter_cairo (void); -#endif -#ifdef HAVE_GRAMMAR_CLOJURE -extern const TSLanguage *tree_sitter_clojure (void); -#endif -#ifdef HAVE_GRAMMAR_CMAKE -extern const TSLanguage *tree_sitter_cmake (void); -#endif -#ifdef HAVE_GRAMMAR_COBOL -extern const TSLanguage *tree_sitter_COBOL (void); -#endif -#ifdef HAVE_GRAMMAR_COMMONLISP -extern const TSLanguage *tree_sitter_commonlisp (void); -#endif -#ifdef HAVE_GRAMMAR_CPP -extern const TSLanguage *tree_sitter_cpp (void); -#endif -#ifdef HAVE_GRAMMAR_C_SHARP -extern const TSLanguage *tree_sitter_c_sharp (void); -#endif -#ifdef HAVE_GRAMMAR_CSS -extern const TSLanguage *tree_sitter_css (void); -#endif -#ifdef HAVE_GRAMMAR_CSV -extern const TSLanguage *tree_sitter_csv (void); -#endif -#ifdef 
HAVE_GRAMMAR_CUDA -extern const TSLanguage *tree_sitter_cuda (void); -#endif -#ifdef HAVE_GRAMMAR_CUE -extern const TSLanguage *tree_sitter_cue (void); -#endif -#ifdef HAVE_GRAMMAR_D -extern const TSLanguage *tree_sitter_d (void); -#endif -#ifdef HAVE_GRAMMAR_DART -extern const TSLanguage *tree_sitter_dart (void); -#endif -#ifdef HAVE_GRAMMAR_DEVICETREE -extern const TSLanguage *tree_sitter_devicetree (void); -#endif -#ifdef HAVE_GRAMMAR_DHALL -extern const TSLanguage *tree_sitter_dhall (void); -#endif -#ifdef HAVE_GRAMMAR_DIFF -extern const TSLanguage *tree_sitter_diff (void); -#endif -#ifdef HAVE_GRAMMAR_DOCKERFILE -extern const TSLanguage *tree_sitter_dockerfile (void); -#endif -#ifdef HAVE_GRAMMAR_DOT -extern const TSLanguage *tree_sitter_dot (void); -#endif -#ifdef HAVE_GRAMMAR_EARTHFILE -extern const TSLanguage *tree_sitter_earthfile (void); -#endif -#ifdef HAVE_GRAMMAR_EDITORCONFIG -extern const TSLanguage *tree_sitter_editorconfig (void); -#endif -#ifdef HAVE_GRAMMAR_ELIXIR -extern const TSLanguage *tree_sitter_elixir (void); -#endif -#ifdef HAVE_GRAMMAR_ELM -extern const TSLanguage *tree_sitter_elm (void); -#endif -#ifdef HAVE_GRAMMAR_ERLANG -extern const TSLanguage *tree_sitter_erlang (void); -#endif -#ifdef HAVE_GRAMMAR_FENNEL -extern const TSLanguage *tree_sitter_fennel (void); -#endif -#ifdef HAVE_GRAMMAR_FISH -extern const TSLanguage *tree_sitter_fish (void); -#endif -#ifdef HAVE_GRAMMAR_FORTH -extern const TSLanguage *tree_sitter_forth (void); -#endif -#ifdef HAVE_GRAMMAR_FORTRAN -extern const TSLanguage *tree_sitter_fortran (void); -#endif -#ifdef HAVE_GRAMMAR_GDSCRIPT -extern const TSLanguage *tree_sitter_gdscript (void); -#endif -#ifdef HAVE_GRAMMAR_GDSHADER -extern const TSLanguage *tree_sitter_gdshader (void); -#endif -#ifdef HAVE_GRAMMAR_GITATTRIBUTES -extern const TSLanguage *tree_sitter_gitattributes (void); -#endif -#ifdef HAVE_GRAMMAR_GITIGNORE -extern const TSLanguage *tree_sitter_gitignore (void); -#endif -#ifdef HAVE_GRAMMAR_GLEAM 
-extern const TSLanguage *tree_sitter_gleam (void); -#endif -#ifdef HAVE_GRAMMAR_GLSL -extern const TSLanguage *tree_sitter_glsl (void); -#endif -#ifdef HAVE_GRAMMAR_GO -extern const TSLanguage *tree_sitter_go (void); -#endif -#ifdef HAVE_GRAMMAR_GRAPHQL -extern const TSLanguage *tree_sitter_graphql (void); -#endif -#ifdef HAVE_GRAMMAR_GROOVY -extern const TSLanguage *tree_sitter_groovy (void); -#endif -#ifdef HAVE_GRAMMAR_HACK -extern const TSLanguage *tree_sitter_hack (void); -#endif -#ifdef HAVE_GRAMMAR_HARE -extern const TSLanguage *tree_sitter_hare (void); -#endif -#ifdef HAVE_GRAMMAR_HASKELL -extern const TSLanguage *tree_sitter_haskell (void); -#endif -#ifdef HAVE_GRAMMAR_HAXE -extern const TSLanguage *tree_sitter_haxe (void); -#endif -#ifdef HAVE_GRAMMAR_HCL -extern const TSLanguage *tree_sitter_hcl (void); -#endif -#ifdef HAVE_GRAMMAR_HEEX -extern const TSLanguage *tree_sitter_heex (void); -#endif -#ifdef HAVE_GRAMMAR_HJSON -extern const TSLanguage *tree_sitter_hjson (void); -#endif -#ifdef HAVE_GRAMMAR_HLSL -extern const TSLanguage *tree_sitter_hlsl (void); -#endif -#ifdef HAVE_GRAMMAR_HTML -extern const TSLanguage *tree_sitter_html (void); -#endif -#ifdef HAVE_GRAMMAR_IDL -extern const TSLanguage *tree_sitter_idl (void); -#endif -#ifdef HAVE_GRAMMAR_INI -extern const TSLanguage *tree_sitter_ini (void); -#endif -#ifdef HAVE_GRAMMAR_JAVA -extern const TSLanguage *tree_sitter_java (void); -#endif -#ifdef HAVE_GRAMMAR_JAVASCRIPT -extern const TSLanguage *tree_sitter_javascript (void); -#endif -#ifdef HAVE_GRAMMAR_JQ -extern const TSLanguage *tree_sitter_jq (void); -#endif -#ifdef HAVE_GRAMMAR_JSON -extern const TSLanguage *tree_sitter_json (void); -#endif -#ifdef HAVE_GRAMMAR_JSON5 -extern const TSLanguage *tree_sitter_json5 (void); -#endif -#ifdef HAVE_GRAMMAR_JSONNET -extern const TSLanguage *tree_sitter_jsonnet (void); -#endif -#ifdef HAVE_GRAMMAR_JULIA -extern const TSLanguage *tree_sitter_julia (void); -#endif -#ifdef HAVE_GRAMMAR_JUST -extern const 
TSLanguage *tree_sitter_just (void); -#endif -#ifdef HAVE_GRAMMAR_KCL -extern const TSLanguage *tree_sitter_kcl (void); -#endif -#ifdef HAVE_GRAMMAR_KDL -extern const TSLanguage *tree_sitter_kdl (void); -#endif -#ifdef HAVE_GRAMMAR_KOTLIN -extern const TSLanguage *tree_sitter_kotlin (void); -#endif -#ifdef HAVE_GRAMMAR_LATEX -extern const TSLanguage *tree_sitter_latex (void); -#endif -#ifdef HAVE_GRAMMAR_LUA -extern const TSLanguage *tree_sitter_lua (void); -#endif -#ifdef HAVE_GRAMMAR_MAKE -extern const TSLanguage *tree_sitter_make (void); -#endif -#ifdef HAVE_GRAMMAR_MARKDOWN -extern const TSLanguage *tree_sitter_markdown (void); -#endif -#ifdef HAVE_GRAMMAR_MARKDOWN_INLINE -extern const TSLanguage *tree_sitter_markdown_inline (void); -#endif -#ifdef HAVE_GRAMMAR_MATLAB -extern const TSLanguage *tree_sitter_matlab (void); -#endif -#ifdef HAVE_GRAMMAR_MESON -extern const TSLanguage *tree_sitter_meson (void); -#endif -#ifdef HAVE_GRAMMAR_MUTTRC -extern const TSLanguage *tree_sitter_muttrc (void); -#endif -#ifdef HAVE_GRAMMAR_NICKEL -extern const TSLanguage *tree_sitter_nickel (void); -#endif -#ifdef HAVE_GRAMMAR_NIM -extern const TSLanguage *tree_sitter_nim (void); -#endif -#ifdef HAVE_GRAMMAR_NIX -extern const TSLanguage *tree_sitter_nix (void); -#endif -#ifdef HAVE_GRAMMAR_NU -extern const TSLanguage *tree_sitter_nu (void); -#endif -#ifdef HAVE_GRAMMAR_OBJC -extern const TSLanguage *tree_sitter_objc (void); -#endif -#ifdef HAVE_GRAMMAR_OCAML -extern const TSLanguage *tree_sitter_ocaml (void); -#endif -#ifdef HAVE_GRAMMAR_ODIN -extern const TSLanguage *tree_sitter_odin (void); -#endif -#ifdef HAVE_GRAMMAR_ORG -extern const TSLanguage *tree_sitter_org (void); -#endif -#ifdef HAVE_GRAMMAR_PASCAL -extern const TSLanguage *tree_sitter_pascal (void); -#endif -#ifdef HAVE_GRAMMAR_PERL -extern const TSLanguage *tree_sitter_perl (void); -#endif -#ifdef HAVE_GRAMMAR_PHP -extern const TSLanguage *tree_sitter_php (void); -#endif -#ifdef HAVE_GRAMMAR_PKL -extern const 
TSLanguage *tree_sitter_pkl (void); -#endif -#ifdef HAVE_GRAMMAR_PO -extern const TSLanguage *tree_sitter_po (void); -#endif -#ifdef HAVE_GRAMMAR_POWERSHELL -extern const TSLanguage *tree_sitter_powershell (void); -#endif -#ifdef HAVE_GRAMMAR_PRISMA -extern const TSLanguage *tree_sitter_prisma (void); -#endif -#ifdef HAVE_GRAMMAR_PROPERTIES -extern const TSLanguage *tree_sitter_properties (void); -#endif -#ifdef HAVE_GRAMMAR_PROTO -extern const TSLanguage *tree_sitter_proto (void); -#endif -#ifdef HAVE_GRAMMAR_PRQL -extern const TSLanguage *tree_sitter_prql (void); -#endif -#ifdef HAVE_GRAMMAR_PUPPET -extern const TSLanguage *tree_sitter_puppet (void); -#endif -#ifdef HAVE_GRAMMAR_PURESCRIPT -extern const TSLanguage *tree_sitter_purescript (void); -#endif -#ifdef HAVE_GRAMMAR_PYTHON -extern const TSLanguage *tree_sitter_python (void); -#endif -#ifdef HAVE_GRAMMAR_QMLJS -extern const TSLanguage *tree_sitter_qmljs (void); -#endif -#ifdef HAVE_GRAMMAR_R -extern const TSLanguage *tree_sitter_r (void); -#endif -#ifdef HAVE_GRAMMAR_RACKET -extern const TSLanguage *tree_sitter_racket (void); -#endif -#ifdef HAVE_GRAMMAR_RESCRIPT -extern const TSLanguage *tree_sitter_rescript (void); -#endif -#ifdef HAVE_GRAMMAR_ROBOT -extern const TSLanguage *tree_sitter_robot (void); -#endif -#ifdef HAVE_GRAMMAR_ROC -extern const TSLanguage *tree_sitter_roc (void); -#endif -#ifdef HAVE_GRAMMAR_RON -extern const TSLanguage *tree_sitter_ron (void); -#endif -#ifdef HAVE_GRAMMAR_RST -extern const TSLanguage *tree_sitter_rst (void); -#endif -#ifdef HAVE_GRAMMAR_RUBY -extern const TSLanguage *tree_sitter_ruby (void); -#endif -#ifdef HAVE_GRAMMAR_RUST -extern const TSLanguage *tree_sitter_rust (void); -#endif -#ifdef HAVE_GRAMMAR_SATYSFI -extern const TSLanguage *tree_sitter_satysfi (void); -#endif -#ifdef HAVE_GRAMMAR_SCALA -extern const TSLanguage *tree_sitter_scala (void); -#endif -#ifdef HAVE_GRAMMAR_SCHEME -extern const TSLanguage *tree_sitter_scheme (void); -#endif -#ifdef 
HAVE_GRAMMAR_SCSS -extern const TSLanguage *tree_sitter_scss (void); -#endif -#ifdef HAVE_GRAMMAR_SLIM -extern const TSLanguage *tree_sitter_slim (void); -#endif -#ifdef HAVE_GRAMMAR_SLINT -extern const TSLanguage *tree_sitter_slint (void); -#endif -#ifdef HAVE_GRAMMAR_SMALLTALK -extern const TSLanguage *tree_sitter_smalltalk (void); -#endif -#ifdef HAVE_GRAMMAR_SML -extern const TSLanguage *tree_sitter_sml (void); -#endif -#ifdef HAVE_GRAMMAR_SNAKEMAKE -extern const TSLanguage *tree_sitter_snakemake (void); -#endif -#ifdef HAVE_GRAMMAR_SOLIDITY -extern const TSLanguage *tree_sitter_solidity (void); -#endif -#ifdef HAVE_GRAMMAR_SQL -extern const TSLanguage *tree_sitter_sql (void); -#endif -#ifdef HAVE_GRAMMAR_STARLARK -extern const TSLanguage *tree_sitter_starlark (void); -#endif -#ifdef HAVE_GRAMMAR_STRACE -extern const TSLanguage *tree_sitter_strace (void); -#endif -#ifdef HAVE_GRAMMAR_SVELTE -extern const TSLanguage *tree_sitter_svelte (void); -#endif -#ifdef HAVE_GRAMMAR_SWIFT -extern const TSLanguage *tree_sitter_swift (void); -#endif -#ifdef HAVE_GRAMMAR_TCL -extern const TSLanguage *tree_sitter_tcl (void); -#endif -#ifdef HAVE_GRAMMAR_TEAL -extern const TSLanguage *tree_sitter_teal (void); -#endif -#ifdef HAVE_GRAMMAR_TEMPL -extern const TSLanguage *tree_sitter_templ (void); -#endif -#ifdef HAVE_GRAMMAR_TERA -extern const TSLanguage *tree_sitter_tera (void); -#endif -#ifdef HAVE_GRAMMAR_TEXTPROTO -extern const TSLanguage *tree_sitter_textproto (void); -#endif -#ifdef HAVE_GRAMMAR_THRIFT -extern const TSLanguage *tree_sitter_thrift (void); -#endif -#ifdef HAVE_GRAMMAR_TLAPLUS -extern const TSLanguage *tree_sitter_tlaplus (void); -#endif -#ifdef HAVE_GRAMMAR_TMUX -extern const TSLanguage *tree_sitter_tmux (void); -#endif -#ifdef HAVE_GRAMMAR_TOML -extern const TSLanguage *tree_sitter_toml (void); -#endif -#ifdef HAVE_GRAMMAR_TSV -extern const TSLanguage *tree_sitter_tsv (void); -#endif -#ifdef HAVE_GRAMMAR_TURTLE -extern const TSLanguage *tree_sitter_turtle 
(void); -#endif -#ifdef HAVE_GRAMMAR_TWIG -extern const TSLanguage *tree_sitter_twig (void); -#endif -#ifdef HAVE_GRAMMAR_TYPESCRIPT -extern const TSLanguage *tree_sitter_typescript (void); -#endif -#ifdef HAVE_GRAMMAR_TYPST -extern const TSLanguage *tree_sitter_typst (void); -#endif -#ifdef HAVE_GRAMMAR_USD -extern const TSLanguage *tree_sitter_usd (void); -#endif -#ifdef HAVE_GRAMMAR_VALA -extern const TSLanguage *tree_sitter_vala (void); -#endif -#ifdef HAVE_GRAMMAR_VERILOG -extern const TSLanguage *tree_sitter_verilog (void); -#endif -#ifdef HAVE_GRAMMAR_VHDL -extern const TSLanguage *tree_sitter_vhdl (void); -#endif -#ifdef HAVE_GRAMMAR_VIM -extern const TSLanguage *tree_sitter_vim (void); -#endif -#ifdef HAVE_GRAMMAR_VUE -extern const TSLanguage *tree_sitter_vue (void); -#endif -#ifdef HAVE_GRAMMAR_WGSL -extern const TSLanguage *tree_sitter_wgsl (void); -#endif -#ifdef HAVE_GRAMMAR_WIT -extern const TSLanguage *tree_sitter_wit (void); -#endif -#ifdef HAVE_GRAMMAR_XML -extern const TSLanguage *tree_sitter_xml (void); -#endif -#ifdef HAVE_GRAMMAR_XQUERY -extern const TSLanguage *tree_sitter_xquery (void); -#endif -#ifdef HAVE_GRAMMAR_YAML -extern const TSLanguage *tree_sitter_yaml (void); -#endif -#ifdef HAVE_GRAMMAR_YANG -extern const TSLanguage *tree_sitter_yang (void); -#endif -#ifdef HAVE_GRAMMAR_YUCK -extern const TSLanguage *tree_sitter_yuck (void); -#endif -#ifdef HAVE_GRAMMAR_ZEEK -extern const TSLanguage *tree_sitter_zeek (void); -#endif -#ifdef HAVE_GRAMMAR_ZIG -extern const TSLanguage *tree_sitter_zig (void); -#endif -#ifdef HAVE_GRAMMAR_ZSH -extern const TSLanguage *tree_sitter_zsh (void); -#endif - -/*** Lookup table: grammar name -> language function pointer ***/ - -typedef struct -{ - const char *name; - const TSLanguage *(*func) (void); -} ts_grammar_entry_static_t; - -/* clang-format off */ -static const ts_grammar_entry_static_t ts_grammar_registry[] = { -#ifdef HAVE_GRAMMAR_ADA - { "ada", tree_sitter_ada }, -#endif -#ifdef HAVE_GRAMMAR_AGDA - 
{ "agda", tree_sitter_agda }, -#endif -#ifdef HAVE_GRAMMAR_ASM - { "asm", tree_sitter_asm }, -#endif -#ifdef HAVE_GRAMMAR_ASTRO - { "astro", tree_sitter_astro }, -#endif -#ifdef HAVE_GRAMMAR_AWK - { "awk", tree_sitter_awk }, -#endif -#ifdef HAVE_GRAMMAR_BASH - { "bash", tree_sitter_bash }, -#endif -#ifdef HAVE_GRAMMAR_BIBTEX - { "bibtex", tree_sitter_bibtex }, -#endif -#ifdef HAVE_GRAMMAR_BICEP - { "bicep", tree_sitter_bicep }, -#endif -#ifdef HAVE_GRAMMAR_BISON - { "bison", tree_sitter_bison }, -#endif -#ifdef HAVE_GRAMMAR_BLUEPRINT - { "blueprint", tree_sitter_blueprint }, -#endif -#ifdef HAVE_GRAMMAR_C - { "c", tree_sitter_c }, -#endif -#ifdef HAVE_GRAMMAR_CADDY - { "caddy", tree_sitter_caddy }, -#endif -#ifdef HAVE_GRAMMAR_CAIRO - { "cairo", tree_sitter_cairo }, -#endif -#ifdef HAVE_GRAMMAR_CLOJURE - { "clojure", tree_sitter_clojure }, -#endif -#ifdef HAVE_GRAMMAR_CMAKE - { "cmake", tree_sitter_cmake }, -#endif -#ifdef HAVE_GRAMMAR_COBOL - { "cobol", tree_sitter_COBOL }, -#endif -#ifdef HAVE_GRAMMAR_COMMONLISP - { "commonlisp", tree_sitter_commonlisp }, -#endif -#ifdef HAVE_GRAMMAR_CPP - { "cpp", tree_sitter_cpp }, -#endif -#ifdef HAVE_GRAMMAR_C_SHARP - { "c_sharp", tree_sitter_c_sharp }, -#endif -#ifdef HAVE_GRAMMAR_CSS - { "css", tree_sitter_css }, -#endif -#ifdef HAVE_GRAMMAR_CSV - { "csv", tree_sitter_csv }, -#endif -#ifdef HAVE_GRAMMAR_CUDA - { "cuda", tree_sitter_cuda }, -#endif -#ifdef HAVE_GRAMMAR_CUE - { "cue", tree_sitter_cue }, -#endif -#ifdef HAVE_GRAMMAR_D - { "d", tree_sitter_d }, -#endif -#ifdef HAVE_GRAMMAR_DART - { "dart", tree_sitter_dart }, -#endif -#ifdef HAVE_GRAMMAR_DEVICETREE - { "devicetree", tree_sitter_devicetree }, -#endif -#ifdef HAVE_GRAMMAR_DHALL - { "dhall", tree_sitter_dhall }, -#endif -#ifdef HAVE_GRAMMAR_DIFF - { "diff", tree_sitter_diff }, -#endif -#ifdef HAVE_GRAMMAR_DOCKERFILE - { "dockerfile", tree_sitter_dockerfile }, -#endif -#ifdef HAVE_GRAMMAR_DOT - { "dot", tree_sitter_dot }, -#endif -#ifdef HAVE_GRAMMAR_EARTHFILE - { 
"earthfile", tree_sitter_earthfile }, -#endif -#ifdef HAVE_GRAMMAR_EDITORCONFIG - { "editorconfig", tree_sitter_editorconfig }, -#endif -#ifdef HAVE_GRAMMAR_ELIXIR - { "elixir", tree_sitter_elixir }, -#endif -#ifdef HAVE_GRAMMAR_ELM - { "elm", tree_sitter_elm }, -#endif -#ifdef HAVE_GRAMMAR_ERLANG - { "erlang", tree_sitter_erlang }, -#endif -#ifdef HAVE_GRAMMAR_FENNEL - { "fennel", tree_sitter_fennel }, -#endif -#ifdef HAVE_GRAMMAR_FISH - { "fish", tree_sitter_fish }, -#endif -#ifdef HAVE_GRAMMAR_FORTH - { "forth", tree_sitter_forth }, -#endif -#ifdef HAVE_GRAMMAR_FORTRAN - { "fortran", tree_sitter_fortran }, -#endif -#ifdef HAVE_GRAMMAR_GDSCRIPT - { "gdscript", tree_sitter_gdscript }, -#endif -#ifdef HAVE_GRAMMAR_GDSHADER - { "gdshader", tree_sitter_gdshader }, -#endif -#ifdef HAVE_GRAMMAR_GITATTRIBUTES - { "gitattributes", tree_sitter_gitattributes }, -#endif -#ifdef HAVE_GRAMMAR_GITIGNORE - { "gitignore", tree_sitter_gitignore }, -#endif -#ifdef HAVE_GRAMMAR_GLEAM - { "gleam", tree_sitter_gleam }, -#endif -#ifdef HAVE_GRAMMAR_GLSL - { "glsl", tree_sitter_glsl }, -#endif -#ifdef HAVE_GRAMMAR_GO - { "go", tree_sitter_go }, -#endif -#ifdef HAVE_GRAMMAR_GRAPHQL - { "graphql", tree_sitter_graphql }, -#endif -#ifdef HAVE_GRAMMAR_GROOVY - { "groovy", tree_sitter_groovy }, -#endif -#ifdef HAVE_GRAMMAR_HACK - { "hack", tree_sitter_hack }, -#endif -#ifdef HAVE_GRAMMAR_HARE - { "hare", tree_sitter_hare }, -#endif -#ifdef HAVE_GRAMMAR_HASKELL - { "haskell", tree_sitter_haskell }, -#endif -#ifdef HAVE_GRAMMAR_HAXE - { "haxe", tree_sitter_haxe }, -#endif -#ifdef HAVE_GRAMMAR_HCL - { "hcl", tree_sitter_hcl }, -#endif -#ifdef HAVE_GRAMMAR_HEEX - { "heex", tree_sitter_heex }, -#endif -#ifdef HAVE_GRAMMAR_HJSON - { "hjson", tree_sitter_hjson }, -#endif -#ifdef HAVE_GRAMMAR_HLSL - { "hlsl", tree_sitter_hlsl }, -#endif -#ifdef HAVE_GRAMMAR_HTML - { "html", tree_sitter_html }, -#endif -#ifdef HAVE_GRAMMAR_IDL - { "idl", tree_sitter_idl }, -#endif -#ifdef HAVE_GRAMMAR_INI - { "ini", 
tree_sitter_ini }, -#endif -#ifdef HAVE_GRAMMAR_JAVA - { "java", tree_sitter_java }, -#endif -#ifdef HAVE_GRAMMAR_JAVASCRIPT - { "javascript", tree_sitter_javascript }, -#endif -#ifdef HAVE_GRAMMAR_JQ - { "jq", tree_sitter_jq }, -#endif -#ifdef HAVE_GRAMMAR_JSON - { "json", tree_sitter_json }, -#endif -#ifdef HAVE_GRAMMAR_JSON5 - { "json5", tree_sitter_json5 }, -#endif -#ifdef HAVE_GRAMMAR_JSONNET - { "jsonnet", tree_sitter_jsonnet }, -#endif -#ifdef HAVE_GRAMMAR_JULIA - { "julia", tree_sitter_julia }, -#endif -#ifdef HAVE_GRAMMAR_JUST - { "just", tree_sitter_just }, -#endif -#ifdef HAVE_GRAMMAR_KCL - { "kcl", tree_sitter_kcl }, -#endif -#ifdef HAVE_GRAMMAR_KDL - { "kdl", tree_sitter_kdl }, -#endif -#ifdef HAVE_GRAMMAR_KOTLIN - { "kotlin", tree_sitter_kotlin }, -#endif -#ifdef HAVE_GRAMMAR_LATEX - { "latex", tree_sitter_latex }, -#endif -#ifdef HAVE_GRAMMAR_LUA - { "lua", tree_sitter_lua }, -#endif -#ifdef HAVE_GRAMMAR_MAKE - { "make", tree_sitter_make }, -#endif -#ifdef HAVE_GRAMMAR_MARKDOWN - { "markdown", tree_sitter_markdown }, -#endif -#ifdef HAVE_GRAMMAR_MARKDOWN_INLINE - { "markdown_inline", tree_sitter_markdown_inline }, -#endif -#ifdef HAVE_GRAMMAR_MATLAB - { "matlab", tree_sitter_matlab }, -#endif -#ifdef HAVE_GRAMMAR_MESON - { "meson", tree_sitter_meson }, -#endif -#ifdef HAVE_GRAMMAR_MUTTRC - { "muttrc", tree_sitter_muttrc }, -#endif -#ifdef HAVE_GRAMMAR_NICKEL - { "nickel", tree_sitter_nickel }, -#endif -#ifdef HAVE_GRAMMAR_NIM - { "nim", tree_sitter_nim }, -#endif -#ifdef HAVE_GRAMMAR_NIX - { "nix", tree_sitter_nix }, -#endif -#ifdef HAVE_GRAMMAR_NU - { "nu", tree_sitter_nu }, -#endif -#ifdef HAVE_GRAMMAR_OBJC - { "objc", tree_sitter_objc }, -#endif -#ifdef HAVE_GRAMMAR_OCAML - { "ocaml", tree_sitter_ocaml }, -#endif -#ifdef HAVE_GRAMMAR_ODIN - { "odin", tree_sitter_odin }, -#endif -#ifdef HAVE_GRAMMAR_ORG - { "org", tree_sitter_org }, -#endif -#ifdef HAVE_GRAMMAR_PASCAL - { "pascal", tree_sitter_pascal }, -#endif -#ifdef HAVE_GRAMMAR_PERL - { "perl", 
tree_sitter_perl }, -#endif -#ifdef HAVE_GRAMMAR_PHP - { "php", tree_sitter_php }, -#endif -#ifdef HAVE_GRAMMAR_PKL - { "pkl", tree_sitter_pkl }, -#endif -#ifdef HAVE_GRAMMAR_PO - { "po", tree_sitter_po }, -#endif -#ifdef HAVE_GRAMMAR_POWERSHELL - { "powershell", tree_sitter_powershell }, -#endif -#ifdef HAVE_GRAMMAR_PRISMA - { "prisma", tree_sitter_prisma }, -#endif -#ifdef HAVE_GRAMMAR_PROPERTIES - { "properties", tree_sitter_properties }, -#endif -#ifdef HAVE_GRAMMAR_PROTO - { "proto", tree_sitter_proto }, -#endif -#ifdef HAVE_GRAMMAR_PRQL - { "prql", tree_sitter_prql }, -#endif -#ifdef HAVE_GRAMMAR_PUPPET - { "puppet", tree_sitter_puppet }, -#endif -#ifdef HAVE_GRAMMAR_PURESCRIPT - { "purescript", tree_sitter_purescript }, -#endif -#ifdef HAVE_GRAMMAR_PYTHON - { "python", tree_sitter_python }, -#endif -#ifdef HAVE_GRAMMAR_QMLJS - { "qmljs", tree_sitter_qmljs }, -#endif -#ifdef HAVE_GRAMMAR_R - { "r", tree_sitter_r }, -#endif -#ifdef HAVE_GRAMMAR_RACKET - { "racket", tree_sitter_racket }, -#endif -#ifdef HAVE_GRAMMAR_RESCRIPT - { "rescript", tree_sitter_rescript }, -#endif -#ifdef HAVE_GRAMMAR_ROBOT - { "robot", tree_sitter_robot }, -#endif -#ifdef HAVE_GRAMMAR_ROC - { "roc", tree_sitter_roc }, -#endif -#ifdef HAVE_GRAMMAR_RON - { "ron", tree_sitter_ron }, -#endif -#ifdef HAVE_GRAMMAR_RST - { "rst", tree_sitter_rst }, -#endif -#ifdef HAVE_GRAMMAR_RUBY - { "ruby", tree_sitter_ruby }, -#endif -#ifdef HAVE_GRAMMAR_RUST - { "rust", tree_sitter_rust }, -#endif -#ifdef HAVE_GRAMMAR_SATYSFI - { "satysfi", tree_sitter_satysfi }, -#endif -#ifdef HAVE_GRAMMAR_SCALA - { "scala", tree_sitter_scala }, -#endif -#ifdef HAVE_GRAMMAR_SCHEME - { "scheme", tree_sitter_scheme }, -#endif -#ifdef HAVE_GRAMMAR_SCSS - { "scss", tree_sitter_scss }, -#endif -#ifdef HAVE_GRAMMAR_SLIM - { "slim", tree_sitter_slim }, -#endif -#ifdef HAVE_GRAMMAR_SLINT - { "slint", tree_sitter_slint }, -#endif -#ifdef HAVE_GRAMMAR_SMALLTALK - { "smalltalk", tree_sitter_smalltalk }, -#endif -#ifdef 
HAVE_GRAMMAR_SML - { "sml", tree_sitter_sml }, -#endif -#ifdef HAVE_GRAMMAR_SNAKEMAKE - { "snakemake", tree_sitter_snakemake }, -#endif -#ifdef HAVE_GRAMMAR_SOLIDITY - { "solidity", tree_sitter_solidity }, -#endif -#ifdef HAVE_GRAMMAR_SQL - { "sql", tree_sitter_sql }, -#endif -#ifdef HAVE_GRAMMAR_STARLARK - { "starlark", tree_sitter_starlark }, -#endif -#ifdef HAVE_GRAMMAR_STRACE - { "strace", tree_sitter_strace }, -#endif -#ifdef HAVE_GRAMMAR_SVELTE - { "svelte", tree_sitter_svelte }, -#endif -#ifdef HAVE_GRAMMAR_SWIFT - { "swift", tree_sitter_swift }, -#endif -#ifdef HAVE_GRAMMAR_TCL - { "tcl", tree_sitter_tcl }, -#endif -#ifdef HAVE_GRAMMAR_TEAL - { "teal", tree_sitter_teal }, -#endif -#ifdef HAVE_GRAMMAR_TEMPL - { "templ", tree_sitter_templ }, -#endif -#ifdef HAVE_GRAMMAR_TERA - { "tera", tree_sitter_tera }, -#endif -#ifdef HAVE_GRAMMAR_TEXTPROTO - { "textproto", tree_sitter_textproto }, -#endif -#ifdef HAVE_GRAMMAR_THRIFT - { "thrift", tree_sitter_thrift }, -#endif -#ifdef HAVE_GRAMMAR_TLAPLUS - { "tlaplus", tree_sitter_tlaplus }, -#endif -#ifdef HAVE_GRAMMAR_TMUX - { "tmux", tree_sitter_tmux }, -#endif -#ifdef HAVE_GRAMMAR_TOML - { "toml", tree_sitter_toml }, -#endif -#ifdef HAVE_GRAMMAR_TSV - { "tsv", tree_sitter_tsv }, -#endif -#ifdef HAVE_GRAMMAR_TURTLE - { "turtle", tree_sitter_turtle }, -#endif -#ifdef HAVE_GRAMMAR_TWIG - { "twig", tree_sitter_twig }, -#endif -#ifdef HAVE_GRAMMAR_TYPESCRIPT - { "typescript", tree_sitter_typescript }, -#endif -#ifdef HAVE_GRAMMAR_TYPST - { "typst", tree_sitter_typst }, -#endif -#ifdef HAVE_GRAMMAR_USD - { "usd", tree_sitter_usd }, -#endif -#ifdef HAVE_GRAMMAR_VALA - { "vala", tree_sitter_vala }, -#endif -#ifdef HAVE_GRAMMAR_VERILOG - { "verilog", tree_sitter_verilog }, -#endif -#ifdef HAVE_GRAMMAR_VHDL - { "vhdl", tree_sitter_vhdl }, -#endif -#ifdef HAVE_GRAMMAR_VIM - { "vim", tree_sitter_vim }, -#endif -#ifdef HAVE_GRAMMAR_VUE - { "vue", tree_sitter_vue }, -#endif -#ifdef HAVE_GRAMMAR_WGSL - { "wgsl", tree_sitter_wgsl }, 
-#endif -#ifdef HAVE_GRAMMAR_WIT - { "wit", tree_sitter_wit }, -#endif -#ifdef HAVE_GRAMMAR_XML - { "xml", tree_sitter_xml }, -#endif -#ifdef HAVE_GRAMMAR_XQUERY - { "xquery", tree_sitter_xquery }, -#endif -#ifdef HAVE_GRAMMAR_YAML - { "yaml", tree_sitter_yaml }, -#endif -#ifdef HAVE_GRAMMAR_YANG - { "yang", tree_sitter_yang }, -#endif -#ifdef HAVE_GRAMMAR_YUCK - { "yuck", tree_sitter_yuck }, -#endif -#ifdef HAVE_GRAMMAR_ZEEK - { "zeek", tree_sitter_zeek }, -#endif -#ifdef HAVE_GRAMMAR_ZIG - { "zig", tree_sitter_zig }, -#endif -#ifdef HAVE_GRAMMAR_ZSH - { "zsh", tree_sitter_zsh }, -#endif - { NULL, NULL } -}; -/* clang-format on */ - -/** - * Look up a grammar by name in the static registry. - * Returns the TSLanguage* or NULL if not found. - */ -static inline const TSLanguage * -ts_grammar_registry_lookup (const char *name) -{ - const ts_grammar_entry_static_t *entry; - - for (entry = ts_grammar_registry; entry->name != NULL; entry++) - { - if (strcmp (entry->name, name) == 0) - return entry->func (); - } - - return NULL; -} - -#endif /* MC__TS_GRAMMAR_REGISTRY_H */ diff --git a/src/setup.c b/src/setup.c index aaa32bd6ad..a390d4e145 100644 --- a/src/setup.c +++ b/src/setup.c @@ -355,6 +355,9 @@ static const struct { "editor_show_right_margin", &edit_options.show_right_margin }, { "editor_group_undo", &edit_options.group_undo }, { "editor_state_full_filename", &edit_options.state_full_filename }, +#ifdef HAVE_TREE_SITTER + { "editor_use_tree_sitter", &edit_options.use_tree_sitter }, +#endif #endif { "editor_ask_filename_before_edit", &editor_ask_filename_before_edit }, { "nice_rotating_dash", &nice_rotating_dash }, diff --git a/tests/src/editor/Makefile.am b/tests/src/editor/Makefile.am index 64282d77c2..2e52164eb4 100644 --- a/tests/src/editor/Makefile.am +++ b/tests/src/editor/Makefile.am @@ -40,27 +40,8 @@ if USE_TREE_SITTER edit_syntax_ts_SOURCES = \ edit_syntax_ts.c -if TREE_SITTER_STATIC edit_syntax_ts_CPPFLAGS = \ - 
-DTEST_TS_QUERIES_DIR=\"$(abs_top_srcdir)/misc/syntax-ts/queries\" \ - $(GLIB_CFLAGS) \ - $(TREE_SITTER_CFLAGS) \ - @TREE_SITTER_GRAMMAR_DEFS@ \ - -I$(top_srcdir) \ - -I$(top_srcdir)/src/editor/ts-grammars \ - @CHECK_CFLAGS@ - -edit_syntax_ts_LDADD = \ - $(top_builddir)/src/editor/ts-grammars/libtsgrammars.la \ - $(TREE_SITTER_LIBS) \ - $(GLIB_LIBS) \ - @CHECK_LIBS@ -else -# Shared mode: point TS_GRAMMAR_LIBDIR to the build tree -# (g_module_open automatically looks in .libs/ for libtool modules) -edit_syntax_ts_CPPFLAGS = \ - -DTEST_TS_QUERIES_DIR=\"$(abs_top_srcdir)/misc/syntax-ts/queries\" \ - -DTS_GRAMMAR_LIBDIR=\"$(abs_top_builddir)/src/editor/ts-grammars\" \ + -DTS_GRAMMAR_LIBDIR=\"$(libdir)/mc/ts-grammars\" \ $(GLIB_CFLAGS) \ $(TREE_SITTER_CFLAGS) \ $(GMODULE_CFLAGS) \ @@ -73,4 +54,3 @@ edit_syntax_ts_LDADD = \ $(GLIB_LIBS) \ @CHECK_LIBS@ endif -endif diff --git a/tests/src/editor/edit_syntax_ts.c b/tests/src/editor/edit_syntax_ts.c index da15baf3f7..5979dd9095 100644 --- a/tests/src/editor/edit_syntax_ts.c +++ b/tests/src/editor/edit_syntax_ts.c @@ -27,11 +27,7 @@ #include #include -#ifdef TREE_SITTER_STATIC -#include "src/editor/ts-grammars/ts-grammar-registry.h" -#else #include "src/editor/ts-grammar-loader.h" -#endif /* --------------------------------------------------------------------------------------------- */ @@ -180,65 +176,50 @@ START_TEST (test_all_query_files_compile) int failed = 0; char first_fail[128] = ""; -#ifdef TREE_SITTER_STATIC - { - const ts_grammar_entry_static_t *entry; - - for (entry = ts_grammar_registry; entry->name != NULL; entry++) - { - int rc = test_one_query (entry->name, entry->func ()); - - if (rc == -1) - continue; - tested++; - if (rc == 1 && ++failed == 1) - snprintf (first_fail, sizeof (first_fail), "%s", entry->name); - } - } -#else { - /* Scan for query files in source tree and user install location */ - const char *dirs[2]; + /* Scan per-grammar directories for highlights.scm */ char user_path[1024]; + const char 
*dirs[2]; int d; - dirs[0] = TEST_TS_QUERIES_DIR; - snprintf (user_path, sizeof (user_path), "%s/.local/share/mc/syntax-ts/queries", + snprintf (user_path, sizeof (user_path), "%s/.local/share/mc/syntax-ts", g_get_home_dir ()); - dirs[1] = user_path; + dirs[0] = user_path; + dirs[1] = "/usr/share/mc/syntax-ts"; for (d = 0; d < 2; d++) { GDir *dir = g_dir_open (dirs[d], 0, NULL); - const gchar *fname; + const gchar *entry; if (dir == NULL) continue; - while ((fname = g_dir_read_name (dir)) != NULL) + while ((entry = g_dir_read_name (dir)) != NULL) { - if (g_str_has_suffix (fname, "-highlights.scm")) + gchar *scm_path; + + scm_path = g_build_filename (dirs[d], entry, "highlights.scm", NULL); + if (g_file_test (scm_path, G_FILE_TEST_IS_REGULAR)) { - gchar *name = g_strndup (fname, strlen (fname) - strlen ("-highlights.scm")); - const TSLanguage *lang = ts_grammar_registry_lookup (name); - int rc = test_one_query (name, lang); + const TSLanguage *lang = ts_grammar_registry_lookup (entry); + int rc = test_one_query (entry, lang); if (rc >= 0) { tested++; if (rc == 1 && ++failed == 1) - snprintf (first_fail, sizeof (first_fail), "%s", name); + snprintf (first_fail, sizeof (first_fail), "%s", entry); } - g_free (name); } + g_free (scm_path); } g_dir_close (dir); if (tested > 0) - break; /* found queries, no need to check second dir */ + break; } } -#endif ck_assert_msg (tested > 0, "No grammars found"); ck_assert_msg (failed == 0, "Query failed for %d/%d (first: %s)", failed, tested, first_fail); diff --git a/tests/syntax/mc-syntax-dump.c b/tests/syntax/mc-syntax-dump.c index 99253b205a..9aa9237326 100644 --- a/tests/syntax/mc-syntax-dump.c +++ b/tests/syntax/mc-syntax-dump.c @@ -3,7 +3,19 @@ * * Uses MC's actual syntax engine internals for exact color output. 
* - * Usage: mc-syntax-dump [--ts|--legacy] + * Usage: + * mc-syntax-dump [--ts|--legacy] + * mc-syntax-dump --ts --grammar-dir DIR --lib-dir DIR + * + * When --grammar-dir and --lib-dir are given, loads the grammar directly + * from those paths instead of using MC's installed grammar discovery. + * This allows testing queries from a development checkout without + * installing them. + * + * --grammar-dir DIR Path to a per-grammar directory containing + * highlights.scm, config.ini, and optionally + * injections.scm. + * --lib-dir DIR Path to directory containing .so files. */ #include @@ -32,6 +44,8 @@ #include "src/editor/edit-impl.h" #ifdef HAVE_TREE_SITTER #include "src/editor/syntax_ts.h" +#include +#include #endif static void @@ -54,20 +68,47 @@ void __wrap_tty_color_init_lib (gboolean disable, gboolean force) { (void) disable; (void) force; - /* no-op: skip SLang/ncurses initialization */ } void __wrap_tty_color_try_alloc_lib_pair (tty_color_lib_pair_t *mc_color_pair) { - /* no-op: we don't need actual terminal color pairs */ (void) mc_color_pair; } void __wrap_tty_color_deinit_lib (void) { - /* no-op */ } +#ifdef HAVE_TREE_SITTER +/* Duplicated from syntax_ts.c (file-scope types not visible to us) */ +typedef struct +{ + uint32_t start_byte; + uint32_t end_byte; + int color; +} ts_highlight_entry_t; + +/* + * TSInput read callback: reads chunks of text from the edit buffer. + * Duplicated from syntax_ts.c because the original is static. + */ +static const char * +ts_input_read_cb (void *payload, uint32_t byte_index, TSPoint position, uint32_t *bytes_read) +{ + static char buf[4096]; + WEdit *edit_buf = (WEdit *) payload; + uint32_t i; + + (void) position; + + for (i = 0; i < sizeof (buf) && (off_t) (byte_index + i) < edit_buf->buffer.size; i++) + buf[i] = edit_buffer_get_byte (&edit_buf->buffer, (off_t) (byte_index + i)); + + *bytes_read = i; + return (i > 0) ? 
buf : NULL; +} +#endif + /* Wrap ts_init_for_file so we can disable it for --legacy mode */ static gboolean ts_init_disabled = FALSE; extern gboolean __real_ts_init_for_file (WEdit *edit, const char *forced_grammar); @@ -97,17 +138,10 @@ static const char *color_names[16] = { "brightblue", "brightmagenta", "brightcyan", "white", }; -/* Cache for resolved pair_index -> fg_index mappings */ #define CACHE_SIZE 4096 static struct { int pair_index; int fg_index; } pair_cache[CACHE_SIZE]; static int pair_cache_count = 0; -/* - * Resolve a color pair index to its foreground ANSI code. - * Uses tty_try_alloc_color_pair as a reverse lookup: since it returns - * existing pairs for duplicate (fg, bg) combos, we try all fg*bg combos - * until we find one matching the target pair_index. - */ static const char * pair_index_to_fg_ansi (int pair_idx) { @@ -116,15 +150,13 @@ pair_index_to_fg_ansi (int pair_idx) if (pair_idx < 0) return ""; - /* Check cache first */ for (i = 0; i < pair_cache_count; i++) if (pair_cache[i].pair_index == pair_idx) return ansi_codes[pair_cache[i].fg_index]; - /* Brute force: try all fg/bg combinations */ for (fg = 0; fg < 16; fg++) { - for (bg = 0; bg < 17; bg++) /* 16 colors + NULL bg */ + for (bg = 0; bg < 17; bg++) { tty_color_pair_t cp; int test_idx; @@ -137,7 +169,6 @@ pair_index_to_fg_ansi (int pair_idx) test_idx = tty_try_alloc_color_pair (&cp, FALSE); if (test_idx == pair_idx) { - /* Cache the result */ if (pair_cache_count < CACHE_SIZE) { pair_cache[pair_cache_count].pair_index = pair_idx; @@ -152,6 +183,276 @@ pair_index_to_fg_ansi (int pair_idx) return ""; } +/* ------------------------------------------------------------------ */ +/* Direct grammar loading (--grammar-dir / --lib-dir) */ +/* ------------------------------------------------------------------ */ + +#ifdef HAVE_TREE_SITTER + +/* + * Convert config.ini [colors] section into a temporary colors.ini file + * in the MC format (section = grammar name, keys = capture names). 
+ * This lets MC's existing ts_load_grammar_registry() pick up the colors + * without modifying the static ts_color_map in syntax_ts.c. + * + * The temporary file is written into a temp directory, and MC's + * data path is pointed there so ts_load_grammar_registry() finds it. + */ +static char * +create_temp_colors_ini (const char *config_path, const char *grammar_name) +{ + GKeyFile *kf; + gchar **keys; + gsize k_count, ki; + char *tmpdir; + char *ts_dir; + char *colors_path; + FILE *f; + + kf = g_key_file_new (); + if (!g_key_file_load_from_file (kf, config_path, G_KEY_FILE_NONE, NULL)) + { + g_key_file_free (kf); + return NULL; + } + + keys = g_key_file_get_keys (kf, "colors", &k_count, NULL); + if (keys == NULL) + { + g_key_file_free (kf); + return NULL; + } + + tmpdir = g_dir_make_tmp ("mc-syntax-dump-XXXXXX", NULL); + if (tmpdir == NULL) + { + g_strfreev (keys); + g_key_file_free (kf); + return NULL; + } + + ts_dir = g_build_filename (tmpdir, "syntax-ts", (char *) NULL); + g_mkdir_with_parents (ts_dir, 0700); + + colors_path = g_build_filename (ts_dir, "colors.ini", (char *) NULL); + f = fopen (colors_path, "w"); + g_free (colors_path); + g_free (ts_dir); + + if (f == NULL) + { + g_strfreev (keys); + g_key_file_free (kf); + g_free (tmpdir); + return NULL; + } + + fprintf (f, "[%s]\n", grammar_name); + for (ki = 0; ki < k_count; ki++) + { + gchar *value = g_key_file_get_value (kf, "colors", keys[ki], NULL); + if (value != NULL) + { + fprintf (f, "%s = %s\n", keys[ki], value); + g_free (value); + } + } + + fclose (f); + g_strfreev (keys); + g_key_file_free (kf); + + return tmpdir; +} + +/* + * Read the symbol= field from config.ini [grammar] section. + * Returns the symbol suffix or NULL if not specified. + * Caller must free. 
+ */ +static char * +read_symbol_from_config (const char *config_path) +{ + GKeyFile *kf; + char *value; + + kf = g_key_file_new (); + if (!g_key_file_load_from_file (kf, config_path, G_KEY_FILE_NONE, NULL)) + { + g_key_file_free (kf); + return NULL; + } + + value = g_key_file_get_value (kf, "grammar", "symbol", NULL); + g_key_file_free (kf); + + if (value != NULL) + g_strstrip (value); + + return value; +} + +/* + * Initialize tree-sitter for an edit buffer by loading the grammar, + * query, and colors directly from the specified directories. + * Returns TRUE on success. + */ +static gboolean +ts_init_direct (WEdit *edit, const char *grammar_dir, const char *lib_dir) +{ + char *dir_basename; + char *grammar_name; + char *config_path; + char *highlights_path; + char *so_path; + char *symbol_override; + char *symbol_name; + char *query_src; + gsize query_len; + GModule *module; + gpointer symbol; + const TSLanguage *(*lang_func) (void); + const TSLanguage *lang; + TSParser *parser; + TSTree *tree; + TSInput input; + uint32_t error_offset; + TSQueryError error_type; + TSQuery *query; + + /* Grammar name from directory basename */ + dir_basename = g_path_get_basename (grammar_dir); + grammar_name = g_strdup (dir_basename); + g_free (dir_basename); + + config_path = g_build_filename (grammar_dir, "config.ini", (char *) NULL); + highlights_path = g_build_filename (grammar_dir, "highlights.scm", (char *) NULL); + + /* Load colors from our temp colors.ini */ + { + extern void ts_load_grammar_registry (void); + ts_load_grammar_registry (); + } + + /* Determine symbol name */ + symbol_override = read_symbol_from_config (config_path); + if (symbol_override != NULL) + { + symbol_name = g_strdup_printf ("tree_sitter_%s", symbol_override); + g_free (symbol_override); + } + else + symbol_name = g_strdup_printf ("tree_sitter_%s", grammar_name); + + /* Load .so from lib_dir */ + so_path = g_strdup_printf ("%s/%s", lib_dir, grammar_name); + module = g_module_open (so_path, 
G_MODULE_BIND_LAZY); + g_free (so_path); + + if (module == NULL) + { + fprintf (stderr, "Failed to load grammar module: %s\n", + g_module_error ()); + g_free (config_path); + g_free (highlights_path); + g_free (symbol_name); + g_free (grammar_name); + return FALSE; + } + + if (!g_module_symbol (module, symbol_name, &symbol)) + { + fprintf (stderr, "Symbol '%s' not found in module\n", symbol_name); + g_module_close (module); + g_free (config_path); + g_free (highlights_path); + g_free (symbol_name); + g_free (grammar_name); + return FALSE; + } + g_free (symbol_name); + + lang_func = (const TSLanguage * (*) (void)) symbol; + lang = lang_func (); + + /* Create parser */ + parser = ts_parser_new (); + if (!ts_parser_set_language (parser, lang)) + { + fprintf (stderr, "Failed to set parser language\n"); + ts_parser_delete (parser); + g_module_close (module); + g_free (config_path); + g_free (highlights_path); + g_free (grammar_name); + return FALSE; + } + + /* Parse the buffer */ + input.payload = edit; + input.read = ts_input_read_cb; + input.encoding = TSInputEncodingUTF8; + + tree = ts_parser_parse (parser, NULL, input); + if (tree == NULL) + { + fprintf (stderr, "Failed to parse file\n"); + ts_parser_delete (parser); + g_module_close (module); + g_free (config_path); + g_free (highlights_path); + g_free (grammar_name); + return FALSE; + } + + /* Load and compile highlight query */ + if (!g_file_get_contents (highlights_path, &query_src, &query_len, NULL)) + { + fprintf (stderr, "Failed to read %s\n", highlights_path); + ts_tree_delete (tree); + ts_parser_delete (parser); + g_module_close (module); + g_free (config_path); + g_free (highlights_path); + g_free (grammar_name); + return FALSE; + } + + query = ts_query_new (lang, query_src, (uint32_t) query_len, &error_offset, &error_type); + g_free (query_src); + + if (query == NULL) + { + fprintf (stderr, "Query compilation failed at offset %u (error type %d)\n", + error_offset, error_type); + ts_tree_delete (tree); + 
ts_parser_delete (parser); + g_module_close (module); + g_free (config_path); + g_free (highlights_path); + g_free (grammar_name); + return FALSE; + } + + /* Store in edit widget */ + edit->ts.parser = parser; + edit->ts.tree = tree; + edit->ts.highlight_query = query; + edit->ts.highlights = g_array_new (FALSE, FALSE, sizeof (ts_highlight_entry_t)); + edit->ts.highlights_start = -1; + edit->ts.highlights_end = -1; + edit->ts.grammar_name = grammar_name; + edit->ts.active = TRUE; + edit->ts.need_reparse = FALSE; + + g_free (config_path); + g_free (highlights_path); + + return TRUE; +} + +#endif /* HAVE_TREE_SITTER */ + /* ------------------------------------------------------------------ */ /* Main */ /* ------------------------------------------------------------------ */ @@ -160,6 +461,8 @@ int main (int argc, char *argv[]) { const char *source_path = NULL; + const char *grammar_dir = NULL; + const char *lib_dir = NULL; gboolean force_legacy = FALSE; gboolean force_ts = FALSE; int i; @@ -181,16 +484,34 @@ main (int argc, char *argv[]) force_ts = TRUE; else if (strcmp (argv[i], "--legacy") == 0) force_legacy = TRUE; + else if (strcmp (argv[i], "--grammar-dir") == 0 && i + 1 < argc) + grammar_dir = argv[++i]; + else if (strcmp (argv[i], "--lib-dir") == 0 && i + 1 < argc) + lib_dir = argv[++i]; else source_path = argv[i]; } if (source_path == NULL) { - fprintf (stderr, "Usage: %s [--ts|--legacy] \n", argv[0]); + fprintf (stderr, + "Usage: %s [--ts|--legacy] [--grammar-dir DIR --lib-dir DIR] \n", + argv[0]); return 1; } + /* --grammar-dir requires --lib-dir and implies --ts */ + if (grammar_dir != NULL) + { + if (lib_dir == NULL) + { + fprintf (stderr, "--grammar-dir requires --lib-dir\n"); + return 1; + } + force_ts = TRUE; + force_legacy = FALSE; + } + str_init_strings (NULL); mc_config_init_config_paths (NULL); @@ -201,9 +522,7 @@ main (int argc, char *argv[]) mc_global.share_data_dir = g_strdup ("/usr/share/mc"); mc_global.sysconfig_dir = g_strdup 
("/usr/share/mc"); - /* Initialize colors - tty_color_init_lib is wrapped to avoid needing a terminal */ tty_init_colors (FALSE, TRUE, 256); - /* tty_color_init_lib (wrapped) didn't set use_colors, set it manually */ use_colors = TRUE; edit_options.syntax_highlighting = TRUE; @@ -214,6 +533,43 @@ main (int argc, char *argv[]) arg.file_vpath = vfs_path_from_str (source_path); arg.line_number = 0; +#ifdef HAVE_TREE_SITTER + /* When using --grammar-dir, disable normal TS init during edit_init + so we can do our own direct init afterwards. */ + if (grammar_dir != NULL) + { + char *config_path; + char *dir_basename; + char *grammar_name; + char *tmpdir; + + ts_init_disabled = TRUE; + + /* Set up color config from config.ini BEFORE edit_init, + because ts_load_grammar_registry() runs once and caches. */ + dir_basename = g_path_get_basename (grammar_dir); + grammar_name = g_strdup (dir_basename); + g_free (dir_basename); + + config_path = g_build_filename (grammar_dir, "config.ini", (char *) NULL); + tmpdir = create_temp_colors_ini (config_path, grammar_name); + g_free (config_path); + g_free (grammar_name); + + if (tmpdir != NULL) + { + g_free (mc_global.share_data_dir); + mc_global.share_data_dir = tmpdir; + /* Also redirect user data path so the existing + ~/.local/share/mc/syntax-ts/colors.ini doesn't + override our temp colors. 
*/ + setenv ("MC_XDG_DATA_HOME", tmpdir, 1); + /* Re-initialize paths with the new env */ + mc_config_init_config_paths (NULL); + } + } +#endif + edit = edit_init (NULL, &rect, &arg); if (edit == NULL) { @@ -226,11 +582,20 @@ main (int argc, char *argv[]) group_add_widget (&owner, WIDGET (edit)); #ifdef HAVE_TREE_SITTER - if (force_legacy && edit->ts.active) + if (grammar_dir != NULL) + { + /* Direct grammar loading from specified directories */ + ts_init_disabled = FALSE; + if (!ts_init_direct (edit, grammar_dir, lib_dir)) + { + fprintf (stderr, "Failed to initialize tree-sitter from %s\n", grammar_dir); + /* Fall through to show file without highlighting */ + } + } + else if (force_legacy && edit->ts.active) { ts_free (edit); edit->ts.active = FALSE; - /* Reload with TS disabled (ts_init_for_file is wrapped to fail) */ ts_init_disabled = TRUE; edit_load_syntax (edit, NULL, NULL); ts_init_disabled = FALSE; @@ -240,6 +605,8 @@ main (int argc, char *argv[]) #else (void) force_ts; (void) force_legacy; + (void) grammar_dir; + (void) lib_dir; #endif fprintf (stderr, "File: %s (%ld bytes)\n", source_path, (long) edit->buffer.size); @@ -252,10 +619,6 @@ main (int argc, char *argv[]) #endif ); - /* Output with ANSI colors. - Color pairs >= TTY_COLOR_MAP_OFFSET are role-based (e.g. EDITOR_NORMAL_COLOR) - and represent the default color. Pair indices 0..N are direct pairs allocated - by the syntax engine. 
*/ { int prev_color = -1; diff --git a/tests/syntax/samples/Caddyfile b/tests/syntax/samples/Caddyfile deleted file mode 100644 index 6fdec89f73..0000000000 --- a/tests/syntax/samples/Caddyfile +++ /dev/null @@ -1,122 +0,0 @@ -# Global options block -{ - debug - http_port 80 - https_port 443 - admin off - email admin@example.com - order reverse_proxy before respond - storage file_system /var/lib/caddy - auto_https disable_redirects - grace_period 10s - servers { - protocols h1 h2 h3 - strict_sni_host - max_header_size 16384 - } -} - -# Import shared snippet -(logging) { - log { - output file /var/log/caddy/access.log - format json - } -} - -# Simple site block with address -localhost:8080 { - respond "Hello, world!" 200 -} - -# HTTPS site with multiple directives -https://example.com { - import logging - - root * /var/www/html - - encode gzip zstd - - # Matcher definition - @api { - path /api/* - method GET POST PUT DELETE - } - - # Reverse proxy with options - reverse_proxy @api http://127.0.0.1:9000 { - header_up X-Real-IP {remote} - header_up Host {host} - lb_policy round_robin - health_check /health - transport http { - keepalive 30s - } - } - - handle_path /docs/* { - file_server { - root /var/www/docs - browse - } - } - - handle_errors { - respond "{http.error.status_code} {http.error.message}" - } - - # Redirect with matcher - @old path /old-page - redir @old /new-page 301 - - # TLS configuration - tls /etc/ssl/cert.pem /etc/ssl/key.pem { - protocols tls1.2 tls1.3 - ciphers TLS_AES_128_GCM_SHA256 - curves x25519 - alpn h2 - client_auth { - mode require_and_verify - trusted_ca_cert_file /etc/ssl/ca.pem - } - } - - # Headers with placeholders - header { - X-Request-ID {http.request.uuid} - X-Server {system.hostname} - Strict-Transport-Security "max-age=31536000" - -Server - } - - # Variables and boolean values - vars { - is_admin true - is_guest false - } - - uri strip_prefix /v1 - - try_files {path} {path}/ /index.html - - # Environment variable placeholder - 
respond /env {$APP_ENV} -} - -# Named route -&(my-route) { - respond "Named route" 200 -} - -# Snippet usage with boolean -*.example.com { - tls { - on_demand - } - - log { - output file /var/log/caddy/{host}.log - } - - reverse_proxy localhost:3000 -} diff --git a/tests/syntax/samples/Caddyfile-report.md b/tests/syntax/samples/Caddyfile-report.md deleted file mode 100644 index e9a23fd196..0000000000 --- a/tests/syntax/samples/Caddyfile-report.md +++ /dev/null @@ -1,78 +0,0 @@ -Caddy syntax highlighting: TS vs Legacy comparison report -========================================================== - -Sample file: `Caddyfile` -Legacy reference: `misc/syntax/caddyfile.syntax` -TS query: `misc/syntax-ts/queries-override/caddy-highlights.scm` -TS colors: `misc/syntax-ts/colors.ini` `[caddy]` - -Aligned with legacy -------------------- - -- Comments (`# ...`): `brown` in both TS and legacy. -- Block delimiters (`{`, `}`): `yellow` in both TS (`@keyword`) and legacy. -- Directives (`respond`, `reverse_proxy`, `encode`, `tls`, `handle_path`, - `handle_errors`, `basicauth`, `redir`, `root`, `header`, `file_server`, `log`, - `uri`, `try_files`, `vars`, `import`, `templates`): `yellow` - MATCH (TS - `@keyword`, legacy `keyword whole ... yellow`). -- Matchers (`@api`, `@old`, `*`, `/api/*`, `/docs/*`, `/admin/*`, `/health`, - `/env`): `brightmagenta` - MATCH (TS `@keyword.control`, legacy `keyword @... - brightmagenta`). -- Server addresses (`localhost:8080`, `https://example.com`, `*.example.com`): - `brightmagenta` - MATCH (TS `@keyword.control`, legacy `keyword linestart ... - brightmagenta`). -- Snippet names (`(logging)`): `brightmagenta` - MATCH (TS `@keyword.control`). -- Placeholders (`{remote}`, `{host}`, `{http.error.status_code}`, - `{http.error.message}`, `{http.request.uuid}`, `{system.hostname}`, `{path}`, - `{host}`): `brightred` - MATCH (TS `@function.special`, legacy `keyword - {http.*} brightred` etc.). 
-- Quoted strings (`"Hello, world!"`, `"max-age=31536000"`, `"Named route"`): - `green` via TS `@string` for `quoted_string_literal`. Legacy also uses `green` - via `context " "`. -- Numeric literals (`80`, `443`, `200`, `301`, `16384`): `brightgreen` in TS via - `@string.special`. Legacy highlights some numbers with `brightgreen` via - port/size patterns. -- Boolean values (`off`): `brightgreen` in TS via `@string.special`. Legacy - matches `keyword whole off brightgreen`. - -Intentional improvements over legacy -------------------------------------- - -- TS uses a proper grammar parse tree, so directives at any nesting level are - correctly identified. Legacy relies on `keyword whole` which can miss - directives not in the hardcoded list. -- TS correctly identifies all placeholders via the grammar's `(placeholder)` - node rather than requiring individual keyword patterns for each placeholder - name. -- TS highlights `string_literal` (unquoted arguments) as `lightgray` via - `@constant`, giving visual distinction between quoted and unquoted strings. -- TS handles snippet definitions (`(logging)`) and named routes (`&(my-route)`) - via grammar nodes rather than fragile regex patterns. -- TS recognizes addresses like `*.example.com` via the `(address)` grammar node. - Legacy only matches a few hardcoded patterns (`http://`, `https://`, - `localhost`). - -Known shortcomings ------------------- - -- The legacy engine produced NO syntax highlighting at all in the dump tool - output. All characters received the default color (4159). This appears to be a - tool limitation: the dump tool may not resolve the `Caddyfile` filename to - `caddyfile.syntax`. The legacy `.syntax` file itself contains comprehensive - rules. -- TS shows `yellow` color bleeding past directives into their arguments on the - same line (e.g., `log {` shows `log` and the rest of the line in yellow). 
This - is because the `(directive)` node in the grammar spans the entire directive - line including arguments. -- Subdirective highlighting (e.g., `protocols`, `ciphers`, `lb_policy`, - `health_check`) that legacy shows in `brightcyan` is not present in the TS - output. The TS grammar treats these as plain directive arguments rather than - separate subdirective nodes. -- The `{$APP_ENV}` environment variable placeholder is not highlighted by TS - (shown as default text). The grammar may not parse environment variable - references as placeholder nodes. -- Boolean values `true`/`false` inside `vars` block and `on`/ `off` values are - not highlighted by TS when they appear as directive arguments rather than - standalone boolean nodes. -- Named route syntax `&(my-route)` is not highlighted by TS (shown as default - text), while legacy had a pattern for it. diff --git a/tests/syntax/samples/Dockerfile b/tests/syntax/samples/Dockerfile deleted file mode 100644 index eae4e383bd..0000000000 --- a/tests/syntax/samples/Dockerfile +++ /dev/null @@ -1,78 +0,0 @@ -# Sample Dockerfile demonstrating syntax highlighting features -# Exercises all tree-sitter captures from dockerfile-highlights.scm - -# FROM with image name, tag, digest, and alias -FROM ubuntu:22.04 AS base -FROM node:18-alpine AS builder -FROM python@sha256:abcdef1234567890 AS runner - -# ARG before and after FROM -ARG VERSION=1.0 -ARG BUILD_DATE - -# MAINTAINER (deprecated) -MAINTAINER old-maintainer@example.com - -# LABEL with key-value pairs -LABEL maintainer="user@example.com" -LABEL version="1.0" -LABEL description="A sample image" - -# ENV variables -ENV APP_HOME=/app -ENV NODE_ENV=production -ENV PATH=$APP_HOME/bin:$PATH - -# WORKDIR -WORKDIR /app -WORKDIR ${APP_HOME} - -# USER -USER appuser -USER 1000:1000 - -# RUN with various forms -RUN apt-get update && apt-get install -y curl wget -RUN --mount=type=cache,target=/var/cache/apt apt-get install -y git -RUN --network=none echo "isolated build" -RUN ["sh", 
"-c", "echo hello"] - -# COPY with options -COPY package.json . -COPY --from=builder /app/dist ./dist -COPY --chown=appuser:appgroup src/ ./src/ -COPY --chmod=755 entrypoint.sh /usr/local/bin/ -COPY --link config/ ./config/ - -# ADD with options -ADD https://example.com/file.tar.gz /tmp/ -ADD --checksum=sha256:abcdef archive.tar.gz /app/ - -# EXPOSE ports -EXPOSE 8080 -EXPOSE 443/tcp -EXPOSE 3000 - -# VOLUME -VOLUME /data -VOLUME ["/var/log", "/var/data"] - -# STOPSIGNAL -STOPSIGNAL SIGTERM - -# HEALTHCHECK -HEALTHCHECK --interval=30s --timeout=10s CMD curl -f http://localhost:8080/ || exit 1 - -# ONBUILD -ONBUILD RUN npm install - -# SHELL -SHELL ["/bin/bash", "-c"] - -# Variable expansion -RUN echo "Version: ${VERSION}" -RUN echo "Home: $APP_HOME" - -# ENTRYPOINT and CMD -ENTRYPOINT ["python", "-m", "app"] -CMD ["--port", "8080"] diff --git a/tests/syntax/samples/Dockerfile-report.md b/tests/syntax/samples/Dockerfile-report.md deleted file mode 100644 index 7d28c1a873..0000000000 --- a/tests/syntax/samples/Dockerfile-report.md +++ /dev/null @@ -1,66 +0,0 @@ -Dockerfile syntax highlighting: TS vs Legacy comparison report -============================================================== - -Sample file: `Dockerfile` -Legacy reference: `misc/syntax/dockerfile.syntax` -TS query: `misc/syntax-ts/queries-override/dockerfile-highlights.scm` -TS colors: `misc/syntax-ts/colors.ini` `[dockerfile]` - -Note: The legacy `mc-syntax-dump` tool did not produce colored output for the -`Dockerfile` sample (all characters rendered as default color). The legacy -comparison below is therefore based on reading the `dockerfile.syntax` rules -directly rather than observed dump output. 
- -Aligned with legacy -------------------- - -- Instruction keywords (`FROM`, `RUN`, `CMD`, `COPY`, `ADD`, `EXPOSE`, `ENV`, - `WORKDIR`, `ENTRYPOINT`, `VOLUME`, `USER`, `ARG`, `LABEL`, `HEALTHCHECK`, - `SHELL`, `ONBUILD`, `STOPSIGNAL`, `AS`): `yellow` - MATCH -- Deprecated `MAINTAINER`: `brightred` - MATCH -- Comments (`#`): `brown` - MATCH -- Double-quoted strings: `green` - MATCH -- Variable expansion (`${VERSION}`, `$APP_HOME`): `brightgreen` - MATCH -- Port numbers (`8080`, `443/tcp`, `3000`): `brightgreen` - MATCH -- Params (`--mount=...`, `--network=...`, `--from=...`, `--chown=...`, - `--chmod=...`, `--checksum=...`, `--interval=...`, `--timeout=...`): - `brightmagenta` - MATCH - -Intentional improvements over legacy -------------------------------------- - -- Image names (`ubuntu`, `node`, `python`) are highlighted in `brightcyan` (via - `@function`). Legacy left image names as default text. -- Image tags (`:22.04`, `:18-alpine`) and digests (`@sha256:...`) are - highlighted in `brightgreen` (via `@string.special`). Legacy had no specific - rule for tags/digests. -- `LABEL` keys (`maintainer`, `version`, `description`) are highlighted in - `brightcyan` (via `@property`). Legacy treated them as plain text. -- `ENV` variable names (`APP_HOME`, `NODE_ENV`, `PATH`) are highlighted in - `lightgray` (via `@variable`). Legacy treated them as plain text before the - `=`. -- `ARG` variable names (`VERSION`, `BUILD_DATE`) are highlighted in `lightgray` - (via `@variable`). Legacy treated them as plain text. -- Unquoted string values after `ENV`, `USER`, and other instructions are - highlighted in `green` (via `@string`). Legacy only colored quoted strings. -- The `--link` option renders in `red` in TS output instead of `brightmagenta`. - This appears to be a parser issue where `--link` is not parsed as a `param` - node by the dockerfile tree-sitter grammar. 
- -Known shortcomings ------------------- - -- The legacy dump tool did not produce any colored output for the Dockerfile - sample, suggesting a file pattern matching issue in the dump tool for - extensionless filenames. The `dockerfile.syntax` file entry pattern - `Dockerfile.\*$` should match but did not trigger. -- Variable expansions inside double-quoted strings (e.g., - `"Version: ${VERSION}"`) are not individually highlighted by TS; the entire - string including the expansion renders as plain unquoted string text. -- `COPY --link` renders as `red` instead of `brightmagenta`, suggesting the - tree-sitter dockerfile grammar does not parse `--link` as a `param` node. -- Shell operators inside `RUN` commands (`&&`, `||`) are not highlighted by TS - since the Dockerfile grammar does not parse shell command internals. Legacy - had explicit keyword rules for `&&` and `||`. -- JSON array syntax in `RUN ["sh", "-c", ...]`, `ENTRYPOINT [...]`, and - `CMD [...]` is not distinguished from regular strings in TS output. 
diff --git a/tests/syntax/samples/Makefile-report.md b/tests/syntax/samples/Makefile-report.md deleted file mode 100644 index 29d89303d9..0000000000 --- a/tests/syntax/samples/Makefile-report.md +++ /dev/null @@ -1,93 +0,0 @@ -Makefile syntax highlighting: TS vs Legacy comparison report -============================================================= - -Sample file: `Makefile` -Legacy reference: `misc/syntax/makefile.syntax` -TS query: `misc/syntax-ts/queries-override/make-highlights.scm` -TS colors: `misc/syntax-ts/colors.ini` `[make]` - -Aligned with legacy -------------------- - -- Comments (`#`): `brown` - MATCH -- Directives (`define`, `endef`, `ifdef`, `ifndef`, `ifeq`, `ifneq`, `else`, - `endif`, `include`, `override`, `export`, `unexport`, `undefine`, `private`, - `vpath`): `magenta` - MATCH -- Assignment operator (`=`): `white` - MATCH -- Colon in rules (`:`): `yellow` - MATCH -- Double colon (`::`): `yellow` - MATCH -- Special targets (`.PHONY`, `.SUFFIXES`, `.DEFAULT`, `.PRECIOUS`, - `.INTERMEDIATE`, `.SECONDARY`, `.DELETE_ON_ERROR`, `.SILENT`, - `.EXPORT_ALL_VARIABLES`, `.NOTPARALLEL`): `white` - MATCH -- `$(VAR)` variable references: `yellow` (variable name) - MATCH -- Escaped dollar (`$$`): `brightcyan` - MATCH - -Intentional improvements over legacy -------------------------------------- - -- Variable assignment names (`CC`, `CFLAGS`, `LDFLAGS`, etc.): TS colors the LHS - name as `yellow`. Legacy leaves it as DEFAULT. This clearly distinguishes - variable definitions from other text. -- Compound assignment operators (`:=`, `::=`, `?=`, `+=`, `!=`): TS colors the - entire operator as `white`. Legacy only colors the `=` part as `white`, - leaving the prefix character (`:`, `?`, `+`, `!`) in its default color. -- `-include` and `sinclude` directives: TS colors these as `magenta` like - `include`. Legacy only recognizes `include` as a directive; `-include` gets - DEFAULT and `sinclude` gets DEFAULT. 
-- `override`, `export`, `unexport`, `private`, `undefine`, `vpath` keywords: TS - colors these as `magenta` directives. Legacy recognizes some but not all of - these consistently. -- Recipe shell content: TS applies bash injection to `shell_text` nodes, - providing full shell syntax highlighting in recipes. Commands like `echo` get - `yellow`, `rm` and `sed` get `cyan`, strings get `green`, and operators get - `brightcyan`. Legacy leaves recipe content as DEFAULT (no shell awareness). -- Strings in recipes: TS colors `"double quoted"` and `'single quoted'` strings - as `green` via bash injection. Legacy does not recognize strings inside - recipes. -- Semicolons and commas: TS colors as `brightcyan` (delimiter). Legacy does not - distinguish these. -- `$(VAR)` in recipes: TS colors the variable name inside as `yellow` with the - parentheses getting shell injection colors. Legacy colors the entire `$(VAR)` - as `yellow` uniformly. TS provides more granularity. -- `${VAR}` references: TS colors the variable name as `yellow`. Legacy colors - the entire `${VAR}` including braces as `brightgreen`. The legacy distinction - between `$()` and `${}` is arbitrary; TS treats both uniformly. -- Automatic variables (`$@`, `$^`, `$*`, `$?`): TS colors these as `brightred` - (function.special) in most contexts. Legacy does not distinguish automatic - variables from other `$`-prefixed text. -- `ifeq`/`ifneq` arguments: TS colors variable references inside conditionals - (e.g. `$(CC)` in `ifeq ($(CC),gcc)`) with proper variable highlighting. Legacy - colors the opening `$(` as `yellow` but doesn't complete the pattern. -- Shell assignment (`!=`): TS colors the variable name as `yellow` and the RHS - gets bash injection highlighting. Legacy only colors the `=` as `white`. -- Inline recipes (`quick: ; @echo "done"`): TS colors the `;` as `brightcyan` - and the recipe content gets bash injection. Legacy does not distinguish the - recipe portion. 
- -Known shortcomings ------------------- - -- Right side of assignments: legacy colors the entire RHS of a variable - assignment as `brightcyan`. TS leaves the RHS as DEFAULT (with variable - references colored individually). The legacy approach is coarse but provides a - visual distinction; TS relies on individual element coloring instead. This is - a deliberate design difference. -- `$<` automatic variable in recipes: bash injection interferes with the make - grammar's `automatic_variable` capture. Bash interprets `<` as a redirect - operator (`brightcyan`) rather than part of the automatic variable. The `$` - portion may appear as DEFAULT. This affects `$<` and `$%` specifically; other - automatic variables (`$@`, `$^`, `$*`, `$?`) work correctly because bash also - recognizes them. -- Line continuation (`\`): legacy colors the trailing backslash as `yellow`. The - tree-sitter make grammar absorbs continuation characters into the parent node - text, so they cannot be captured separately. -- `@substitution@` (autoconf markers): legacy colors `@word@` patterns as - `brightmagenta` on `black` background. TS does not recognize autoconf - substitution markers. This is an MC-specific legacy feature not present in - standard Makefile syntax. -- Recipe tab leader: legacy colors the leading tab character in recipes as - `lightgray` on `red` background as a visual cue. The tree-sitter grammar does - not expose the tab as a capturable node. -- `define` block content: both legacy and TS leave the content of - `define`/`endef` blocks as DEFAULT. Only the `define` and `endef` keywords are - colored as `magenta`. 
diff --git a/tests/syntax/samples/ada-report.md b/tests/syntax/samples/ada-report.md deleted file mode 100644 index b8bf99a4c9..0000000000 --- a/tests/syntax/samples/ada-report.md +++ /dev/null @@ -1,71 +0,0 @@ -Ada syntax highlighting: TS vs Legacy comparison report -======================================================= - -Sample file: `ada.adb` -Legacy reference: `misc/syntax/ada95.syntax` -TS query: `misc/syntax-ts/queries-override/ada-highlights.scm` -TS colors: `misc/syntax-ts/colors.ini` `[ada]` - -Aligned with legacy -------------------- - -- General keywords (`with`, `use`, `pragma`, `in`, `out`, `is`, `do`, `null`, - `of`, `or`, `and`, `not`, `xor`, `mod`, `abs`, `delay`, `raise`, `return`, - `others`, `task`, `reverse`, `range`, `renames`, `separate`): `yellow` - MATCH -- Control flow keywords (`begin`, `case`, `declare`, `else`, `elsif`, `end`, - `entry`, `exception`, `exit`, `for`, `if`, `loop`, `private`, `protected`, - `select`, `then`, `until`, `when`, `while`): `brightred` - MATCH -- Declaration keywords (`abstract`, `accept`, `access`, `all`, `at`, `constant`, - `goto`, `tagged`, `type`, `limited`): `brightcyan` - MATCH -- Definition keywords (`body`, `function`, `generic`, `new`, `package`, - `procedure`): `brightmagenta` (TS) vs `magenta` (legacy) - CLOSE MATCH - (brightmagenta is the closest available TS mapping) -- Type-like keywords (`array`, `record`): `cyan` - MATCH -- Operators (`+`, `-`, `*`, `/`, `**`, `&`, `=`, `/=`, `<`, `>`, `<=`, `>=`, - `:=`, `=>`, `..`, `<>`): `brightgreen` - MATCH -- Strings: `green` - MATCH -- Character literals (`'A'`, `' '`): `brightgreen` - MATCH -- Comments: `brown` - MATCH -- Labels (`<>`): `cyan` - MATCH - -Intentional improvements over legacy -------------------------------------- - -- Delimiter distinction: TS separates delimiters (`.`, `,`, `:`, `;`, `(`, `)`) - as `brightcyan` from operators (`+`, `-`, `:=`, etc.) as `brightgreen`. Legacy - colors all punctuation uniformly as `brightgreen`. 
This provides better visual - distinction between structural and operational punctuation. -- `overriding` keyword: TS correctly highlights `overriding` as `brightcyan` - (@property). Legacy does not recognize `overriding` at all (DEFAULT color). -- `subtype` keyword: TS colors `subtype` as `cyan` (@label, type-like keyword). - Legacy colors it as `brightcyan` (same group as `access`, `type`). TS - categorization is arguably more accurate since `subtype` defines types. -- Case-sensitive identifiers: legacy uses `caseinsensitive` mode, which means - `True`, `False`, `Boolean`, `Integer`, `Float`, `String`, `Character` are - colored as `cyan` (type names) regardless of context. TS does not color these - standard type names because they are user-defined identifiers in tree-sitter's - Ada grammar. This is more correct from a parser perspective since Ada's - predefined types are library-defined, not keywords. -- `interface` keyword: TS colors it as `brightcyan` (@property). Legacy does not - include `interface` in its keyword list. - -Known shortcomings ------------------- - -- Predefined type names (`Boolean`, `Integer`, `Float`, `String`, `Character`, - `Natural`, `Positive`, `Duration`, `Wide_Character`, `Wide_String`, - `Wide_Wide_Character`, `Wide_Wide_String`): TS now captures these as `yellow` - via `@keyword` with `#any-of?` predicate on identifier nodes. Legacy colors - them as `cyan` (keyword). The color differs (yellow vs cyan) but they are now - highlighted rather than left as DEFAULT. `Universal_Integer` and - `Universal_Float` are not included as they are compiler-internal types. -- `True` and `False` literals: legacy colors these as `cyan` (type color due to - case-insensitive matching). TS leaves them as DEFAULT since they are parsed as - identifiers. Ada programmers may expect boolean literals to be highlighted. -- `requeue` and `terminate` keywords: legacy highlights these as `yellow`. 
TS - does not include them in its keyword lists, so they appear as DEFAULT. -- `#` (hash) and `'` (tick/attribute): legacy colors these as `brightgreen` - (operator). TS does not explicitly capture them. The tick used for attributes - (e.g., `Integer'Image`) is not highlighted. -- `aliased` keyword: legacy colors it as `brightcyan`. TS does not include it in - its keyword lists. diff --git a/tests/syntax/samples/ada.adb b/tests/syntax/samples/ada.adb deleted file mode 100644 index 6eda70d0c1..0000000000 --- a/tests/syntax/samples/ada.adb +++ /dev/null @@ -1,159 +0,0 @@ --- Ada syntax sample: demonstrates all TS capture groups --- This file exercises every capture in ada-highlights.scm - --- General keywords (@keyword -> yellow) -with Ada.Text_IO; use Ada.Text_IO; -with Ada.Integer_Text_IO; -pragma Elaborate_All (Ada.Text_IO); - --- Definition keywords (@constant.builtin -> brightmagenta) -package body Demo_Package is - - -- Declaration keywords (@property -> brightcyan) - type Color is (Red, Green, Blue); - type Matrix is array (1 .. 3, 1 .. 3) of Float; - subtype Small_Int is Integer range 0 .. 100; - - -- Type-like keywords (@label -> cyan) - type Node; - type Node is record - Value : Integer; - Next : access Node; - end record; - - type Shape is tagged limited null record; - type Abstract_Shape is abstract tagged null record; - - -- Control flow keywords (@function.special -> brightred) - procedure Process (X : in out Integer) is - Result : Integer := 0; - begin - if X > 0 then - for I in 1 .. X loop - Result := Result + I; - exit when Result > 100; - end loop; - elsif X = 0 then - Result := 1; - else - while X < 0 loop - X := X + 1; - end loop; - end if; - - case Result is - when 0 => - Put_Line ("Zero"); - when 1 .. 
10 => - Put_Line ("Small"); - when others => - Put_Line ("Large"); - end case; - - declare - Temp : Integer := Result; - begin - Result := Temp * 2; - exception - when others => - Result := 0; - end; - end Process; - - -- Function definition (@constant.builtin -> brightmagenta) - function Add (A, B : Integer) return Integer is - begin - return A + B; - end Add; - - -- Generic (@constant.builtin -> brightmagenta) - generic - type Element is private; - package Stack is - procedure Push (E : in Element); - end Stack; - - -- Task (@keyword -> yellow) - task type Worker is - entry Start; - end Worker; - - task body Worker is - begin - accept Start do - null; - end Start; - select - delay 1.0; - end select; - end Worker; - - -- Protected type - protected type Counter is - procedure Increment; - function Get return Integer; - private - Count : Integer := 0; - end Counter; - - -- Keywords: abort, raise, separate, renames, reverse - procedure Error_Handler is - begin - raise Constraint_Error; - end Error_Handler; - - -- Operators (@string.special -> brightgreen) - procedure Operators is - A : Integer := 10; - B : Integer := 3; - C : Boolean; - D : Float := 2.0; - begin - A := A + B; - A := A - B; - A := A * B; - A := A / B; - D := D ** 2; - A := A mod B; - A := abs A; - C := (A = B); - C := (A /= B); - C := (A < B); - C := (A > B); - C := (A <= B); - C := (A >= B); - C := not C; - C := C and True; - C := C or False; - C := C xor True; - end Operators; - - -- String literals (@string -> green) - S1 : String := "Hello, Ada!"; - S2 : String := "Line one" & "Line two"; - - -- Character literals (@string.special -> brightgreen) - Ch1 : Character := 'A'; - Ch2 : Character := ' '; - - -- Labels (@label -> cyan) - procedure Goto_Demo is - begin - <> - goto Start_Label; - end Goto_Demo; - - -- Delimiters (@delimiter -> brightcyan) - -- Covered by: . 
, : ; ( ) - - -- Comments (@comment -> brown) - -- This is a line comment - - -- Overriding keyword (@property -> brightcyan) - overriding procedure Initialize; - - -- Access, all, at, constant, goto, interface - type Ptr is access all Integer; - I : constant Integer := 42; - -end Demo_Package; diff --git a/tests/syntax/samples/asm-report.md b/tests/syntax/samples/asm-report.md deleted file mode 100644 index 0a1b85d5bb..0000000000 --- a/tests/syntax/samples/asm-report.md +++ /dev/null @@ -1,61 +0,0 @@ -Assembly syntax highlighting: TS vs Legacy comparison report -============================================================== - -Sample file: `asm.asm` -Legacy reference: `misc/syntax/assembler.syntax` -TS query: `misc/syntax-ts/queries-override/asm-highlights.scm` -TS colors: `misc/syntax-ts/colors.ini` `[asm]` - -Aligned with legacy -------------------- - -- Line comments (`; ...`): `brown` (TS `comment`) - MATCH. -- Block comments (`/* ... */`): `brown` (TS `comment`) - MATCH. -- Strings (`"..."`): `green` (TS `string`) - MATCH. -- Labels (`_start:`, `print_string:`, `.large_value:`): `cyan` (TS `label`) - - MATCH. -- Instructions/directives (`section`, `global`, `extern`, `db`, `dw`, `dd`, - `dq`, `equ`, `align`, `times`, `resb`, `resd`): `white` (TS `keyword.other`) - - MATCH (legacy also uses `white` for directives and pseudo-instructions). -- Pointer size keywords (`byte`, `word`, `dword`, `qword`): `white` (TS - `keyword.other`) - MATCH. - -Intentional improvements over legacy -------------------------------------- - -- TS uses the grammar's structural `reg` node to identify all registers - uniformly; legacy requires exhaustive per-register keyword lists (hundreds of - entries for 16/32/64-bit, FPU, SSE, etc.). -- TS highlights registers in instruction operands as `brightmagenta` - (`keyword.control`) consistently via the `reg` node; legacy achieves the same - color but only for explicitly listed registers. 
-- TS highlights identifiers used as memory references (e.g., `msg`, `buffer`, - `result` inside `[...]`) as `lightgray` (`constant`); legacy colors these as - default `lightgray`. -- TS handles the instruction node as a whole unit with `white` - (`keyword.other`), covering all mnemonics without needing individual keyword - entries. - -Known shortcomings ------------------- - -- TS does not distinguish registers by type: general-purpose registers (`eax`, - `ebx`, etc.) and SSE registers (`xmm0`, `xmm1`, etc.) both get `lightgray` in - TS when used as plain operands outside `[...]` context. Legacy colors - general-purpose registers as `brightmagenta` and SSE/FPU registers as - `brightcyan`, providing more visual distinction. -- The TS grammar's `reg` node only fires for registers inside memory references - (`[ebp + 8]`, `[rax]`), coloring them as `brightmagenta`; registers used as - direct operands (`mov eax, 42`) show as `lightgray` (default) in TS, whereas - legacy colors them `brightmagenta` everywhere. -- TS does not highlight NASM preprocessor directives (`%ifdef`, `%else`, - `%endif`, `%define`) as `brightred` like legacy does; TS shows `%` as `RED` - (error) and the directive name as `white`/`lightgray`. -- TS does not color `$` and `$$` as `brightgreen` like legacy does for - current-address references. -- GAS section names (`.data`, `.text`, `.bss`, `.rodata`) are not highlighted in - TS; legacy colors them as `brightblue`. -- Legacy highlights `%%` (local label prefix) as `cyan`; TS does not - specifically handle this. -- Commas between operands show as `RED` (error) in some TS contexts, suggesting - the grammar may have parsing issues with certain instruction formats. 
diff --git a/tests/syntax/samples/asm.asm b/tests/syntax/samples/asm.asm deleted file mode 100644 index 177d407e4f..0000000000 --- a/tests/syntax/samples/asm.asm +++ /dev/null @@ -1,134 +0,0 @@ -; x86 Assembly sample file for syntax highlighting -; Exercises all TS captures from asm-highlights.scm - -section .data - msg db "Hello, World!", 0x0A, 0 - len equ $ - msg - fmt db "Value: %d", 10, 0 - align 16 - buffer times 256 db 0 - -section .bss - result resd 1 - input resb 64 - -section .text - global _start - extern printf - -; Simple function with label and instructions -_start: - ; Set up stack frame - push ebp - mov ebp, esp - sub esp, 16 - - ; Load effective address - lea eax, [msg] - push eax - call print_string - add esp, 4 - - ; Arithmetic operations - mov eax, 42 - add eax, 8 - sub eax, 3 - imul eax, 2 - xor edx, edx - div ecx - - ; Compare and branch - cmp eax, 100 - jge .large_value - jl .small_value - -.large_value: - mov dword [result], 1 - jmp .done - -.small_value: - mov dword [result], 0 - -.done: - ; String operations - lea esi, [msg] - lea edi, [buffer] - mov ecx, len - rep movsb - - ; Stack cleanup and exit - mov esp, ebp - pop ebp - - ; System call: exit - mov eax, 1 - xor ebx, ebx - int 0x80 - -; Function: print_string -; Input: pointer on stack -print_string: - push ebp - mov ebp, esp - push eax - push ebx - push ecx - push edx - - ; sys_write(stdout, msg, len) - mov eax, 4 - mov ebx, 1 - mov ecx, [ebp + 8] - mov edx, len - int 0x80 - - pop edx - pop ecx - pop ebx - pop eax - pop ebp - ret - -; 64-bit function example -global func64 -func64: - push rbp - mov rbp, rsp - - ; Use 64-bit registers - mov rax, rdi - add rax, rsi - imul rax, rdx - - ; SSE operations - movaps xmm0, [rdi] - addps xmm0, xmm1 - mulps xmm0, xmm2 - - ; Pointer size directives - mov byte [rax], 0 - mov word [rax + 2], 0xFFFF - mov dword [rax + 4], 12345 - mov qword [rax + 8], 0 - - pop rbp - ret - -; Macro-style block comment -/* This is a block comment - spanning multiple 
lines */ - -; Data definitions with various sizes -data_section: - db 0x41, 0x42, 0x43 - dw 0x1234 - dd 3.14 - dq 123456789 - -; Conditional assembly -%ifdef DEBUG - mov eax, 0xDEADBEEF -%else - xor eax, eax -%endif -%define MAX_SIZE 1024 diff --git a/tests/syntax/samples/awk-report.md b/tests/syntax/samples/awk-report.md deleted file mode 100644 index 8c910dc42e..0000000000 --- a/tests/syntax/samples/awk-report.md +++ /dev/null @@ -1,86 +0,0 @@ -AWK syntax highlighting: TS vs Legacy comparison report -======================================================= - -Sample file: `awk.awk` -Legacy reference: `misc/syntax/awk.syntax` -TS query: `misc/syntax-ts/queries-override/awk-highlights.scm` -TS colors: `misc/syntax-ts/colors.ini` `[awk]` - -Aligned with legacy -------------------- - -- Comments (`#`): `brown` - MATCH. -- Strings (double-quoted): `green` - MATCH. -- String escape sequences (`\t`, `\n`): `brightgreen` in legacy, not separately - highlighted in TS (TS renders entire string as `green`) - PARTIAL MATCH. -- `BEGIN`/`END`: legacy uses `red`/`white`, TS uses `brightred` (via - `@function.special`) - CLOSE MATCH. -- `function` keyword: `brightmagenta` - MATCH (legacy uses `brightmagenta`, TS - uses `@delimiter.special`). -- Control flow keywords (`if`, `else`, `while`, `for`, `do`, `return`, `break`, - `continue`, `in`, `exit`, `getline`, `next`, `nextfile`): `white` - MATCH - (legacy uses `white/26`, TS uses `@keyword.other`). -- `print`/`printf`: `white` - MATCH (legacy uses `white/26`, TS uses - `@keyword.other`). -- Field references (`$0`, `$1`, `$2`, `$NF`, `$i`): `brightred` - MATCH (legacy - uses `brightred/18`, TS uses `@variable.builtin`). -- Arithmetic operators (`+`, `-`, `*`, `/`, `%`, `=`): `yellow` - MATCH (legacy - uses `yellow/24`, TS uses `@operator.word`). -- Comparison operators (`==`, `!=`, `<`, `>`, `<=`, `>=`): `yellow` - MATCH. -- Assignment operators (`+=`, `-=`, `*=`, `%=`): `yellow` - MATCH. 
-- Increment/decrement (`++`, `--`): `yellow` - MATCH. -- Regex match (`~`, `!~`): `yellow` - MATCH. -- Comma and semicolon delimiters: legacy uses `white/25` and `lightgray/19` - respectively, TS uses `brightcyan` for both (via `@delimiter`) - DIFFERENT. - -Intentional improvements over legacy -------------------------------------- - -- `BEGINFILE`/`ENDFILE` (gawk extensions) are highlighted in `brightred` by TS - (via `@function.special`). Legacy had no rule for these keywords, leaving them - as default text. -- `func` (gawk shorthand for `function`) is highlighted in `brightmagenta` by - TS. Legacy had it as `white`. -- `switch`/`case`/`default` keywords are highlighted in `white` by TS (via - `@keyword.other`). Legacy had `switch` and `case` as default text (only - `default` was not specifically listed). -- `delete` is highlighted in `white` by TS. Legacy also had `delete` as `white`. -- Regex literals (`/^#/`, `/pattern/`, `/exclude/`) are highlighted with - `brightgreen` content inside `yellow` delimiters by TS. Legacy used `red` for - the entire regex pattern. -- Logical operators (`&&`, `||`, `!`) are highlighted in `yellow` by TS. Legacy - left `&&` and `||` as default text (they were not in the keyword list). -- Ternary operator (`?`, `:`) is highlighted in `yellow` by TS. Legacy had `?` - and `:` as `white/25`. -- The `^` and `**` exponentiation operators are highlighted in `yellow` by TS. - Legacy only matched `^` inconsistently. -- Compound assignment `^=` and `/=` are properly highlighted as `yellow` by TS. - Legacy split these into separate characters (e.g., `^` then `=`). - -Known shortcomings ------------------- - -- String escape sequences (`\t`, `\n`) and printf format specifiers (`%s`, `%d`, - `%.2f`) are not separately highlighted by TS within strings. Legacy - highlighted these in `brightgreen/16` inside strings. The TS grammar does not - emit separate nodes for escape sequences within AWK strings. 
-- Builtin variables (`FS`, `OFS`, `NR`, `NF`, `FILENAME`, `ARGC`, `ARGV`, - `ENVIRON`, etc.) are not highlighted by TS (rendered as default text). Legacy - highlighted these in `brightblue`. The TS query has no capture for builtin - variables. -- Builtin functions (`atan2`, `cos`, `sin`, `sqrt`, `gsub`, `index`, `length`, - `match`, `split`, `sprintf`, `sub`, `substr`, `tolower`, `toupper`, etc.) are - not highlighted by TS. Legacy highlighted these in `white` with black - background. -- Array subscript brackets (`[key]`, `[0]`, `["HOME"]`) render as default text - in TS. Legacy highlighted these in `magenta`. -- Curly braces `{` and `}` and parentheses `(` and `)` are not highlighted by TS - (default text). Legacy highlighted these in `white/25`. -- Hex constants (e.g., `0xFF`) are not highlighted by TS. Legacy highlighted - these in `magenta/6`. -- The shebang line `#!/usr/bin/awk -f` is rendered as `brown` (comment) by TS. - Legacy highlighted it with `yellow`/`magenta` coloring. -- Pipe operator `|` (used in `"date" | getline today`) is not highlighted by TS. -- Number literals (e.g., `0`, `1`, `2`, `10`, `100`) are not highlighted by TS. - Legacy also left most numbers as default unless they were part of specific - patterns. 
diff --git a/tests/syntax/samples/awk.awk b/tests/syntax/samples/awk.awk deleted file mode 100644 index 48ebca201c..0000000000 --- a/tests/syntax/samples/awk.awk +++ /dev/null @@ -1,177 +0,0 @@ -#!/usr/bin/awk -f -# Sample AWK file demonstrating syntax highlighting features -# Exercises all tree-sitter captures from awk-highlights.scm - -# BEGIN and END blocks -BEGIN { - FS = ":" - OFS = "\t" - RS = "\n" - ORS = "\n" - count = 0 - print "Starting processing" -} - -END { - printf "Processed %d records\n", count -} - -# BEGINFILE and ENDFILE (gawk extensions) -BEGINFILE { - print "Reading: " FILENAME -} - -ENDFILE { - print "Done with: " FILENAME -} - -# Function definition -function max(a, b) { - if (a > b) - return a - else - return b -} - -# func keyword (gawk extension) -func min(a, b) { - return (a < b) ? a : b -} - -# Pattern-action with regex -/^#/ { next } -/^$/ { nextfile } - -# Field references -{ - first = $1 - second = $2 - last = $NF - whole = $0 -} - -# Arithmetic operators -{ - sum = $1 + $2 - diff = $1 - $2 - prod = $1 * $2 - quot = $1 / $2 - mod = $1 % $2 - power = $1 ^ 2 - power2 = $1 ** 2 -} - -# Assignment operators -{ - x = 1 - x += 10 - x -= 5 - x *= 2 - x /= 3 - x %= 7 - x ^= 2 -} - -# Comparison and logical operators -{ - if (x == 1 && y != 2) - print "match" - if (x < 10 || x > 100) - print "range" - if (x >= 5 && x <= 50) - print "in range" - if (!found) - print "not found" -} - -# Regex match operators -$0 ~ /pattern/ { print "matched" } -$0 !~ /exclude/ { print "not excluded" } - -# Increment and decrement -{ - count++ - x-- - ++count - --x -} - -# Ternary operator -{ - result = (x > 0) ? 
"positive" : "non-positive" -} - -# Control flow -{ - for (i = 1; i <= NF; i++) { - if ($i == "skip") - continue - if ($i == "stop") - break - print $i - } -} - -# do-while loop -{ - i = 0 - do { - i++ - } while (i < 10) -} - -# for-in loop with delete -{ - for (key in arr) { - print key, arr[key] - } - delete arr -} - -# switch-case (gawk) -{ - switch ($1) { - case "a": - print "alpha" - break - case "b": - print "beta" - break - default: - print "other" - } -} - -# Builtin variables -{ - print NR, FNR, NF - print ARGC, ARGV[0] - print ENVIRON["HOME"] - print RLENGTH, RSTART - print SUBSEP - print OFMT, CONVFMT -} - -# getline -{ - getline line < "/etc/hostname" - "date" | getline today -} - -# String with escape sequences -{ - print "tab:\there" - print "newline:\nhere" - printf "%s has %d items (%.2f%%)\n", name, count, pct -} - -# exit -END { - exit 0 -} - -# print and printf -{ - print "hello", "world" - printf "%10s %5d\n", $1, $2 -} diff --git a/tests/syntax/samples/bash-report.md b/tests/syntax/samples/bash-report.md deleted file mode 100644 index 8cc19b2332..0000000000 --- a/tests/syntax/samples/bash-report.md +++ /dev/null @@ -1,70 +0,0 @@ -Bash syntax highlighting: TS vs Legacy comparison report -========================================================= - -Sample file: `bash.sh` -Legacy reference: `misc/syntax/sh.syntax` -TS query: `misc/syntax-ts/queries-override/bash-highlights.scm` -TS colors: `misc/syntax-ts/colors.ini` `[bash]` - -Aligned with legacy -------------------- - -- Language keywords (`if`, `then`, `else`, `fi`, `for`, `do`, `done`, etc.): - `yellow` - MATCH -- Shell builtins (`echo`, `printf`, `set`, `shift`, `break`, etc.): `yellow` - - MATCH - NOTE: Legacy lists these as keywords. TS captures them via `#any-of?` - predicate on `command_name` nodes, matching the same `yellow` color. 
-- External commands (`cat`, `ls`, `grep`, etc.): `cyan` - MATCH -- Security commands (`gpg`, `ssh`, `scp`, `openssl`, `md5sum`): `red` - MATCH -- Function definitions (`function name`, `name()`): `brightmagenta` - MATCH -- Function keyword: `brightmagenta` - MATCH -- Function parens `()`: `brightmagenta` - MATCH -- Comments: `brown` - MATCH -- Strings (double-quoted, single-quoted): `green` - MATCH -- Variable expansions (`$VAR`, `${VAR}`): `brightgreen` - MATCH -- Special variables (`$?`, `$#`, `$@`, `$*`, `$$`, `$!`, `$_`): `brightred` - - MATCH - Including the `$` sign (whole `$?` is red). -- Positional parameters (`$0`-`$9`): `brightred` - MATCH - Including the `$` sign. -- `;;` in case statements: `brightred` - MATCH -- `;` semicolons: `brightcyan` - MATCH -- `{ }` braces in compound statements: `brightcyan` - MATCH -- Heredoc body: `green` - MATCH -- Backtick characters (`` ` ``): `brightred` - MATCH - -Intentional improvements over legacy -------------------------------------- - -- Shebang (`#!/bin/bash`): TS colors the entire line as `brightcyan` via a - `(#match? "^#!")` predicate on comments. Legacy cannot distinguish shebangs - from regular comments due to the `context # \n` rule taking precedence over - the keyword pattern. -- Backtick content: legacy colors everything inside backticks as - `brightred`/default. TS colors backtick delimiters as `brightred` but lets - the content inside get normal command coloring (command names `cyan`, keywords - `yellow`, etc.), matching standalone command behavior. -- Test operators (`-f`, `-d`, `-eq`, `-ne`, etc.): TS colors these as - `brightcyan` (operator) in both `[ ]` and `[[ ]]` contexts. Legacy does not - color them. This improves readability of test expressions. -- `$()` content: TS only colors the `$(` delimiter as `brightgreen`. The - commands inside get normal coloring. Legacy colors the entire `$()` including - content as `brightgreen`. 
-- `{ }` in `${VAR}`: TS correctly colors the closing `}` as `brightgreen` - (part of the expansion). Earlier versions incorrectly colored it as - `brightcyan` (delimiter). -- Square brackets `[ ]` `[[ ]]`: TS leaves these as DEFAULT (white), matching - legacy behavior. The brackets are not colored. - -Known shortcomings ------------------- - -- Format specifiers in `echo`/`printf` strings are not colored. Tree-sitter - does not parse `printf` format strings inside bash string nodes. -- Heredoc content is colored as `green` (string) uniformly. Legacy has more - nuanced heredoc handling with variable expansion coloring inside heredocs. TS - does not currently apply injection for heredoc content. -- The `#any-of?` predicate for shell builtins relies on MC's predicate - evaluator. The builtin list is hardcoded in the query file. New builtins - require manual additions. diff --git a/tests/syntax/samples/bash.sh b/tests/syntax/samples/bash.sh deleted file mode 100644 index ab5f56bd6d..0000000000 --- a/tests/syntax/samples/bash.sh +++ /dev/null @@ -1,144 +0,0 @@ -#!/bin/bash -# Comment: demonstrate all bash color features - -# Variable assignment -FOO="hello" -BAR='world' -NUM=42 - -# Keywords: if/then/elif/else/fi -if [ -f /tmp/test ]; then - echo "file exists" -elif [ -d /tmp ]; then - printf "dir exists\n" -else - exit 1 -fi - -# Keywords: for/do/done/in -for i in 1 2 3; do - continue -done - -# Keywords: while/until -while read -r line; do - break -done < /tmp/test - -until true; do - shift -done - -if [[ "x" -eq "y" ]]; then - umpopssible -fi - -# Keywords: case/esac with ;; -case "$FOO" in - hello) echo "matched";; - *) echo "default";; -esac - -# Keywords: select -select opt in "yes" "no"; do - break -done - -# Keywords: declare/local/export/readonly/typeset/unset -declare -i COUNT=0 -export PATH -readonly PI=3 -unset FOO - -# Function definitions -function msg() { - local val="$1" - return 0 -} - -msg_short() { - echo "short" -} - -# Function call -msg foo 
-msg_short - -# Variable expansions -echo $FOO -echo ${BAR} -echo "$FOO and ${BAR}" -echo "positional: $0 $1 $9" -echo "special: $? $# $@ $* $$ $! $_ $-" - -# Command substitution -RESULT=$(date +%s) -OLD=`uname -r` - -# Strings -echo "double quoted with $VAR and ${VAR}" -echo 'single quoted no $expansion' -cat < /tmp/out -echo "append" >> /tmp/out -cat < /tmp/in -cmd 2>&1 -cmd &>/dev/null - -# Logical and pipe operators -true && false || echo "fallback" -cat file | grep pattern | sort - -# Brackets and test -[[ -n "$FOO" ]] && echo "set" -[ -z "$BAR" ] && echo "empty" -test -x /bin/bash && echo "exec" - -# File descriptors -exec 3>/tmp/fd3 -echo "to fd3" >&3 - -# Arithmetic -(( COUNT++ )) -(( COUNT = COUNT + 1 )) - -# Booleans (cyan in sh.syntax) -true -false - -# Source and trap -source /etc/profile -trap 'echo bye' EXIT -wait -getopts "abc:" opt -umask 022 -set -o errexit -o pipefail - -# Backtick context -echo `for N in {1..2}; do ls -la; done` diff --git a/tests/syntax/samples/c-report.md b/tests/syntax/samples/c-report.md deleted file mode 100644 index 5887422960..0000000000 --- a/tests/syntax/samples/c-report.md +++ /dev/null @@ -1,65 +0,0 @@ -C syntax highlighting: TS vs Legacy comparison report -===================================================== - -Sample file: `c.c` -Legacy reference: `misc/syntax/c.syntax` -TS query: `misc/syntax-ts/queries-override/c-highlights.scm` -TS colors: `misc/syntax-ts/colors.ini` `[c]` - -Aligned with legacy -------------------- - -- Keywords (`if`, `for`, `while`, `return`, etc.): `yellow` - MATCH -- Storage class (`auto`, `extern`, `static`, `register`): `yellow` - MATCH -- Preprocessor directives (`#include`, `#define`, etc.): `brightred` - MATCH -- Types (`int`, `char`, `float`, `void`, `short`, `long`, etc.): `yellow` - - MATCH -- Constants (`NULL`, `true`, `false`): `lightgray` - MATCH -- Numbers: `lightgray` - MATCH -- Character literals: `brightgreen` - MATCH -- Strings: `green` - MATCH -- Escape sequences in strings 
(`\n`, `\t`, `\\`, etc.): `brightgreen` - MATCH -- Comments: `brown` - MATCH -- Semicolons: `brightmagenta` - MATCH -- Bitwise operators (`&`, `|`, `^`, `~`): `brightmagenta` - MATCH -- Arithmetic operators (`+`, `-`, `*`, `/`, etc.): `yellow` - MATCH -- Field operators (`.`, `->`): `yellow` - MATCH -- Brackets (`(`, `)`, `[`, `]`, `{`, `}`): `brightcyan` - MATCH -- Ternary operators (`?`, `:`): `brightcyan` - MATCH -- Labels (goto targets): `cyan` - MATCH -- Ellipsis (`...`): `yellow` - MATCH -- C11/C23 keywords (`_Bool`, `_Atomic`, `_Noreturn`, etc.): `yellow` - MATCH - -Intentional improvements over legacy -------------------------------------- - -- Preprocessor body: legacy colors the entire `#define` line as `brightred`. TS - only colors the directive keyword (`#define`, `#include`) as `brightred`, - leaving the macro name and body in their natural colors. This is more accurate - since the body contains valid C tokens. -- Include paths: legacy colors `<header.h>` and `"header.h"` as `red` - (preprocessor string). TS colors them as `green` (string). This is consistent - with how strings are treated elsewhere. -- `size_t`: legacy does not recognize `size_t` (DEFAULT). TS recognizes it as a - type (`yellow`) via the `primitive_type` node. -- Number suffixes: legacy fails to color `42L` (suffix breaks pattern). TS - correctly colors the full `number_literal` as `lightgray`. -- Negative literals: legacy colors the minus in `-1` as `yellow` (operator). TS - colors it as part of the number (`lightgray`). Both are reasonable; TS is - arguably more correct for literal constants. -- `default` keyword in switch: legacy colors it as `cyan` (label color). TS - colors it as `yellow` (keyword). TS is more correct since `default` is a - keyword, not a user-defined label. - -Known shortcomings ------------------- - -- Format specifiers (`%d`, `%s`, `%f`, etc.) inside strings are not colored as - `brightgreen`. Tree-sitter does not parse `printf` format strings.
Legacy - colors them via regex patterns. This would require custom injection or - post-processing which is not currently supported. -- Comment special words (`TODO`, `FIXME`, `NOTE`) are not highlighted with a - different background. Legacy uses black-on-brown for these. This would require - either comment injection or special handling in the TS engine. -- Digit separators (`1_000_000`) are not recognized by the C grammar as a - `number_literal`. They appear as DEFAULT. Note that the underscore form is - not valid C: C23 digit separators use an apostrophe (`1'000'000`), so the - grammar is right to reject this spelling. diff --git a/tests/syntax/samples/c.c b/tests/syntax/samples/c.c deleted file mode 100644 index f72389dd53..0000000000 --- a/tests/syntax/samples/c.c +++ /dev/null @@ -1,207 +0,0 @@ -/* Block comment: demonstrate all C color features */ -/* TODO: fix this */ -/* NOTE: important !! */ - -// Line comment -// FIXME: broken - -#include -#include "myheader.h" - -#define MAX_SIZE 100 -#define SQUARE(x) ((x) * (x)) - -#ifdef DEBUG -#define LOG(msg) printf("%s\n", msg) -#else -#define LOG(msg) -#endif - -#if defined(__GNUC__) -#pragma GCC optimize("O2") -#endif - -/* Keywords */ -typedef struct { - int x; - int y; -} Point; - -typedef union { - int i; - float f; -} Value; - -enum Color { RED, GREEN, BLUE }; - -/* Data types and modifiers */ -void func_void(void); -int func_int(int a); -char func_char(char c); -float func_float(float f); -double func_double(double d); -short func_short(short s); -long func_long(long l); -signed int si; -unsigned int ui; -wchar_t wc; - -/* Storage class specifiers */ -auto int auto_var; -extern int extern_var; -register int reg_var; -static int static_var; -const int const_var = 42; -volatile int vol_var; -inline int inline_func(int x) { return x; } - -/* C11/C23 keywords */ -_Bool bool_var; -_Atomic int atomic_var; -_Noreturn void abort_func(void); -static_assert(sizeof(int) >= 4, "int too small"); -alignas(16) int aligned_var; -constexpr int ce_var = 10; - -/* Constants */ -int *p = NULL; -_Bool b1 = true; -_Bool b2 = false; -void *np = nullptr; - -/* Numbers */ -int dec = 
42; -int hex = 0xFF; -int oct = 077; -int zero = 0; -long lng = 42L; -unsigned uns = 42u; -float flt = 3.14f; -double dbl = 3.14; -double sci = 1.5e10; -double sci2 = 1.5E-3; -int with_sep = 1_000_000; - -/* Characters */ -char ch1 = 'a'; -char ch2 = '\n'; -char ch3 = '\''; -char ch4 = '\\'; -char ch5 = '\0'; -char ch6 = '\077'; - -/* Strings */ -char *str1 = "hello world"; -char *str2 = "escape: \t\n\\\""; -char *str3 = "format: %d %s %f %x %p %c %%"; -char *str4 = "format: %04d %-10s %6.2f %#08x"; - -/* Labels and goto */ -void label_func(void) { - goto done; -done: - return; -} - -/* Operators */ -int arithmetic(int a, int b) { - int r; - r = a + b; - r = a - b; - r = a * b; - r = a / b; - r = a % b; - r++; - r--; - r += a; - r -= b; - return r; -} - -/* Comparison and logical */ -int compare(int a, int b) { - if (a == b) return 1; - if (a != b) return 0; - if (a < b) return -1; - if (a > b) return 1; - if (a <= b && a >= 0) return a; - if (a || b) return a; - if (!a) return b; - return 0; -} - -/* Bitwise operators */ -int bitwise(int a, int b) { - int r; - r = a & b; - r = a | b; - r = a ^ b; - r = ~a; - r = a << 2; - r = a >> 1; - return r; -} - -/* Ternary operator */ -int ternary(int a) { - return a > 0 ? 
a : -a; -} - -/* Pointers and field access */ -void pointers(Point *pt) { - int val = pt->x; - Point local; - local.x = val; - int *ptr = &local.x; - *ptr = 42; -} - -/* Switch/case */ -int switch_func(int x) { - switch (x) { - case 0: - return 0; - case 1: - return 1; - default: - break; - } - return -1; -} - -/* Loops */ -void loops(void) { - int i; - - for (i = 0; i < 10; i++) { - if (i == 5) continue; - if (i == 8) break; - } - - while (i > 0) { - i--; - } - - do { - i++; - } while (i < 5); -} - -/* Sizeof */ -void sizes(void) { - size_t s1 = sizeof(int); - size_t s2 = sizeof(Point); - size_t s3 = sizeof(char *); -} - -/* Function pointers */ -int (*func_ptr)(int, int) = NULL; - -/* Variadic */ -void variadic(const char *fmt, ...); - -/* Main */ -int main(int argc, char *argv[]) { - printf("Hello, %s! Count: %d\n", argv[0], argc); - return 0; -} diff --git a/tests/syntax/samples/c_sharp-report.md b/tests/syntax/samples/c_sharp-report.md deleted file mode 100644 index d849a14de0..0000000000 --- a/tests/syntax/samples/c_sharp-report.md +++ /dev/null @@ -1,83 +0,0 @@ -C# syntax highlighting: TS vs Legacy comparison report -====================================================== - -Sample file: `c_sharp.cs` -Legacy reference: `misc/syntax/cs.syntax` -TS query: `misc/syntax-ts/queries-override/c_sharp-highlights.scm` -TS colors: `misc/syntax-ts/colors.ini` `[c_sharp]` - -Aligned with legacy -------------------- - -- Keywords (`abstract`, `as`, `async`, `await`, `base`, `break`, `case`, - `catch`, `checked`, `const`, `continue`, `default`, `do`, `event`, `explicit`, - `extern`, `finally`, `fixed`, `for`, `foreach`, `goto`, `if`, `implicit`, - `in`, `is`, `lock`, `new`, `operator`, `out`, `override`, `params`, `partial`, - `readonly`, `ref`, `return`, `sealed`, `sizeof`, `stackalloc`, `static`, - `switch`, `this`, `throw`, `try`, `typeof`, `unchecked`, `unsafe`, `var`, - `virtual`, `volatile`, `where`, `while`, `yield`): `yellow` - MATCH -- Type declaration keywords 
(`class`, `delegate`, `enum`, `interface`, - `namespace`, `struct`, `record`): `white` - MATCH -- Access modifiers (`internal`, `private`, `public`): `brightred` - MATCH -- `using` keyword: `brightcyan` - MATCH -- Predefined types (`int`, `long`, `float`, `double`, `bool`, `string`, `char`, - `byte`, `decimal`, `object`, `void`): `yellow` - MATCH -- `null`, `true`, `false` literals: `yellow` - MATCH -- Strings: `green` - MATCH -- Verbatim strings (`@"..."`): `green` - MATCH -- Interpolated strings (`$"..."`): `green` - MATCH -- Character literals: `brightgreen` - MATCH -- Comments: `brown` - MATCH -- Semicolons: `brightmagenta` - MATCH -- Brackets/parens (`(`, `)`, `[`, `]`, `{`, `}`): `brightcyan` - MATCH -- Delimiters (`.`, `,`, `:`): `brightcyan` - MATCH -- Arithmetic operators (`+`, `-`, `*`, `/`, `%`, `=`, `+=`, etc.): `yellow` - - MATCH -- Comparison operators (`==`, `!=`, `<`, `>`, `<=`, `>=`): `yellow` - MATCH -- Logical operators (`&&`, `||`, `!`): `yellow` - MATCH -- Null coalescing (`??`): `yellow` (TS) vs `brightcyan` (legacy uses `?` as - delimiter) - close match -- Lambda (`=>`): `yellow` - MATCH -- Labels (`done:`): `cyan` (TS) vs DEFAULT (legacy) - TS improvement - -Intentional improvements over legacy -------------------------------------- - -- Preprocessor lines: legacy colors entire `#` lines as `brightmagenta`. TS does - not handle C# preprocessor directives since they are not common in modern C#. - This avoids false positives on comment lines starting with `#`. -- `protected` keyword: legacy colors it as `yellow` (general keyword). TS - correctly colors it as `yellow` too, matching legacy. However, the TS query - places it under `@keyword` rather than `@function.special`, which differs from - the query definition. Looking at the output, `protected` appears as `yellow` - in both. -- Dot-separated namespaces (`System.Collections.Generic`): TS colors `.` as - `brightcyan` (delimiter), giving structure to qualified names. 
Legacy treats - them as plain text with no delimiter coloring. -- `async`/`await` keywords: TS correctly colors both as `yellow`. Legacy does - not include `async` or `await` in its keyword list, leaving them as DEFAULT. -- Bitwise operators (`&`, `|`, `^`, `~`): TS colors them as `yellow` - (@operator.word). Legacy leaves them as DEFAULT or partial matches. TS - provides consistent operator coloring. -- Labels: TS correctly identifies labeled statements (`done:`) and colors the - label name as `cyan`. Legacy does not have label support. - -Known shortcomings ------------------- - -- `get`/`set` property accessors: legacy colors them as `brightgreen`. TS does - not highlight these contextual keywords because tree-sitter treats them as - regular identifiers in most contexts. -- `value` keyword (in property setters): legacy colors it as `yellow`. TS does - not capture it since it is a contextual keyword recognized only inside - property setter bodies. -- Format specifiers inside strings (`%d`, `%s`, etc.): legacy has regex patterns - for C-style format strings. TS does not parse string contents, so format - specifiers are not highlighted. -- Escape sequences in strings (`\n`, `\\`, etc.): legacy colors them as - `brightgreen` inside string contexts. TS does not distinguish escape sequences - within regular strings (they are part of the `@string` capture). Only - character literals get `brightgreen`. -- Verbatim string `\n`: legacy incorrectly colors `\n` inside `@"..."` as an - escape (`brightgreen`). TS correctly treats the entire verbatim string as - `green` since `\n` is literal text in verbatim strings. 
diff --git a/tests/syntax/samples/c_sharp.cs b/tests/syntax/samples/c_sharp.cs deleted file mode 100644 index b95fc0b8a2..0000000000 --- a/tests/syntax/samples/c_sharp.cs +++ /dev/null @@ -1,229 +0,0 @@ -// C# syntax sample: demonstrates all TS capture groups -// This file exercises every capture in c_sharp-highlights.scm - -using System; -using System.Collections.Generic; -using System.Linq; - -namespace SyntaxDemo -{ - // Type declaration keywords (@keyword.other -> white) - public enum Color { Red, Green, Blue } - - public delegate void Handler(string msg); - - public interface IShape - { - double Area(); - } - - public struct Point - { - public int X; - public int Y; - } - - public record Person(string Name, int Age); - - // Access modifiers (@function.special -> brightred) - // public, private, internal - internal class Helper { } - - // Main class - public class Demo : IShape - { - // Keywords (@keyword -> yellow) - private readonly int _value; - protected virtual int Value => _value; - public static int Counter = 0; - const int MaxSize = 100; - volatile bool _running; - - public Demo(int value) - { - this._value = value; - } - - // Operators (@operator.word -> yellow) - public int Arithmetic(int a, int b) - { - int r; - r = a + b; - r = a - b; - r = a * b; - r = a / b; - r = a % b; - r++; - r--; - r += a; - r -= b; - r *= 2; - r /= 2; - r %= 3; - return r; - } - - // Comparison and logical operators - public bool Compare(int a, int b) - { - if (a == b) return true; - if (a != b) return false; - if (a < b && a > 0) return true; - if (a >= b || a <= 0) return false; - if (!true) return false; - return a > b; - } - - // Bitwise operators - public int Bitwise(int a, int b) - { - int r; - r = a & b; - r = a | b; - r = a ^ b; - r = ~a; - r = a << 2; - r = a >> 1; - return r; - } - - // Null coalescing and lambda - public string NullOps(string s) - { - string result = s ?? 
"default"; - Func square = x => x * x; - return result; - } - - // Strings (@string -> green) - public void Strings() - { - string s1 = "Hello, World!"; - string s2 = @"Verbatim\nstring"; - string s3 = $"Interpolated {_value}"; - } - - // Character literals (@string.special -> brightgreen) - public void Chars() - { - char c1 = 'A'; - char c2 = '\n'; - char c3 = '\\'; - } - - // null, true, false (@keyword -> yellow) - public void Literals() - { - object obj = null; - bool t = true; - bool f = false; - } - - // Predefined types (@type -> yellow) - public void Types() - { - int i = 0; - long l = 0L; - float fl = 1.0f; - double d = 1.0; - bool b = true; - string s = ""; - char c = 'x'; - byte by = 0; - decimal dec = 1.0m; - object o = null; - } - - // Labels (@label -> cyan) - public void LabelDemo() - { - goto done; - done: - return; - } - - // Control flow keywords - public double Area() - { - return 0.0; - } - - // switch, try/catch/finally, for/foreach, while/do - public void ControlFlow(int x) - { - switch (x) - { - case 0: - break; - default: - break; - } - - try - { - throw new Exception("error"); - } - catch (Exception) - { - // handled - } - finally - { - Counter++; - } - - for (int i = 0; i < 10; i++) { } - foreach (var item in new int[] { 1, 2, 3 }) { } - while (x > 0) { x--; } - do { x++; } while (x < 5); - } - - // More keywords: as, is, typeof, sizeof, checked - public void MoreKeywords() - { - object o = "test"; - if (o is string) { } - string s = o as string; - Type t = typeof(int); - int sz = sizeof(int); - checked { int big = int.MaxValue; } - unchecked { int over = int.MaxValue + 1; } - } - - // async/await, yield, var, ref, out, params - public async void AsyncDemo() - { - await System.Threading.Tasks.Task.Delay(1); - } - - // Delimiters (@delimiter -> brightcyan) - // Covered by: . 
, : ( ) [ ] { } - - // Semicolons (@delimiter.special -> brightmagenta) - // Every statement-ending ; - - // unsafe, fixed, stackalloc, lock, goto - public unsafe void UnsafeDemo() - { - int val = 42; - int* p = &val; - fixed (char* cp = "hello") { } - lock (this) { } - } - - // sealed, abstract, explicit, implicit, partial - // virtual, override, where, volatile - } - - // sealed class - public sealed class Final : Demo - { - public Final() : base(0) { } - public override int Value => 0; - } - - public abstract class AbstractBase - { - public abstract void DoWork(); - } -} diff --git a/tests/syntax/samples/cmake-report.md b/tests/syntax/samples/cmake-report.md deleted file mode 100644 index 8ffb63df54..0000000000 --- a/tests/syntax/samples/cmake-report.md +++ /dev/null @@ -1,73 +0,0 @@ -CMake syntax highlighting: TS vs Legacy comparison report -========================================================== - -Sample file: `cmake.cmake` -Legacy reference: `misc/syntax/cmake.syntax` -TS query: `misc/syntax-ts/queries-override/cmake-highlights.scm` -TS colors: `misc/syntax-ts/colors.ini` `[cmake]` - -Aligned with legacy -------------------- - -- Control flow keywords (`if`, `elseif`, `else`, `endif`, `foreach`, - `endforeach`, `while`, `endwhile`, `function`, `endfunction`, `macro`, - `endmacro`): `brightred` - MATCH -- Normal commands (`cmake_minimum_required`, `project`, `set`, `message`, - `find_package`, `find_library`, `add_library`, `add_executable`, - `target_link_libraries`, `install`, `include`, `export`, `configure_file`, - `add_test`, `enable_testing`, `option`, `string`, `math`, - `target_compile_definitions`, `target_compile_options`, - `target_include_directories`): `brightred` - MATCH -- Comments (line `#...`): `brown` - MATCH -- Bracket comments (`#[[ ... 
]]`): `brown` - MATCH -- Strings (quoted `"..."`): `green` - MATCH -- Variable references (`${...}`): `brightgreen` - MATCH Both legacy and TS color - `${VAR}` as `brightgreen` inside strings and outside strings. -- Parentheses (`(`, `)`): `brightcyan` - MATCH - -Intentional improvements over legacy -------------------------------------- - -- `block`/`endblock` keywords: TS colors these as `brightred` via - `@function.special`. Legacy does not include `block`/`endblock` in its keyword - list, so they appear uncolored. -- Bracket comments: TS correctly colors the entire bracket comment block as - `brown`. Legacy partially colors bracket comments -- it colors the `#[[` line - as a comment but leaks the word `documentation` as `brightmagenta` because it - matches the module keyword list. -- Unquoted arguments: TS colors all unquoted arguments uniformly as `lightgray` - (`@variable`). Legacy only colors specific known arguments from hardcoded - lists: known CMAKE_* variables as `brightgreen`, known properties as `white`, - known modules as `brightmagenta`, leaving others uncolored or miscolored. -- ALL_CAPS arguments (e.g., `VERSION`, `PROPERTIES`, `STATIC`, `PRIVATE`, - `PUBLIC`, `REQUIRED`, `STATUS`): TS applies `@tag.special` (`white`) via a - regex match `^[A-Z][A-Z\d_]*$`. However, the TS output shows these as - `lightgray` (same as other `@variable`), suggesting the `#match?` predicate - may not be applying in all cases. Legacy colors a hardcoded subset of - properties as `white`. - -Known shortcomings ------------------- - -- TS does not distinguish between CMake modules (e.g., `GNUInstallDirs`, - `CMakePackageConfigHelpers`, `CPack`) and regular unquoted arguments. Legacy - colors these as `brightmagenta` via a hardcoded module list. TS shows them as - `lightgray` like any other argument. -- TS does not color known CMAKE_* variables (e.g., `CMAKE_BUILD_TYPE`, - `CMAKE_INSTALL_PREFIX`) differently. 
Legacy has a large hardcoded list - coloring them as `brightgreen`. TS treats them as regular `@variable` - (`lightgray`). -- TS does not color compatibility/deprecated commands (e.g., `exec_program`, - `subdirs`, `write_file`) in `red` as legacy does. These appear as regular - `brightred` commands in TS. -- The `@tag.special` capture for ALL_CAPS unquoted arguments does not seem to - override `@variable` in all cases. In the TS dump, keywords like `VERSION`, - `SOVERSION`, `PROPERTIES`, `STATIC`, `PRIVATE`, etc. appear as `lightgray` - rather than the expected `white`. This may be a precedence issue with the - `#match?` predicate. -- Variable references outside of `${}` but inside command arguments (e.g., - `${TARGET}` without proper braces) show as `lightgray` in TS. Legacy - recognizes the `${*}` glob pattern more broadly. -- The `FILE` keyword inside `export(TARGETS mylib FILE ...)` is colored - `brightred` in legacy (matching the command keyword list) but `lightgray` in - TS (treated as a regular unquoted argument). 
diff --git a/tests/syntax/samples/cmake.cmake b/tests/syntax/samples/cmake.cmake deleted file mode 100644 index 736c46da64..0000000000 --- a/tests/syntax/samples/cmake.cmake +++ /dev/null @@ -1,113 +0,0 @@ -# Sample CMake file demonstrating syntax highlighting features -# This file exercises all TS captures from cmake-highlights.scm - -cmake_minimum_required(VERSION 3.16) -project(SampleProject VERSION 1.0.0 LANGUAGES C CXX) - -# Control flow: if/elseif/else/endif -> @function.special -if(CMAKE_BUILD_TYPE STREQUAL "Release") - message(STATUS "Building in release mode") -elseif(CMAKE_BUILD_TYPE STREQUAL "Debug") - message(STATUS "Building in debug mode") -else() - message(WARNING "Unknown build type") -endif() - -# Variable references ${...} -> @variable.special -set(MY_SOURCES main.c util.c helper.c) -set(MY_INCLUDES ${CMAKE_CURRENT_SOURCE_DIR}/include) -message(STATUS "Sources: ${MY_SOURCES}") -message(STATUS "Install prefix: ${CMAKE_INSTALL_PREFIX}") - -# Foreach loop -> @function.special -foreach(SRC ${MY_SOURCES}) - message(STATUS "Source file: ${SRC}") -endforeach() - -# While loop -> @function.special -set(COUNTER 0) -while(COUNTER LESS 5) - math(EXPR COUNTER "${COUNTER} + 1") -endwhile() - -# Function definition -> @function.special -function(my_custom_function TARGET_NAME) - target_compile_definitions(${TARGET_NAME} PRIVATE DEBUG_MODE=1) - target_include_directories(${TARGET_NAME} PUBLIC ${MY_INCLUDES}) -endfunction() - -# Macro definition -> @function.special -macro(setup_warnings TARGET) - target_compile_options(${TARGET} PRIVATE -Wall -Wextra) -endmacro() - -# Block -> @function.special -block(SCOPE_FOR VARIABLES) - set(TEMP_VAR "inside block") - message(STATUS "Block var: ${TEMP_VAR}") -endblock() - -# Normal commands -> @function.special (identifier) -find_package(Threads REQUIRED) -find_library(MATH_LIB m) - -# Library and executable targets -add_library(mylib STATIC ${MY_SOURCES}) -add_executable(myapp main.c) -target_link_libraries(myapp PRIVATE 
mylib Threads::Threads) - -# Properties/constants (ALL_CAPS) -> @tag.special -set_target_properties(mylib PROPERTIES - VERSION ${PROJECT_VERSION} - SOVERSION 1 - PUBLIC_HEADER "include/mylib.h" - POSITION_INDEPENDENT_CODE ON -) - -# Install rules -install(TARGETS myapp mylib - RUNTIME DESTINATION bin - LIBRARY DESTINATION lib - ARCHIVE DESTINATION lib - PUBLIC_HEADER DESTINATION include -) - -# Strings -> @string (quoted_argument) -set(GREETING "Hello from CMake") -string(TOUPPER "${GREETING}" UPPER_GREETING) -string(REGEX REPLACE "[aeiou]" "_" MODIFIED "${GREETING}") - -# Unquoted arguments -> @variable -option(ENABLE_TESTING "Enable unit tests" ON) -option(BUILD_SHARED_LIBS "Build shared libraries" OFF) - -# Nested variable references -set(CONFIG_${CMAKE_BUILD_TYPE}_FLAGS "-O2 -DNDEBUG") -message(STATUS "Flags: ${CONFIG_${CMAKE_BUILD_TYPE}_FLAGS}") - -# Bracket comment -> @comment -#[[ - This is a bracket comment. - It can span multiple lines. - Used for longer documentation blocks. -]] - -# Various commands exercising highlighting -include(GNUInstallDirs) -include(CMakePackageConfigHelpers) -configure_file(config.h.in config.h @ONLY) -export(TARGETS mylib FILE MyLibTargets.cmake) -enable_testing() -add_test(NAME basic_test COMMAND myapp --test) - -# Generator expressions with variable refs -target_compile_definitions(myapp PRIVATE - $<$:DEBUG_BUILD> - $<$:NDEBUG> -) - -# CPack configuration -set(CPACK_PACKAGE_NAME "SampleProject") -set(CPACK_PACKAGE_VERSION "${PROJECT_VERSION}") -set(CPACK_GENERATOR "TGZ;DEB") -include(CPack) diff --git a/tests/syntax/samples/cobol-report.md b/tests/syntax/samples/cobol-report.md deleted file mode 100644 index 23ffdf0c1c..0000000000 --- a/tests/syntax/samples/cobol-report.md +++ /dev/null @@ -1,63 +0,0 @@ -COBOL syntax highlighting: TS vs Legacy comparison report -========================================================== - -Sample file: `cobol.cob` -Legacy reference: `misc/syntax/cobol.syntax` -TS query: 
`misc/syntax-ts/queries-override/cobol-highlights.scm` -TS colors: `misc/syntax-ts/colors.ini` `[cobol]` - -Aligned with legacy -------------------- - -- Division headings (`IDENTIFICATION DIVISION.`, `ENVIRONMENT DIVISION.`, `DATA - DIVISION.`, `PROCEDURE DIVISION.`): `cyan` - MATCH. Both legacy and TS color - these as `cyan`. -- General keywords (`PROGRAM-ID.`, `AUTHOR.`, `DATE-WRITTEN.`, `CONFIGURATION`, - `SECTION.`, `SOURCE-COMPUTER.`, `OBJECT-COMPUTER.`, `INPUT-OUTPUT`, - `FILE-CONTROL.`, `ASSIGN`, `TO`, `IS`, `SEQUENTIAL`, `ACCESS`, `MODE`, `FILE`, - `STATUS`, `DATA`, `FD`, `PIC`, `VALUE`, `WORKING-STORAGE`, `ADD`, `SUBTRACT`, - `MULTIPLY`, `DIVIDE`, `COMPUTE`, `IF`, `ELSE`, `END-IF`, `EVALUATE`, `WHEN`, - `END-EVALUATE`, `PERFORM`, `VARYING`, `FROM`, `BY`, `UNTIL`, `MOVE`, - `DISPLAY`, `STRING`, `DELIMITED`, `INTO`, `END-STRING`, `STOP`, `RUN`, `TRUE`, - `OTHER`, `NOT`, `AT`, `END`, `GIVING`, `REMAINDER`, `THRU`, `SPACES`, `SIZE`, - `INPUT`): `yellow` - MATCH -- I/O keywords (`OPEN`, `READ`, `END-READ`, `CLOSE`, `SELECT`): `brightred` - - MATCH -- Comments (`*` in column 7): `brown` - MATCH -- Strings (double-quoted `"..."`): `green` - MATCH -- Sign prefix `S9` in PIC: `brightgreen` - MATCH. Legacy has `s9` and - `v9` as `brightgreen` keywords. -- Edited PIC prefix `$ZZZ`: `brightgreen` in both, though this is coincidental - -- legacy matches `$ZZZ` via the `$+` pattern, TS would capture via - `picture_edit`. - -The legacy and TS outputs are identical ---------------------------------------- - -The legacy and TS syntax dumps produce exactly the same coloring for the entire -sample file. Every keyword, string, comment, division heading, I/O verb, and PIC -clause matches perfectly between the two engines. This indicates the TS grammar -and query are very well aligned with the legacy syntax file. 
- -Known shortcomings ------------------- - -- TS query defines captures for `level_number`, `level_number_88`, `picture_x`, - `picture_9`, `picture_a`, `picture_n`, and `picture_edit` as `@string.special` - (`brightgreen`), but the tree-sitter COBOL parser may not produce these node - types for all PIC patterns. In the sample, level numbers like `01`, `05`, `88` - appear uncolored in both engines. -- TS query defines `integer`, `decimal`, and `number` captures as `@constant` - (`lightgray`), but numeric literals in the sample (e.g., `100`, `50`, `1000`) - appear uncolored in both engines. -- TS query defines `qualified_word` as `@constant` (`lightgray`), but qualified - data names in the sample do not appear to receive this coloring. -- The `x_string`, `h_string`, and `n_string` captures in the TS query are meant - to color hex strings (`X"..."`) and national strings (`N"..."`). In the - sample, the prefix `X` and `N` before the string are uncolored in both engines - -- only the quoted portion gets `green`. -- The COBOL legacy syntax file appears to be based on a shell syntax template - (it contains shell-specific patterns like `$()`, `${}`, backticks, heredocs, - etc.) rather than being a pure COBOL syntax definition. This does not affect - the COBOL-specific keyword highlighting but means the legacy file has many - irrelevant rules. diff --git a/tests/syntax/samples/cobol.cob b/tests/syntax/samples/cobol.cob deleted file mode 100644 index 5d5aab0dad..0000000000 --- a/tests/syntax/samples/cobol.cob +++ /dev/null @@ -1,113 +0,0 @@ - * Sample COBOL program demonstrating syntax highlighting - * This file exercises all TS captures from cobol-highlights.scm - - IDENTIFICATION DIVISION. - PROGRAM-ID. SAMPLE-PROGRAM. - AUTHOR. TEST-AUTHOR. - DATE-WRITTEN. 2024-01-15. - - ENVIRONMENT DIVISION. - CONFIGURATION SECTION. - SOURCE-COMPUTER. LINUX. - OBJECT-COMPUTER. LINUX. - - INPUT-OUTPUT SECTION. - FILE-CONTROL. 
- SELECT INPUT-FILE ASSIGN TO "input.dat" - ORGANIZATION IS SEQUENTIAL - ACCESS MODE IS SEQUENTIAL - FILE STATUS IS WS-FILE-STATUS. - - DATA DIVISION. - FILE SECTION. - FD INPUT-FILE. - 01 INPUT-RECORD PIC X(80). - - WORKING-STORAGE SECTION. - * Level numbers -> @string.special - 01 WS-FILE-STATUS PIC XX VALUE SPACES. - 01 WS-COUNTER PIC 9(5) VALUE ZEROS. - 01 WS-TOTAL PIC 9(7)V99 VALUE 0. - 01 WS-NAME PIC X(30) VALUE "John Doe". - 01 WS-AMOUNT PIC S9(7)V99 VALUE -1234.56. - 01 WS-DATE PIC 9(8) VALUE 20240115. - - * Group items with various PIC patterns - 01 WS-EMPLOYEE-REC. - 05 WS-EMP-ID PIC 9(6). - 05 WS-EMP-NAME PIC A(25). - 05 WS-EMP-DEPT PIC X(10). - 05 WS-EMP-SALARY PIC 9(7)V99. - - * Level 88 conditions -> @string.special - 01 WS-STATUS PIC 9 VALUE 0. - 88 STATUS-ACTIVE VALUE 1. - 88 STATUS-INACTIVE VALUE 0. - 88 STATUS-PENDING VALUE 2 THRU 5. - - * Numeric edited PIC -> @string.special - 01 WS-DISPLAY-AMT PIC $ZZZ,ZZ9.99-. - 01 WS-EDIT-DATE PIC 99/99/9999. - - PROCEDURE DIVISION. - MAIN-PARAGRAPH. 
- * I/O operations -> various highlights - OPEN INPUT INPUT-FILE - READ INPUT-FILE - AT END - DISPLAY "End of file reached" - NOT AT END - DISPLAY "Record: " INPUT-RECORD - END-READ - - * String operations - STRING WS-EMP-NAME DELIMITED BY SPACES - " - " DELIMITED BY SIZE - WS-EMP-DEPT DELIMITED BY SPACES - INTO WS-NAME - END-STRING - - * Arithmetic with various verbs - ADD 100 TO WS-COUNTER - SUBTRACT 50 FROM WS-TOTAL - MULTIPLY WS-AMOUNT BY 2 - GIVING WS-TOTAL - DIVIDE WS-TOTAL BY 3 - GIVING WS-AMOUNT REMAINDER WS-COUNTER - - COMPUTE WS-TOTAL = - WS-AMOUNT * 1.05 + 100 - - * Control flow - IF WS-COUNTER > 1000 - DISPLAY "Counter exceeds limit" - ELSE - DISPLAY "Counter: " WS-COUNTER - END-IF - - EVALUATE TRUE - WHEN STATUS-ACTIVE - DISPLAY "Status is active" - WHEN STATUS-INACTIVE - DISPLAY "Status is inactive" - WHEN OTHER - DISPLAY "Status unknown" - END-EVALUATE - - PERFORM PROCESS-RECORDS - VARYING WS-COUNTER FROM 1 BY 1 - UNTIL WS-COUNTER > 10 - - MOVE WS-AMOUNT TO WS-DISPLAY-AMT - DISPLAY "Formatted: " WS-DISPLAY-AMT - - * Hex and null-terminated strings - MOVE X"48454C4C4F" TO WS-NAME - MOVE N"Unicode text" TO WS-NAME - - CLOSE INPUT-FILE - STOP RUN. - - PROCESS-RECORDS. - DISPLAY "Processing record " WS-COUNTER - ADD 1 TO WS-TOTAL. 
diff --git a/tests/syntax/samples/cpp-report.md b/tests/syntax/samples/cpp-report.md deleted file mode 100644 index c1ddef8bd6..0000000000 --- a/tests/syntax/samples/cpp-report.md +++ /dev/null @@ -1,85 +0,0 @@ -C++ syntax highlighting: TS vs Legacy comparison report -======================================================= - -Sample file: `cpp.cpp` -Legacy reference: `misc/syntax/cxx.syntax` -TS query: `misc/syntax-ts/queries-override/cpp-highlights.scm` -TS colors: `misc/syntax-ts/colors.ini` `[cpp]` - -Aligned with legacy -------------------- - -- Keywords (`alignas`, `alignof`, `break`, `case`, `catch`, `class`, `co_await`, - `co_return`, `co_yield`, `concept`, `const`, `consteval`, `constexpr`, - `constinit`, `continue`, `decltype`, `default`, `delete`, `do`, `else`, - `enum`, `explicit`, `extern`, `final`, `for`, `friend`, `goto`, `if`, - `inline`, `mutable`, `namespace`, `new`, `noexcept`, `operator`, `override`, - `private`, `protected`, `public`, `requires`, `return`, `sizeof`, `static`, - `static_assert`, `struct`, `switch`, `template`, `throw`, `try`, `typedef`, - `typename`, `union`, `using`, `virtual`, `volatile`, `while`): `yellow` - - MATCH -- `true`, `false`, `nullptr`, `this`: `yellow` - MATCH -- Preprocessor directives (`#include`, `#define`, `#ifdef`, `#ifndef`, `#if`, - `#else`, `#endif`, `#elif`): `brightred` - MATCH -- Primitive types (`int`, `char`, `float`, `double`, `void`, `short`, `long`, - `bool`): `yellow` - MATCH -- Sized type specifiers (`signed int`, `unsigned int`): `yellow` - MATCH -- `auto` keyword: `yellow` - MATCH -- String literals: `green` - MATCH -- System lib strings (`<...>`): `green` (TS) vs `red` (legacy colors - include paths as `red` inside preprocessor context) -- Raw string literals (`R"(...)"`): `green` - MATCH -- Character literals (`'A'`, `'\n'`, `'\\'`, `'\''`): `brightgreen` - MATCH -- Comments (line and block): `brown` - MATCH -- Semicolons: `brightmagenta` - MATCH -- Brackets/parens (`(`, `)`, `[`, `]`, `{`, `}`): 
`brightcyan` - MATCH -- Delimiters (`.`, `,`, `:`): `brightcyan` - MATCH -- Arithmetic operators (`+`, `-`, `*`, `/`, `=`, `++`, `--`, `+=`, `-=`): - `yellow` - MATCH -- Comparison operators (`==`, `!=`, `<`, `>`, `<=`, `>=`): `yellow` - MATCH -- Logical operators (`&&`, `||`, `!`): `yellow` - MATCH -- Shift operators (`<<`, `>>`): `yellow` - MATCH -- Scope resolution (`::`): `yellow` - MATCH -- Labels (`done:`): `cyan` (TS) vs DEFAULT (legacy) - TS improvement - -Intentional improvements over legacy -------------------------------------- - -- Preprocessor body: legacy colors entire `#define` lines as `brightred`. TS - only colors the directive keyword (`#define`, `#include`) as `brightred`, - leaving the body in natural colors. This provides more accurate highlighting - of macro definitions. -- Include paths: legacy colors `<...>` as `red` (preprocessor string). TS - colors them as `green` (string via `system_lib_string` node). This is - consistent with how strings are treated elsewhere. -- Bitwise operators (`&`, `|`, `^`, `~`): TS colors them as `brightmagenta` - (@delimiter.special) to distinguish from arithmetic operators. Legacy colors - them the same as all other operators. This helps differentiate bit - manipulation from arithmetic. -- Labels: TS recognizes `statement_identifier` nodes and colors goto labels as - `cyan`. Legacy does not have label support, leaving them as DEFAULT. -- `override` and `final` context keywords: TS correctly highlights both as - `yellow` in all contexts. Legacy only highlights them when they appear as - whole words but does not distinguish their special semantic role. -- Destructor tilde (`~Shape`): TS colors `~` as `brightmagenta` - (@delimiter.special, bitwise operator). Legacy does not distinguish the - destructor tilde from other context. - -Known shortcomings ------------------- - -- Format specifiers (`%d`, `%s`, etc.) inside strings are not highlighted. - Tree-sitter does not parse printf format strings. 
Legacy colors them as - `brightgreen` via regex patterns. -- `friend class Other`: TS fails to highlight `friend` and `class` in the - `friend class Other` declaration when it appears inside a function body - (invalid context). The word `Other` appears as `red` instead of DEFAULT. This - is a tree-sitter parse error edge case. -- Raw string literal delimiters: TS colors the `R"(` and `)"` parts - inconsistently, with parentheses getting `brightcyan` (delimiter) while the - content is `green`. Legacy colors the whole literal uniformly. -- `?` ternary operator: legacy colors it as `brightcyan`. TS does not capture - `?` in the query, so it appears as DEFAULT. -- Alternative tokens (`and`, `or`, `not`, `xor`, `bitand`, `bitor`, `compl`, - `and_eq`, `or_eq`, `xor_eq`, `not_eq`): TS now captures these as `yellow` - via `@keyword`, matching legacy behavior. diff --git a/tests/syntax/samples/cpp.cpp b/tests/syntax/samples/cpp.cpp deleted file mode 100644 index 6aebe3fd6a..0000000000 --- a/tests/syntax/samples/cpp.cpp +++ /dev/null @@ -1,207 +0,0 @@ -/* C++ syntax sample: demonstrates all TS capture groups */ -/* This file exercises every capture in cpp-highlights.scm */ - -// Line comment -// TODO: fix this - -#include -#include -#include -#include "myheader.h" - -#define MAX_SIZE 100 -#define SQUARE(x) ((x) * (x)) - -#ifdef DEBUG -#define LOG(msg) std::cout << msg -#else -#define LOG(msg) -#endif - -#if defined(__GNUC__) -#pragma GCC optimize("O2") -#endif - -// Keywords (@keyword -> yellow) -class Shape { -public: - virtual ~Shape() = default; - virtual double area() const = 0; -protected: - int id; -private: - std::string name; -}; - -struct Point { - int x; - int y; -}; - -union Value { - int i; - float f; -}; - -enum Color { Red, Green, Blue }; - -namespace geometry { - using namespace std; - - template - concept Numeric = requires(T a) { a + a; }; - - template - T add(T a, T b) noexcept { - return a + b; - } -} - -// Primitive types (@type -> yellow) -void 
func_void(); -int func_int(int a); -char func_char(char c); -float func_float(float f); -double func_double(double d); -short func_short(short s); -long func_long(long l); -signed int si; -unsigned int ui; -auto au = 42; - -// true, false, nullptr, this (@keyword -> yellow) -bool trueval = true; -bool falseval = false; -void* np = nullptr; - -// null (@keyword -> yellow) - -// Operators (@operator.word -> yellow) -int arithmetic(int a, int b) { - int r; - r = a + b; - r = a - b; - r = a * b; - r = a / b; - r++; - r--; - r += a; - r -= b; - if (a == b) return 0; - if (a != b) return 1; - if (a < b) return -1; - if (a > b) return 1; - if (a <= b && a >= 0) return a; - if (a || b) return a; - if (!a) return b; - r = a << 2; - r = a >> 1; - return r; -} - -// Bitwise operators (@delimiter.special -> brightmagenta) -int bitwise(int a, int b) { - int r; - r = a & b; - r = a | b; - r = a ^ b; - r = ~a; - return r; -} - -// Scope resolution (@operator.word -> yellow) -void scope_demo() { - std::cout << "hello" << std::endl; - geometry::add(1, 2); -} - -// String literals (@string -> green) -const char* s1 = "Hello, World!"; -const char* s2 = "escape: \t\n\\\""; - -// System lib string (@string -> green) -// Covered by #include above - -// Raw string literal (@string -> green) -const char* raw = R"(raw string with "quotes")"; - -// Character literals (@string.special -> brightgreen) -char ch1 = 'A'; -char ch2 = '\n'; -char ch3 = '\\'; -char ch4 = '\''; - -// Labels (@label -> cyan) -void label_func() { - goto done; -done: - return; -} - -// Semicolons (@delimiter.special -> brightmagenta) -// Every statement-ending ; - -// Delimiters (@delimiter -> brightcyan) -// Covered by: , ( ) [ ] { } . 
: - -// Keywords continued -class Derived : public Shape { -public: - explicit Derived(int id) : Shape() { - this->id = id; - } - - double area() const override final { - return 0.0; - } -}; - -// Control flow keywords -void control_flow(int x) { - switch (x) { - case 0: - break; - default: - break; - } - - try { - throw std::runtime_error("error"); - } catch (const std::exception& e) { - // handled - } - - for (int i = 0; i < 10; i++) { - if (i == 5) continue; - } - - while (x > 0) { x--; } - do { x++; } while (x < 5); -} - -// More keywords: sizeof, typedef, static_assert -void more_keywords() { - typedef int Int32; - static_assert(sizeof(int) >= 4, "int too small"); - constexpr int ce = 42; - constinit static int ci = 0; - consteval int square(int n) { return n * n; } - mutable int m; - volatile int v; - extern int e; - inline int il = 0; - alignas(16) int al; - alignof(int); - decltype(ce) dt = ce; - friend class Other; - delete np; - new int(42); - static int sv; -} - -// Co-routines -// co_await, co_return, co_yield - -// Comments (@comment -> brown) -/* Block comment */ -// Line comment diff --git a/tests/syntax/samples/css-report.md b/tests/syntax/samples/css-report.md deleted file mode 100644 index 601ef4f321..0000000000 --- a/tests/syntax/samples/css-report.md +++ /dev/null @@ -1,86 +0,0 @@ -CSS syntax highlighting: TS vs Legacy comparison report -======================================================= - -Sample file: `tests/syntax/samples/css.css` -Legacy reference: `misc/syntax/css.syntax` -TS query: `misc/syntax-ts/queries-override/css-highlights.scm` -TS colors: `misc/syntax-ts/colors.ini` `[css]` - -Aligned with legacy -------------------- - -- Comments (`/* */`): `brown` - MATCH -- Tag selectors (`body`, `h1`, `h2`, `div`, `p`, `a`, `input`, `li`): `white` - via `@tag.special` - MATCH -- Class selectors (`.container`, `.box`): `green` via `@string` - MATCH -- Pseudo-classes (`:hover`, `:visited`, `:first-child`): `brightmagenta` via - 
`@delimiter.special` - MATCH -- Pseudo-elements (`::before`, `::after`): `white` via `@tag.special` - MATCH -- Property names (`margin`, `padding`, `color`, `font-size`, etc.): `lightgray` - via `@constant` - MATCH -- Color hex values (`#333333`, `#ff0000`): `red` via `@comment.error` - MATCH -- Numbers with units (`16px`, `1.5em`, `10pt`, `300px`): `brightgreen` via - `@string.special` - MATCH -- Plain values (`bold`, `center`, `auto`, `block`, etc.): `brightgreen` via - `@string.special` - MATCH -- `!important`: `brightred` via `@function.special` - MATCH -- Semicolons: `brightmagenta` via `@delimiter.special` - MATCH -- Commas and colons: `brightcyan` via `@delimiter` - MATCH -- `@media` at-rule: `brightred` via `@function.special` - MATCH -- `@import` at-rule: `brightred` via `@function.special` - MATCH -- `@charset` at-rule: `brightred` via `@function.special` - MATCH -- `@keyframes` at-rule: `brightred` via `@function.special` - MATCH -- `@supports` at-rule: `brightred` via `@function.special` - MATCH -- `@namespace` at-rule: `brightred` via `@function.special` - MATCH -- `@scope` at-rule: `brightred` via `@function.special` - MATCH -- Function names (`url`, `calc`, `rotate`, `blur`, `linear-gradient`, - `translateX`, `repeat`, `scale`): `magenta` via `@keyword.directive` - MATCH -- Logical operators (`and`, `not`, `only`): `yellow` via `@keyword` - MATCH -- String values (`"UTF-8"`, `"reset.css"`, `"Hello, world!"`): `white` via - `@keyword.other` - MATCH -- ID selectors (`#main-content`): `green` via `@string` - MATCH -- Combinators (`>`, `+`, `~`): `brightcyan` via `@operator` - MATCH -- Attribute selector operators (`=`, `^=`, `$=`, `*=`): `brightcyan` via - `@operator` - MATCH - -Intentional improvements over legacy -------------------------------------- - -- TS recognizes ALL CSS property names via the grammar's `property_name` node, - while legacy has a fixed list of known properties. 
Properties like `z-index`, - `transition`, `transform`, `grid-template-columns`, `gap`, `background-size`, - `border-radius`, `opacity`, `filter` are all properly colored `lightgray` by - TS. Legacy misses `z-index`, `transition`, `border-radius`, `background-size`, - `grid-template-columns`, `gap` and others. -- TS properly handles `#abc` (3-digit hex) as `red` color value. Legacy only - matched 6-digit hex codes. -- TS parses percentage values (`50%`, `100%`) as `brightgreen` (number + unit), - consistent with other numeric values. Legacy colored percentages as - `brightred`. -- TS handles `@scope` at-rule which legacy did not know about. -- TS highlights `nth-child()` arguments properly, with the pseudo-class in - `brightmagenta`. -- TS properly handles decimal numbers like `0.85`, `0.3s`, `1.2` as complete - tokens in `brightgreen`. Legacy sometimes splits them at the decimal point. -- TS correctly identifies `::` double-colon pseudo-element prefix as - `brightcyan` delimiter separate from the pseudo-element name. -- Curly braces `{` `}` are not highlighted by TS (default color), while legacy - colored them `yellow`. This is a neutral change -- both approaches are - reasonable. - -Known shortcomings ------------------- - -- Legacy colored curly braces `{` `}` in `yellow`, TS leaves them uncolored - (default). This is a minor visual difference. -- Legacy recognized specific font names (`Arial`, `Verdana`, `Helvetica`, - `sans-serif`) as `brightgreen` keywords. TS treats these as plain values via - `@string.special` which also results in `brightgreen`, so the effect is the - same in most cases. -- Legacy had distinct `context` blocks for `counter()`, `counters()`, `rgb()`, - `url()` functions with `magenta` interiors. TS colors only the function name - in `magenta` via `@keyword.directive`, with arguments following normal value - rules. 
-- Inside media query feature parentheses, TS does not color `max-width` as - `lightgray` property (no property_name node in that context in the legacy - output), though in the TS output it does appear to handle this correctly. diff --git a/tests/syntax/samples/css.css b/tests/syntax/samples/css.css deleted file mode 100644 index 649fae5cb4..0000000000 --- a/tests/syntax/samples/css.css +++ /dev/null @@ -1,212 +0,0 @@ -/* CSS Syntax Highlighting Sample */ -/* Demonstrates all TS captures for CSS grammar */ - -/* @charset at-rule */ -@charset "UTF-8"; - -/* @import at-rule */ -@import url("reset.css"); -@import "typography.css"; - -/* @namespace at-rule */ -@namespace svg url(http://www.w3.org/2000/svg); - -/* Tag selectors */ -body { - margin: 0; - padding: 0; - font-family: Arial, sans-serif; - font-size: 16px; - line-height: 1.5; - color: #333333; - background-color: #ffffff; -} - -h1, h2, h3 { - font-weight: bold; - margin-bottom: 1em; -} - -/* Class and ID selectors */ -.container { - max-width: 1200px; - margin: 0 auto; -} - -#main-content { - padding: 20px; -} - -/* Pseudo-classes and pseudo-elements */ -a:hover { - color: #0066cc; - text-decoration: underline; -} - -a:visited { - color: #551a8b; -} - -p:first-child { - margin-top: 0; -} - -li:nth-child(2n+1) { - background-color: #f0f0f0; -} - -p::before { - content: "\2022"; - margin-right: 0.5em; -} - -p::after { - content: ""; - display: block; - clear: both; -} - -/* Combinators and attribute selectors */ -div > p { - color: #444444; -} - -h2 + p { - font-size: 1.1em; -} - -h2 ~ p { - color: #666666; -} - -input[type="text"] { - border: 1px solid #cccccc; -} - -a[href^="https"] { - color: green; -} - -a[href$=".pdf"] { - color: red; -} - -a[href*="example"] { - font-weight: bold; -} - -/* Numbers and units */ -.box { - width: 300px; - height: 200px; - margin: 1.5em; - padding: 10pt; - border-radius: 50%; - opacity: 0.85; - z-index: 100; - transition: all 0.3s ease; -} - -/* Color values */ -.colors { - 
color: #ff0000; - background: #0a0b0c; - border-color: #abc; -} - -/* Functions */ -.gradient { - background: linear-gradient(to right, #ff0000, #00ff00); - width: calc(100% - 40px); - transform: rotate(45deg) scale(1.2); - filter: blur(5px); -} - -.image { - background-image: url("background.png"); - background-size: cover; -} - -/* !important */ -.override { - color: red !important; - display: block !important; -} - -/* @media at-rule */ -@media screen and (max-width: 768px) { - .container { - max-width: 100%; - padding: 10px; - } - - .box { - width: auto; - } -} - -@media print { - body { - font-size: 12pt; - color: black; - } -} - -/* Logical operators in media queries */ -@media not print and (color) { - body { - background: white; - } -} - -@media only screen and (min-width: 1024px) { - .wide { - display: flex; - } -} - -/* @keyframes at-rule */ -@keyframes fadeIn { - 0% { - opacity: 0; - } - 100% { - opacity: 1; - } -} - -@keyframes slide { - from { - transform: translateX(-100%); - } - to { - transform: translateX(0); - } -} - -/* @supports at-rule */ -@supports (display: grid) { - .layout { - display: grid; - grid-template-columns: repeat(3, 1fr); - gap: 20px; - } -} - -/* @scope at-rule */ -@scope (.card) { - h2 { - font-size: 1.5em; - } -} - -/* String values */ -.quote::before { - content: "Hello, world!"; -} - -/* Semicolons, colons, commas as delimiters */ -.multi { - font: italic bold 16px/1.2 "Helvetica Neue", Arial, sans-serif; - border: 1px solid black; -} diff --git a/tests/syntax/samples/cuda-report.md b/tests/syntax/samples/cuda-report.md deleted file mode 100644 index 63c82ecef9..0000000000 --- a/tests/syntax/samples/cuda-report.md +++ /dev/null @@ -1,91 +0,0 @@ -CUDA syntax highlighting: TS vs Legacy comparison report -======================================================== - -Sample file: `cuda.cu` -Legacy reference: `misc/syntax/cuda.syntax` -TS query: `misc/syntax-ts/queries-override/cuda-highlights.scm` -TS colors: 
`misc/syntax-ts/colors.ini` `[cuda]` - -Aligned with legacy -------------------- - -- C++ keywords (`break`, `case`, `catch`, `class`, `const`, `constexpr`, - `continue`, `default`, `delete`, `do`, `else`, `enum`, `explicit`, `extern`, - `final`, `for`, `friend`, `if`, `inline`, `namespace`, `new`, `noexcept`, - `override`, `private`, `protected`, `public`, `return`, `sizeof`, `static`, - `struct`, `switch`, `template`, `throw`, `try`, `typedef`, `typename`, - `union`, `using`, `virtual`, `volatile`, `while`): `yellow` - MATCH -- `true`, `false`, `this`: `yellow` - MATCH -- CUDA built-in variables (`threadIdx`, `blockIdx`, `blockDim`, `gridDim`, - `warpSize`): `white` - MATCH -- CUDA function qualifiers (`__global__`, `__device__`, `__host__`): `white` - (TS) vs `white` (legacy) - MATCH -- CUDA memory qualifiers (`__shared__`, `__constant__`, `__managed__`, - `__restrict__`): `white` (TS) vs `white` (legacy) - MATCH -- CUDA force-inline qualifiers (`__noinline__`, `__forceinline__`): `white` - - MATCH -- CUDA synchronization (`__syncthreads`, `__threadfence`): `white` - MATCH -- Preprocessor directives (`#include`, `#define`, `#ifdef`, `#if`, `#endif`, - `#pragma`): `brightred` - MATCH -- Primitive types (`int`, `float`, `double`, `void`, `char`, `long`): `yellow` - - MATCH -- Sized type specifiers (`unsigned int`, `signed int`): `yellow` - MATCH -- String literals: `green` - MATCH -- Character literals: `brightgreen` - MATCH -- Comments (line and block): `brown` - MATCH -- Semicolons: `brightmagenta` - MATCH -- Arithmetic operators (`+`, `-`, `*`, `/`, `%`, `=`, `++`, `--`, `+=`, `-=`): - `yellow` - MATCH -- Comparison operators (`==`, `!=`, `<`, `>`, `<=`, `>=`): `yellow` - MATCH -- Logical operators (`&&`, `||`, `!`): `yellow` - MATCH -- Shift operators (`<<`, `>>`): `yellow` - MATCH -- Scope resolution (`::`): `yellow` - MATCH - -Intentional improvements over legacy -------------------------------------- - -- Preprocessor body: legacy colors entire `#define` 
lines as `brightred`. TS - only colors the directive keyword (`#define`, `#include`) as `brightred`, - leaving macro body tokens in their natural colors. This is more accurate. -- Include paths: legacy colors `` as `red` (preprocessor - string). TS colors them as `green` (`system_lib_string`). This is consistent - with general string coloring. -- `nullptr` keyword: TS colors it as `lightgray` (@constant) to distinguish it - from other keywords. Legacy colors it the same as all keywords (`yellow`). - Note: The TS query maps `nullptr` to `@keyword` but `null` to `@constant`, - creating a subtle inconsistency where `nullptr` gets `lightgray` instead of - `yellow`. -- `NULL` macro: TS colors it as `lightgray` (@constant) via the `(null)` node. - Legacy does not recognize `NULL` as a keyword and leaves it as DEFAULT. TS - correctly identifies this constant. -- Labels: TS recognizes `statement_identifier` nodes and colors goto labels - (`done:`) as `cyan`. Legacy does not have label support. -- Bitwise operators (`&`, `|`, `^`, `~`): TS colors them as `yellow` - (@operator.word), matching the rest of the operators. Legacy treats them - inconsistently. -- Delimiter dots: TS colors `.` as `brightcyan` in CUDA built-in variable member - access (`threadIdx.x`). Legacy leaves `.` as DEFAULT. - -Known shortcomings ------------------- - -- CUDA qualifiers not highlighted in function signatures: TS uses `#match?` - predicates on `(identifier)` nodes to match `__global__`, `__device__`, - `__host__` etc. However, some positions (e.g., function signature context) may - not match because the tree-sitter C++ grammar parses these as different node - types. In the output, qualifiers in function declaration positions appear as - DEFAULT while those in variable declarations match correctly. -- Brackets, parens, braces (`(`, `)`, `{`, `}`, `[`, `]`): legacy colors these - as `brightcyan`. TS is inconsistent -- some are colored as `brightcyan` but - many appear as DEFAULT. 
The TS query does not include bracket/brace/paren - captures for CUDA (unlike the C++ query which has them). The `@delimiter` only - covers `.`, `,`, `:`. -- Kernel launch syntax (`<<<...>>>`): TS does not have special handling for CUDA - triple-angle-bracket syntax. The `<<<` and `>>>` are parsed as shift - operators, leading to inconsistent coloring. -- Format specifiers inside strings are not highlighted. Tree-sitter does not - parse printf format strings. -- Raw string literal: TS colors the entire `R"(...)"` as `green`. Legacy - similarly colors it uniformly. -- `?` ternary operator and `->` arrow: legacy colors `?` as `brightcyan`. TS - captures `->` as `@operator.word` but does not capture `?`. diff --git a/tests/syntax/samples/cuda.cu b/tests/syntax/samples/cuda.cu deleted file mode 100644 index 5195903b5a..0000000000 --- a/tests/syntax/samples/cuda.cu +++ /dev/null @@ -1,206 +0,0 @@ -/* CUDA syntax sample: demonstrates all TS capture groups */ -/* This file exercises every capture in cuda-highlights.scm */ - -// Line comment -// TODO: compute kernel - -#include -#include -#include "mykernel.h" - -#define BLOCK_SIZE 256 -#define GRID_SIZE(n) (((n) + BLOCK_SIZE - 1) / BLOCK_SIZE) - -#ifdef USE_SHARED -#define SMEM_SIZE 1024 -#endif - -#if defined(__CUDA_ARCH__) -#pragma unroll -#endif - -// CUDA qualifiers (@keyword.other -> white) -__global__ void vectorAdd(float* a, float* b, float* c, int n) -{ - int idx = threadIdx.x + blockIdx.x * blockDim.x; - if (idx < n) - { - c[idx] = a[idx] + b[idx]; - } -} - -__device__ float deviceFunc(float x, float y) -{ - return x * y; -} - -__host__ __device__ float hostDeviceFunc(float x) -{ - return x * x; -} - -// CUDA built-in variables (@keyword.other -> white) -__global__ void kernel() -{ - int tx = threadIdx.x; - int ty = threadIdx.y; - int bx = blockIdx.x; - int by = blockIdx.y; - int bdx = blockDim.x; - int bdy = blockDim.y; - int gx = gridDim.x; - int gy = gridDim.y; - int ws = warpSize; -} - -// CUDA memory 
qualifiers -__shared__ float sharedMem[256]; -__constant__ float constData[64]; -__managed__ int managedVar; -__restrict__ float* restrictPtr; - -// CUDA synchronization (@keyword.other -> white) -__global__ void syncKernel() -{ - __shared__ float temp[256]; - temp[threadIdx.x] = 0.0f; - __syncthreads(); - __threadfence(); -} - -// Keywords (@keyword -> yellow) -class CudaHelper { -public: - explicit CudaHelper(int deviceId) { - this->deviceId = deviceId; - } - virtual ~CudaHelper() = default; -protected: - int deviceId; -private: - bool initialized; -}; - -struct DeviceInfo { - int major; - int minor; -}; - -enum MemType { Host, Device, Managed }; - -namespace cuda_utils { - using namespace std; - template - T* allocate(int n) { - T* ptr; - return ptr; - } -} - -// true, false, nullptr, this (@keyword -> yellow) -bool flag = true; -bool off = false; -void* np = nullptr; - -// Primitive types (@type -> yellow) -void func_void(); -int func_int(int a); -float func_float(float f); -double func_double(double d); -unsigned int ui; -signed int si; -long l; - -// Operators (@operator.word -> yellow) -int ops(int a, int b) { - int r; - r = a + b; - r = a - b; - r = a * b; - r = a / b; - r = a % b; - r++; - r--; - r += a; - r -= b; - if (a == b) return 0; - if (a != b) return 1; - if (a < b) return -1; - if (a > b) return 1; - if (a <= b && a >= 0) return a; - if (a || b) return a; - if (!a) return b; - r = a & b; - r = a | b; - r = a ^ b; - r = ~a; - r = a << 2; - r = a >> 1; - return r; -} - -// Scope resolution -void scope_demo() { - cuda_utils::allocate(100); -} - -// Strings (@string -> green) -const char* s1 = "Hello, CUDA!"; -const char* s2 = "escape: \t\n\\\""; -const char* raw = R"(raw string)"; - -// Character literals (@string.special -> brightgreen) -char ch1 = 'A'; -char ch2 = '\n'; -char ch3 = '\\'; - -// null (@constant -> lightgray) -void* pnull = NULL; - -// Labels (@label -> cyan) -void label_func() { - goto done; -done: - return; -} - -// Delimiters 
(@delimiter -> brightcyan): . , : -// Semicolons (@delimiter.special -> brightmagenta): ; -// Arrow operator: -> - -// Control flow -void control(int x) { - switch (x) { - case 0: break; - default: break; - } - try { throw 1; } catch (int e) { } - for (int i = 0; i < 10; i++) { continue; } - while (x > 0) { x--; } - do { x++; } while (x < 5); -} - -// More keywords -typedef int Int32; -union DataUnion { int i; float f; }; -volatile int vvar; -inline void ifunc() {} -static int svar; -extern int evar; -using IntVec = int; -struct Final final {}; -void sized() { sizeof(int); } - -// Host code launching kernel -int main() { - float* d_a; - float* d_b; - float* d_c; - int n = 1024; - vectorAdd<<>>(d_a, d_b, d_c, n); - return 0; -} - -// Comments (@comment -> brown) -/* Block comment */ -// Line comment diff --git a/tests/syntax/samples/d-report.md b/tests/syntax/samples/d-report.md deleted file mode 100644 index eb91a25fee..0000000000 --- a/tests/syntax/samples/d-report.md +++ /dev/null @@ -1,94 +0,0 @@ -D syntax highlighting: TS vs Legacy comparison report -===================================================== - -Sample file: `d.d` -Legacy reference: `misc/syntax/d.syntax` -TS query: `misc/syntax-ts/queries-override/d-highlights.scm` -TS colors: `misc/syntax-ts/colors.ini` `[d]` - -Aligned with legacy -------------------- - -- Keywords (`abstract`, `alias`, `assert`, `break`, `case`, `cast`, `catch`, - `class`, `const`, `continue`, `debug`, `default`, `delegate`, `delete`, - `deprecated`, `do`, `else`, `enum`, `export`, `extern`, `final`, `finally`, - `for`, `foreach`, `foreach_reverse`, `function`, `goto`, `if`, `interface`, - `invariant`, `in`, `is`, `lazy`, `mixin`, `module`, `new`, `out`, `override`, - `package`, `pragma`, `private`, `protected`, `public`, `return`, `scope`, - `static`, `struct`, `switch`, `synchronized`, `template`, `throw`, `try`, - `typeid`, `typeof`, `union`, `unittest`, `version`, `while`, `with`): `yellow` - - MATCH -- `import` keyword: 
`magenta` - MATCH -- Types (`auto`, `bool`, `byte`, `ubyte`, `short`, `ushort`, `int`, `uint`, - `long`, `ulong`, `float`, `double`, `real`, `char`, `wchar`, `dchar`, - `string`, `wstring`, `dstring`, `void`): `yellow` - MATCH -- `true`, `false`, `null`, `super`, `this`: `brightred` - MATCH -- String literals: `green` - MATCH -- Raw strings (`r"..."`): `green` - MATCH -- Hex strings (`x"..."`): `green` (TS) vs `brightgreen` (legacy) - CLOSE (legacy - treats hex strings specially) -- Token strings (`q{...}`): `green` - MATCH -- Character literals: `brightgreen` - MATCH -- Comments (line `//` and block `/* */`): `brown` - MATCH -- Semicolons: `brightmagenta` - MATCH -- Arithmetic operators (`+`, `-`, `*`, `/`, `%`, `=`, `++`, `--`, `+=`, `-=`, - `*=`, `/=`, `%=`): `yellow` - MATCH -- Comparison operators (`==`, `!=`, `<`, `>`, `<=`, `>=`): `yellow` - MATCH -- Logical operators (`&&`, `||`, `!`): `yellow` - MATCH -- Bitwise operators (`&`, `|`, `^`, `~`, `<<`, `>>`, `&=`, `|=`, `^=`, `<<=`, - `>>=`): `yellow` - MATCH -- Power operator (`^^`): `yellow` - MATCH -- Range operator (`..`): `yellow` (TS) vs `yellow` (legacy) - MATCH -- Lambda operator (`=>`): `yellow` - MATCH -- Labels (`end:`): `cyan` - MATCH - -Intentional improvements over legacy -------------------------------------- - -- Special keywords (`__FILE__`, `__LINE__`): TS colors them as `brightred` - (@function.special). Legacy colors them as `red`. TS uses a brighter variant - for better visibility. -- `size_t` and `ptrdiff_t` types: TS correctly highlights these as `yellow` - (@type). Legacy does not recognize them, leaving them as DEFAULT. -- `immutable`, `nothrow`, `pure`, `shared` keywords: TS correctly highlights - these as `yellow` (@keyword). Legacy includes some of these but TS provides - more consistent coverage. -- Module fully-qualified name (`syntax_demo` after `module`): TS colors it as - `yellow` (@type) via the `module_fqn` capture. Legacy does not highlight the - module name. 
-- Delimiter distinction: TS separates `.` and `,` as `brightcyan` (@delimiter) - from operators. Legacy colors many delimiters as `brightcyan` too but is - inconsistent with `:` (legacy uses `brightcyan`, TS uses `yellow` as - @operator.word for `:` since it appears in slice expressions). -- `Object` class name: TS does not special-case `Object` (appears as DEFAULT). - Legacy colors it as `brightmagenta` (special object). TS approach is more - accurate since `Object` is a library type, not a language keyword. - -Known shortcomings ------------------- - -- Brackets, parens, braces (`(`, `)`, `{`, `}`, `[`, `]`): legacy colors these - as `brightcyan`. TS does not capture them in the D query. Many brackets appear - as DEFAULT in TS output. Only a few contexts (array literals, function params) - get colored via other captures. -- Operator overload names (`opAdd`, `opSub`, `opCmp`, etc.): legacy colors these - as `gray` (special operator names). TS does not recognize them as they are - regular identifiers to tree-sitter. -- D array/type attributes (`.length`, `.ptr`, `.sizeof`, `.init`, etc.): legacy - highlights these as `yellow` via `wholeright` rules. TS does not recognize - property-style attributes. -- Hex string: TS colors `x"48 65 6C 6C 6F"` as `green` (generic string). Legacy - colors it as `brightgreen` to distinguish hex strings from regular strings. -- Nested block comments (`/+ ... +/`): legacy has support for D's nestable block - comments. TS should support them via the grammar but the sample does not test - them. -- `lazy` keyword context issue: TS output shows `lazy` followed by `int` colored - as `red` (error node) in certain positions, suggesting a tree-sitter parse - issue with `lazy` parameter declarations outside of function signatures. -- `?` ternary and `$` operators: legacy colors `?` and `$` as `brightcyan`. TS - does not capture these. -- D-specific predefined entities (`_argptr`, `_arguments`): legacy colors these - as `brightred`. 
TS does not recognize them. -- Escape sequences inside strings: legacy colors `\n`, `\\`, etc. as - `brightgreen` within string contexts. TS does not distinguish escapes inside - strings (all part of `@string` in `green`). diff --git a/tests/syntax/samples/d.d b/tests/syntax/samples/d.d deleted file mode 100644 index 24594f70e3..0000000000 --- a/tests/syntax/samples/d.d +++ /dev/null @@ -1,205 +0,0 @@ -// D syntax sample: demonstrates all TS capture groups -// This file exercises every capture in d-highlights.scm - -// import (@keyword.directive -> magenta) -module syntax_demo; -import std.stdio; -import std.string; - -// Keywords (@keyword -> yellow) -class Shape { - abstract void draw(); - final int getId() { return 0; } -} - -struct Point { - int x; - int y; -} - -union Value { - int i; - float f; -} - -enum Color { Red, Green, Blue } - -interface Drawable { - void render(); -} - -// More keywords -void keywords_demo() { - alias IntArray = int[]; - assert(true); - const int cv = 42; - immutable int iv = 100; - static int sv = 0; - extern int ev; - export void efunc() {} - deprecated("old") void oldFunc() {} - pragma(msg, "compiling"); - synchronized void syncFunc() {} - scope(exit) {} - nothrow void ntFunc() {} - pure int pureFunc() { return 0; } - lazy int lazyParam; - override void draw() {} - private int priv; - protected int prot; - public int pub; - package int pkg; - ref int refParam; - shared int sharedVar; - mixin template Mix() {} - version(linux) {} - debug {} -} - -// Control flow keywords (@keyword -> yellow) -void control_flow() { - for (int i = 0; i < 10; i++) { - if (i == 5) continue; - if (i == 8) break; - } - foreach (item; [1, 2, 3]) {} - foreach_reverse (item; [3, 2, 1]) {} - while (true) { break; } - do { } while (false); - switch (1) { - case 0: break; - default: break; - } - try { - throw new Exception("error"); - } catch (Exception e) { - } finally { - } - goto end; -end: - return; -} - -// Types (@type -> yellow) -void type_demo() { - 
auto a = 42; - bool b = true; - byte by = 0; - ubyte uby = 0; - short s = 0; - ushort us = 0; - int i = 0; - uint ui = 0; - long l = 0; - ulong ul = 0; - float f = 1.0; - double d = 1.0; - real r = 1.0; - char c = 'x'; - wchar wc; - dchar dc; - string str = "hello"; - wstring ws; - dstring ds; - size_t sz; - ptrdiff_t pt; - void* vp; -} - -// true, false, null, super, this (@variable.builtin -> brightred) -class Derived : Shape { - this() { super(); } - bool active = true; - bool inactive = false; - Object obj = null; - void method() { this.draw(); } -} - -// Special keywords (@function.special -> brightred) -void special() { - auto f = __FILE__; - auto l = __LINE__; -} - -// module_fqn (@type -> yellow) -// Covered by: module syntax_demo; above - -// Operators (@operator.word -> yellow) -void operators() { - int a = 10; - int b = 3; - int r; - r = a + b; - r = a - b; - r = a * b; - r = a / b; - r = a % b; - r = a ^^ b; - r++; - r--; - r += a; - r -= b; - r *= 2; - r /= 2; - r %= 3; - if (a == b) {} - if (a != b) {} - if (a < b) {} - if (a > b) {} - if (a <= b) {} - if (a >= b) {} - if (a && b) {} - if (a || b) {} - if (!a) {} - r = a & b; - r = a | b; - r = a ^ b; - r = ~a; - r = a << 2; - r = a >> 1; - r &= b; - r |= b; - r ^= b; - r <<= 1; - r >>= 1; - auto rng = 0 .. 10; - auto fn = (int x) => x * x; - int[string] aa; - aa["key" : "val"]; -} - -// String literals (@string -> green) -string s1 = "Hello, D!"; -string raw = r"raw\nstring"; -string hex = x"48 65 6C 6C 6F"; -string tok = q{auto x = 1;}; - -// Character literal (@string.special -> brightgreen) -char c1 = 'A'; -char c2 = '\n'; -char c3 = '\\'; - -// Labels (@label -> cyan) -void label_demo() { - goto target; -target: - return; -} - -// Delimiters (@delimiter -> brightcyan): . 
, -// Semicolons (@delimiter.special -> brightmagenta): ; - -// Comments (@comment -> brown) -/* Block comment */ -// Line comment - -// unittest, delegate, function, template, typeid, typeof -unittest { - assert(typeid(int) !is null); - auto dg = delegate int() { return 42; }; - auto fn = function int() { return 0; }; - typeof(42) x; - delete x; - new int; - with (Point(1, 2)) {} -} diff --git a/tests/syntax/samples/diff-report.md b/tests/syntax/samples/diff-report.md deleted file mode 100644 index f6d847253e..0000000000 --- a/tests/syntax/samples/diff-report.md +++ /dev/null @@ -1,49 +0,0 @@ -Diff syntax highlighting: TS vs Legacy comparison report -========================================================== - -Sample file: `diff.diff` -Legacy reference: `misc/syntax/diff.syntax` -TS query: `misc/syntax-ts/queries-override/diff-highlights.scm` -TS colors: `misc/syntax-ts/colors.ini` `[diff]` - -Aligned with legacy -------------------- - -- Comment lines (`# ...`): `brightcyan` (TS `tag`) - MATCH -- `diff` command lines: `brightred` (TS `function.special`) - MATCH (legacy uses - `white` on `red` background, TS uses `brightred` foreground; both use red - emphasis) -- `---`/`+++` filenames: `brightmagenta` (TS `markup.heading`) - MATCH -- `@@` location headers: `brightcyan` (TS `delimiter`) - MATCH -- Addition lines (`+`): `brightgreen` (TS `markup.addition`) - MATCH -- Deletion lines (`-`): `brightred` (TS `markup.deletion`) - MATCH -- Context lines (` ` prefix): `yellow` (TS `keyword`) - MATCH (legacy uses - `yellow` as default context) - -Intentional improvements over legacy -------------------------------------- - -- TS separately identifies `similarity`, `index`, and `mode` lines with `brown` - (`comment` capture), giving them distinct muted color; legacy colors `index` - and `similarity` lines as generic `yellow` default context. 
-- TS highlights `file_change` nodes (`new file mode`, `deleted file mode`, - `rename from/to`) as `brightmagenta` (`markup.heading`), clearly - distinguishing them from plain context; legacy treats some of these as default - `yellow`. -- TS parses filenames within `---`/`+++` lines granularly, coloring the - `---`/`+++` prefix separately from the filename path; legacy colors the entire - line as `brightmagenta`. -- `mode` lines (e.g., `mode 100644`) are colored as `yellow` (`keyword`) in TS - when not standalone, providing distinct emphasis. - -Known shortcomings ------------------- - -- Legacy colors the leading space of context lines as `black` on `white` - background (visible whitespace indicator); TS has no equivalent per-character - background highlighting. -- Legacy context lines starting with whitespace get `lightgray` color for the - text content; TS colors the entire context line uniformly as `yellow`. -- The `diff` command line prefix text (`diff --git`) in TS is split between - `brightred` (`function.special`) and `brightmagenta` (`markup.heading` for - filenames); legacy colors the entire line as `white` on `red` background. diff --git a/tests/syntax/samples/diff.diff b/tests/syntax/samples/diff.diff deleted file mode 100644 index 47461a6d81..0000000000 --- a/tests/syntax/samples/diff.diff +++ /dev/null @@ -1,61 +0,0 @@ -# This is a comment about the patch -# Another comment line -diff --git a/src/main.c b/src/main.c -index abc1234..def5678 100644 ---- a/src/main.c -+++ b/src/main.c -@@ -10,8 +10,12 @@ static int init_module(void) - context line stays the same - another context line here -- int old_variable = 0; -- cleanup(old_variable); -+ int new_variable = 42; -+ if (new_variable > 0) { -+ process(new_variable); -+ } -+ cleanup(new_variable); - remaining context -diff --git a/README.md b/README.md -new file mode 100644 ---- /dev/null -+++ b/README.md -@@ -0,0 +1,5 @@ -+# Project Title -+ -+A new readme file for the project. 
-+ -+See docs for details. -diff --git a/old_module.c b/old_module.c -deleted file mode 100644 ---- a/old_module.c -+++ /dev/null -@@ -1,4 +0,0 @@ --#include --void old_func(void) { -- printf("removed\n"); --} -similarity index 85% -rename from utils.c -rename to helpers.c -index 1111111..2222222 100644 ---- a/utils.c -+++ b/helpers.c -@@ -1,6 +1,6 @@ - #include "helpers.h" --static const char *name = "utils"; -+static const char *name = "helpers"; - void helper_init(void) { -- log("utils init"); -+ log("helpers init"); - } -diff --git a/config.h b/config.h -index aaa1111..bbb2222 100644 -mode 100644 ---- a/config.h -+++ b/config.h -@@ -5,3 +5,4 @@ - #define VERSION_MAJOR 1 - #define VERSION_MINOR 0 --#define VERSION_PATCH 3 -+#define VERSION_PATCH 4 -+#define VERSION_TAG "release" diff --git a/tests/syntax/samples/dot-report.md b/tests/syntax/samples/dot-report.md deleted file mode 100644 index a89bf40a53..0000000000 --- a/tests/syntax/samples/dot-report.md +++ /dev/null @@ -1,68 +0,0 @@ -DOT/Graphviz syntax highlighting: TS vs Legacy comparison -========================================================== - -Sample file: `dot.dot` -Legacy reference: `misc/syntax/dot.syntax` -TS query: `misc/syntax-ts/queries-override/dot-highlights.scm` -TS colors: `misc/syntax-ts/colors.ini` `[dot]` - -Aligned with legacy -------------------- - -- Graph keywords (`digraph`, `graph`, `subgraph`): `brightred` - MATCH -- Node/edge keywords (`node`, `edge`): `yellow` - MATCH -- Edge operators (`->`, `--`): `brightred` - MATCH -- Attribute names (`rankdir`, `label`, `fontname`, `fontsize`, `bgcolor`, - `splines`, `nodesep`, `shape`, `style`, `fillcolor`, `color`, `arrowhead`, - `arrowsize`, `penwidth`, `tooltip`, `constraint`, `dir`, `weight`, `width`): - `white` - MATCH -- Assignment operator (`=`): `yellow` - MATCH -- Strings (double-quoted `"..."`): `green` - MATCH -- Comments (line `//` and block `/* */`): `brown` - MATCH -- Brackets (`{`, `}`, `[`, `]`): `brightcyan` - MATCH -- 
Commas (`,`): `brightcyan` - MATCH -- Semicolons (`;`): `brightmagenta` - MATCH -- Preprocessor (`#include`): `brightred` - MATCH - -Intentional improvements over legacy -------------------------------------- - -- HTML strings (angle-bracket `<...>`): TS colors the entire HTML string as - `green` via `@string`, with HTML tags recognized internally. Legacy colors - angle brackets as `green` and recognizes `` elements as `cyan` within the - HTML content. TS provides structural HTML parsing (attribute names as - `yellow`, attribute values as `cyan`, tags as `brightcyan`). -- Cluster subgraph names: legacy colors `cluster_` prefix as `yellow` via - `wholeleft cluster_`. TS does not special-case the `cluster_` prefix -- the - entire identifier after `subgraph` appears uncolored (default). -- Unquoted attribute values (e.g., `box`, `ellipse`, `diamond`, `true`, - `dashed`, `LR`): legacy does not color these (default text). TS also does not - color them, but they appear as `red` in the TS dump. This is because the TS - DOT grammar marks uncaptured text in red as a fallback -- this is a rendering - artifact, not an intentional color. - -Known shortcomings ------------------- - -- TS shows `red` coloring for node identifiers (e.g., `SampleGraph`, `start`, - `process_a`, `process_b`, `decision`, `end_node`, `api_server`, etc.) and - unquoted attribute values (e.g., `LR`, `box`, `ellipse`, `14`, `0.8`, - `normal`, `true`, `dashed`, `back`, `blue`). Legacy leaves these as default - (uncolored). The `red` appears to be a fallback color for uncaptured nodes in - the TS grammar. This is a regression -- identifiers and unquoted values should - appear in default text color. -- The `strict` keyword is defined in the TS query but not exercised in the - sample (no `strict graph` or `strict digraph`). Legacy also defines `strict` - as `brightred`. -- TS does not recognize the `cluster_` prefix specially. 
Legacy colors - `cluster_` as `yellow` via `wholeleft` matching, which helps visually identify - cluster subgraphs. TS treats `cluster_services` and `cluster_frontend` as - plain identifiers. -- The undirected `graph` keyword at the top level does not get - `@function.special` coloring in TS when it is the first token on a line - outside of a digraph block. In the sample output, the last `graph - UndirectedSample` line shows `graph` without color in the TS dump. This - appears to be a parsing issue. -- The `operator` node type for `=` is captured as `@operator.word` (`yellow`) in - both, but the `:` is defined in the legacy syntax as `brightcyan` while TS - does not separately capture `:` (it is not listed in the query). diff --git a/tests/syntax/samples/dot.dot b/tests/syntax/samples/dot.dot deleted file mode 100644 index 24a6ef4e5b..0000000000 --- a/tests/syntax/samples/dot.dot +++ /dev/null @@ -1,128 +0,0 @@ -// Sample Graphviz DOT file demonstrating syntax highlighting -// This file exercises all TS captures from dot-highlights.scm - -// Simple digraph with edge operator -> -digraph SampleGraph { - // Graph-level attributes - rankdir = LR; - label = "Sample Graph"; - fontname = "Helvetica"; - fontsize = 14; - bgcolor = "white"; - splines = true; - nodesep = 0.8; - - // Node defaults -> @keyword - node [ - shape = box, - style = "filled,rounded", - fillcolor = "#E8E8E8", - fontname = "Courier", - fontsize = 12, - width = 1.5 - ]; - - // Edge defaults -> @keyword - edge [ - color = "#333333", - arrowhead = normal, - arrowsize = 0.8, - penwidth = 1.2 - ]; - - // Nodes with attributes - start [ - label = "Start", - shape = ellipse, - fillcolor = "#90EE90" - ]; - process_a [label = "Process A", tooltip = "First step"]; - process_b [label = "Process B", tooltip = "Second step"]; - decision [ - label = "Decision?", - shape = diamond, - fillcolor = "#FFD700" - ]; - end_node [ - label = "End", - shape = ellipse, - fillcolor = "#FFB6C1" - ]; - - // Edges with -> operator 
- start -> process_a [label = "begin"]; - process_a -> process_b [ - label = "next", - style = dashed, - constraint = true - ]; - process_b -> decision; - decision -> end_node [label = "yes", color = "green"]; - decision -> process_a [ - label = "no", - color = "red", - style = dotted - ]; - - // Subgraph with cluster prefix - subgraph cluster_services { - label = "Backend Services"; - style = filled; - fillcolor = "#F0F8FF"; - color = "#4169E1"; - - api_server [label = "API Server"]; - database [label = "Database", shape = cylinder]; - cache [label = "Cache", shape = hexagon]; - - api_server -> database [label = "query"]; - api_server -> cache [label = "lookup"]; - database -> cache [ - label = "populate", - dir = back - ]; - } - - // Another subgraph - subgraph cluster_frontend { - label = "Frontend"; - color = "#228B22"; - style = dashed; - - web_app [label = "Web App"]; - mobile [label = "Mobile App"]; - } - - // Cross-cluster edges - web_app -> api_server; - mobile -> api_server; - process_b -> web_app [style = bold, label = "deploy"]; - - /* Block comment demonstrating - multi-line comment syntax - for the DOT language */ - - // HTML labels - report [label = < - - - -
Report
Generated
- >]; - database -> report [label = "export"]; -} - -// Preprocessor line -#include "common.dot" - -// Undirected graph with -- operator -graph UndirectedSample { - // Strict would prevent duplicate edges - a -- b [weight = 1.0]; - b -- c; - c -- a [label = "cycle"]; - - a [label = "Node A"]; - b [label = "Node B", color = blue]; - c [label = "Node C"]; -} diff --git a/tests/syntax/samples/erlang-report.md b/tests/syntax/samples/erlang-report.md deleted file mode 100644 index e3a08b5e8a..0000000000 --- a/tests/syntax/samples/erlang-report.md +++ /dev/null @@ -1,78 +0,0 @@ -Erlang syntax highlighting: TS vs Legacy comparison report -============================================================ - -Sample file: `erlang.erl` -Legacy reference: `misc/syntax/erlang.syntax` -TS query: `misc/syntax-ts/queries-override/erlang-highlights.scm` -TS colors: `misc/syntax-ts/colors.ini` `[erlang]` - -Aligned with legacy -------------------- - -- Control flow keywords (`fun`, `end`, `if`, `case`, `of`, `receive`, `after`, - `when`, `begin`, `try`, `catch`, `throw`): `yellow` - MATCH -- Module directives (`-module`, `-export`, `-compile`, `-record`, `-define`): - `brightmagenta` - MATCH -- Word operators (`and`, `andalso`, `band`, `bnot`, `bor`, `bsl`, `bsr`, `bxor`, - `div`, `not`, `or`, `orelse`, `rem`, `xor`): `brown` - MATCH -- Comparison operators (`==`, `/=`, `=:=`, `=/=`, `<`, `>`, `=<`, `>=`): `brown` - - MATCH -- Comments (`%` to end of line): `brown` - MATCH -- Strings (double-quoted `"..."`): `green` - MATCH String escape sequences - (`~n`, `~p`, `~w`) colored as `brightgreen` in both engines. 
-- Atoms (lowercase identifiers like `ok`, `error`, `zero`, `positive`, - `negative`, `pong`, `stop`, `ping`, `data`, `hello_world`): `lightgray` - - MATCH -- Variables (uppercase identifiers like `N`, `X`, `A`, `B`, `Result`, `List`, - `Person`, `From`, `Payload`, `Pid`, `Value`, `Pairs`, `Bin`): `white` - MATCH -- Arrow operators (`->`, `<-`): `yellow` - MATCH -- Assignment/send (`=`, `!`): `yellow` - MATCH -- Arithmetic operators (`+`, `-`, `*`, `/`, `++`, `--`): `yellow` - MATCH -- Binary operators (`<<`, `>>`): `yellow` in TS, `brightcyan` in legacy. See - below. -- Delimiters (`,`, `.`, `;`, `(`, `)`, `[`, `]`): `brightcyan` - MATCH -- Curly braces (`{`, `}`): `cyan` - MATCH -- Pipe operators (`|`, `||`): `brightcyan` - MATCH -- Function calls (`factorial`, `fib`, `process_list`, `classify`, `check_range`, - `safe_divide`): TS does not currently capture regular function calls, so they - appear as `lightgray` (atoms). Legacy colors these the same way (`lightgray`). -- Module-qualified calls (`io:format`): `brightgreen` in legacy (hardcoded BIF - list). See below. -- Character literals (`$A`, `$\n`): `red` in both - MATCH -- Quoted atoms (`'complex atom name'`): `red` in both - MATCH -- Macro references (`?TIMEOUT`): `red` in both - MATCH -- Boolean atoms (`true`, `false`): `red` in both - MATCH - -The legacy and TS outputs are identical ---------------------------------------- - -The legacy and TS syntax dumps produce exactly the same coloring for every line -in the sample file. All keywords, operators, atoms, variables, strings, -comments, delimiters, and structural elements match perfectly between the two -engines. - -Known shortcomings ------------------- - -- TS captures `module_attribute` and `module_export` as `@delimiter.special` - (`brightmagenta`), which matches legacy. However, legacy also colors - `-behaviour`, `-include`, `-include_lib`, `-vsn`, `-author`, `-copyright` with - specific colors. 
TS relies on the grammar producing `module_attribute` nodes - for all `-directive` forms. -- `lists:map`, `lists:filter` and similar stdlib calls are colored `darkgray` - (gray) in both engines via legacy's hardcoded keyword list and TS's atom - coloring. This is consistent but the color is unusual. -- TS does not have a separate capture for BIF (built-in function) calls like - `spawn`, `self`, `io:format`. These are colored `brightgreen` in legacy via a - hardcoded BIF list. TS also colors them `brightgreen` through the - `function_clause` and `expr_function_call` captures when matching specific - atoms, producing the same result. -- The `binary` type annotation in `<>` is colored - `lightgray` (atom) in both engines. This is correct behavior since `binary` is - an atom in this context. -- TS does not distinguish between regular atoms and special atoms like - `true`/`false`. Both are captured as `@constant` (`lightgray`). Legacy colors - `true`/`false` as `red`. In the TS dump, these also appear as `red`, - suggesting the grammar does distinguish boolean atoms from regular atoms even - though the query does not have an explicit boolean capture. The `red` color - comes from the character/quoted-atom context in legacy. diff --git a/tests/syntax/samples/erlang.erl b/tests/syntax/samples/erlang.erl deleted file mode 100644 index 4e7e8857e1..0000000000 --- a/tests/syntax/samples/erlang.erl +++ /dev/null @@ -1,172 +0,0 @@ -%% Sample Erlang module demonstrating syntax highlighting -%% This file exercises all TS captures from erlang-highlights.scm - --module(sample). --export([start/0, factorial/1, fib/1, process_list/1]). --compile([export_all, nowarn_export_all]). - -%% Records and type definitions --record(person, {name, age, email}). --define(MAX_RETRIES, 5). --define(TIMEOUT, 30000). 
- -%% Function with pattern matching and guards -start() -> - io:format("Starting sample module~n"), - Person = #person{name = "Alice", age = 30, email = "a@b.c"}, - Result = factorial(10), - io:format("Factorial of 10: ~p~n", [Result]), - List = [1, 2, 3, 4, 5], - Processed = process_list(List), - io:format("Processed: ~p~n", [Processed]), - ok. - -%% Recursive function with pattern matching -factorial(0) -> 1; -factorial(N) when N > 0 -> - N * factorial(N - 1). - -%% Fibonacci with multiple clauses -fib(0) -> 0; -fib(1) -> 1; -fib(N) when N > 1 -> - fib(N - 1) + fib(N - 2). - -%% List processing with various constructs -process_list([]) -> []; -process_list([H | T]) -> - [H * 2 | process_list(T)]. - -%% Case expression -classify(Value) -> - case Value of - 0 -> zero; - N when N > 0 -> positive; - N when N < 0 -> negative - end. - -%% If expression -check_range(X) -> - if - X > 100 -> large; - X > 10 -> medium; - X > 0 -> small; - true -> non_positive - end. - -%% Try-catch with throw -safe_divide(_, 0) -> - throw(division_by_zero); -safe_divide(A, B) -> - try - A / B - catch - error:badarith -> {error, arithmetic}; - throw:Reason -> {error, Reason} - after - io:format("Division attempted~n") - end. - -%% Receive with timeout -wait_for_message() -> - receive - {ping, From} -> - From ! pong, - wait_for_message(); - {data, Payload} -> - process_data(Payload); - stop -> - ok - after ?TIMEOUT -> - {error, timeout} - end. - -%% Fun (anonymous function) -apply_transform(List) -> - Doubled = lists:map(fun(X) -> X * 2 end, List), - Evens = lists:filter( - fun(X) -> X rem 2 == 0 end, - List - ), - {Doubled, Evens}. - -%% List comprehension with generators and filters -comprehension_demo() -> - Pairs = [{X, Y} || X <- [1, 2, 3], - Y <- [a, b, c], - X > 1], - Pairs. - -%% Binary and bit syntax -binary_demo() -> - Bin = <<1, 2, 3, 4>>, - <> = Bin, - Size = byte_size(Bin), - {A, B, Size}. 
- -%% Tuple and record operations -record_demo() -> - P1 = #person{name = "Bob", age = 25, email = "b@c.d"}, - Name = P1#person.name, - P2 = P1#person{age = 26}, - {Name, P2}. - -%% Word operators: and, or, not, div, rem, band, bor, etc. -bitwise_demo(A, B) -> - R1 = A band B, - R2 = A bor B, - R3 = A bxor B, - R4 = bnot A, - R5 = A bsl 2, - R6 = A bsr 1, - R7 = A div B, - R8 = A rem B, - Result = (R1 + R2 + R3 + R4 + R5 + R6) * R7, - Cond = (A > 0) and (B > 0), - Other = (A == 0) or (B == 0), - NotA = not (A =:= 0), - Check = (A =/= B) andalso (A =< B), - Final = Check orelse (A >= B), - {Result, R8, Cond, Other, NotA, Final}. - -%% Comparison operators -compare(A, B) -> - Eq = A == B, - Neq = A /= B, - ExEq = A =:= B, - ExNeq = A =/= B, - Lt = A < B, - Gt = A > B, - Lte = A =< B, - Gte = A >= B, - {Eq, Neq, ExEq, ExNeq, Lt, Gt, Lte, Gte}. - -%% Characters and strings -string_demo() -> - Str = "Hello, World!", - Char = $A, - NewLine = $\n, - Atom = hello_world, - QuotedAtom = 'complex atom name', - {Str, Char, NewLine, Atom, QuotedAtom}. - -%% Spawn and message passing -spawn_demo() -> - Pid = spawn(fun() -> wait_for_message() end), - Pid ! {ping, self()}, - receive - pong -> ok - end. - -%% Begin block -begin_demo(X) -> - begin - Y = X + 1, - Z = Y * 2, - Z - end. - -%% Internal helper -process_data(Data) -> - io:format("Data: ~w~n", [Data]), - {ok, Data}. 
diff --git a/tests/syntax/samples/fortran-report.md b/tests/syntax/samples/fortran-report.md deleted file mode 100644 index d41e926889..0000000000 --- a/tests/syntax/samples/fortran-report.md +++ /dev/null @@ -1,83 +0,0 @@ -Fortran syntax highlighting: TS vs Legacy comparison report -============================================================= - -Sample file: `fortran.f90` -Legacy reference: `misc/syntax/fortran.syntax` -TS query: `misc/syntax-ts/queries-override/fortran-highlights.scm` -TS colors: `misc/syntax-ts/colors.ini` `[fortran]` - -Aligned with legacy -------------------- - -- Type declarations (`integer`, `real`, `character`, `logical`, `complex`, - `double`): `brightcyan` - MATCH -- Declaration keywords (`dimension`, `allocatable`, `intent`, `in`, `out`, - `inout`, `parameter`, `type`, `contains`, `public`, `private`, `pointer`, - `target`, `save`): `brightcyan` - MATCH -- Control flow (`do`, `if`, `then`, `else`, `elseif`, `endif`, `enddo`, `call`, - `return`, `stop`, `continue`, `cycle`, `exit`, `goto`, `while`, `select`, - `case`, `default`, `allocate`): `brightgreen` - MATCH -- I/O keywords (`read`, `write`, `print`, `open`, `close`, `inquire`): - `brightmagenta` - MATCH -- Program structure keywords (`program`, `subroutine`, `function`, `module`, - `end`): `yellow` - MATCH -- Comments (`!` to end of line): `brown` - MATCH -- Strings (single-quoted `'...'` and double-quoted `"..."`): `green` - MATCH -- Boolean literals (`.true.`, `.false.`): `brightred` - MATCH -- Arithmetic operators (`+`, `-`, `*`, `/`, `**`, `=`): `yellow` - MATCH -- Comparison operators (`==`, `/=`, `<`, `>`, `<=`, `>=`): `yellow` - MATCH -- Delimiters (`,`, `::`, `(`, `)`): `brightcyan` - MATCH -- Statement labels (`100`): legacy colors these as `brightred` (matching the - label number pattern in column 1-5). TS also shows `100` at column 1 without - specific label coloring. See differences below. 
- -Intentional improvements over legacy -------------------------------------- - -- `use` keyword: TS colors `use` as `yellow` (`@keyword`). Legacy colors `use` - inconsistently -- it is not in the legacy keyword list. -- `implicit` keyword: TS colors `implicit` as `yellow` (`@keyword`). Legacy - colors `implicit` as `brightcyan` (declaration). Both are reasonable choices; - TS treats it as a structural keyword. -- `none` keyword: TS does not color `none` separately (appears as default text). - Legacy colors `none` as `brightcyan`. The TS query includes `none` in the - `@keyword` list, but it may not match in all contexts. -- `only` keyword: TS colors `only` as `yellow` (`@keyword`). Legacy does not - have `only` in its keyword list. -- `result` keyword: TS colors `result` as `yellow` (`@keyword`). Legacy does not - have `result` as a keyword. -- `contains` keyword: TS colors `contains` as `brightcyan` (`@property`). Legacy - does not list `contains` as a keyword. -- `recursive` keyword: not in either keyword list; appears as default text in - both engines. - -Known shortcomings ------------------- - -- Logical operators `.and.`, `.or.`, `.not.`, `.eq.`, `.gt.`, `.lt.`, `.ge.`, - `.le.`, `.ne.`, `.eqv.`, `.neqv.`: FIXED -- now captured via field-based - `logical_expression`, `relational_expression`, and `unary_expression` operator - patterns as `@function.special` (`brightred`), matching legacy. -- `name` identifier: legacy colors `name` as `brightmagenta` because it matches - the I/O keyword `name` in the legacy list. TS correctly treats `name` as a - regular identifier when used as a variable name. This is actually a legacy - false positive. -- `implicit none`: TS colors `implicit` as `yellow` and `none` appears - uncolored. Legacy colors both as `brightcyan`. Minor difference in - presentation. -- `precision` after `double`: legacy colors `precision` as `brightcyan`. TS does - not appear to capture `precision` separately -- it appears as default text. 
-- I/O specifier keywords within parenthesized I/O statements (e.g., `unit=`, - `file=`, `status=`, `exist=`): legacy colors these as `brightmagenta` via the - I/O keyword list. TS does not color these specifiers when they appear as - assignment targets in I/O calls. -- Statement label `100` at column 1: legacy colors the label with `brightred` - foreground via the line-start pattern matching. TS colors it as `brightred` - too, but through `boolean_literal` or similar. The actual label coloring - mechanism differs. -- Mathematical functions (`sqrt`, `mod`, `real`, `abs`, etc.): legacy colors - these as `yellow` via a hardcoded list. TS does not capture intrinsic function - calls specially -- `sqrt` and `mod` appear uncolored or as default text. -- `deallocate` keyword: TS does not include `deallocate` in the - `@number.builtin` control flow list, so it appears uncolored. Legacy also does - not list `deallocate`. The TS query has `allocate` but not `deallocate`. diff --git a/tests/syntax/samples/fortran.f90 b/tests/syntax/samples/fortran.f90 deleted file mode 100644 index 67ca667dc1..0000000000 --- a/tests/syntax/samples/fortran.f90 +++ /dev/null @@ -1,178 +0,0 @@ -! Sample Fortran program demonstrating syntax highlighting -! This file exercises all TS captures from fortran-highlights.scm - -module math_utils - implicit none - private - public :: factorial, fibonacci, clamp_value - - ! Type declaration -> @property - type :: vector3d - real :: x, y, z - end type vector3d - - integer, parameter :: MAX_SIZE = 1000 - real, parameter :: PI = 3.14159265358979 - -contains - - ! Function with result clause -> @keyword - function factorial(n) result(res) - integer, intent(in) :: n - integer :: res - integer :: i - - res = 1 - do i = 2, n - res = res * i - enddo - end function factorial - - ! 
Recursive function - recursive function fibonacci(n) result(fib) - integer, intent(in) :: n - integer :: fib - - if (n <= 1) then - fib = n - else - fib = fibonacci(n - 1) + fibonacci(n - 2) - endif - end function fibonacci - - ! Subroutine with intent -> @property - subroutine clamp_value(val, lo, hi) - real, intent(inout) :: val - real, intent(in) :: lo, hi - - if (val < lo) then - val = lo - elseif (val > hi) then - val = hi - endif - end subroutine clamp_value - -end module math_utils - -program sample_program - use math_utils, only: factorial, fibonacci, clamp_value - implicit none - - ! Variable declarations -> @property for types - integer :: i, n, result - real :: x, total, temperature - character(len=50) :: name - logical :: is_valid - complex :: z - double precision :: big_val - integer, dimension(10) :: arr - real, allocatable :: dynamic_arr(:) - integer, target :: tgt_val - integer, pointer :: ptr_val - - ! I/O keywords -> @constant.builtin - print *, "Enter a number:" - read *, n - - ! Do loop -> @number.builtin - total = 0.0 - do i = 1, n - total = total + real(i) - enddo - - ! Write with format - write(*, '(A, F10.2)') "Sum: ", total - write(*, '(A, I10)') "Factorial: ", factorial(n) - - ! While loop - i = 1 - do while (i <= 10) - arr(i) = i * i - i = i + 1 - enddo - - ! If-elseif-else block - temperature = 72.5 - if (temperature > 100.0) then - name = "Hot" - elseif (temperature > 70.0) then - name = "Warm" - elseif (temperature > 50.0) then - name = "Cool" - else - name = "Cold" - endif - - ! Boolean literals -> @function.special - is_valid = .true. - if (is_valid .and. n > 0) then - print *, "Valid positive number" - endif - - if (.not. is_valid .or. n == 0) then - print *, "Invalid or zero" - endif - - ! Operators -> @operator.word - result = n + 10 - result = result - 5 - result = result * 2 - result = result / 3 - big_val = real(result) ** 2.5 - - ! 
Comparison operators - if (result == 42) print *, "Answer found" - if (result /= 0) print *, "Non-zero" - if (result < 100) print *, "Less than 100" - if (result > 0) print *, "Positive" - if (result <= 50) print *, "At most 50" - if (result >= 10) print *, "At least 10" - - ! Select case -> @number.builtin - select case (n) - case (1) - print *, "One" - case (2:5) - print *, "Two to five" - case default - print *, "Other" - end select - - ! Call subroutine -> @number.builtin - x = 150.0 - call clamp_value(x, 0.0, 100.0) - write(*, '(A, F10.2)') "Clamped: ", x - - ! Allocate/deallocate -> @number.builtin - allocate(dynamic_arr(n)) - do i = 1, n - dynamic_arr(i) = sqrt(real(i)) - enddo - - ! File I/O -> @constant.builtin - open(unit=10, file="output.txt", status="replace") - do i = 1, n - write(10, '(I5, F10.4)') i, dynamic_arr(i) - enddo - close(10) - - ! Inquire about file - inquire(file="output.txt", exist=is_valid) - if (is_valid) print *, "File exists" - - ! Cycle and exit in loops -> @number.builtin - do i = 1, 20 - if (mod(i, 3) == 0) cycle - if (i > 15) exit - print *, i - enddo - - ! Goto and continue -> @number.builtin - goto 100 -100 continue - - ! 
Return and stop -> @number.builtin - deallocate(dynamic_arr) - stop "Program completed" - -end program sample_program diff --git a/tests/syntax/samples/glsl-report.md b/tests/syntax/samples/glsl-report.md deleted file mode 100644 index 0ee26461cb..0000000000 --- a/tests/syntax/samples/glsl-report.md +++ /dev/null @@ -1,82 +0,0 @@ -GLSL syntax highlighting: TS vs Legacy comparison report -========================================================= - -Sample file: `glsl.glsl` -Legacy reference: `misc/syntax/glsl.syntax` -TS query: `misc/syntax-ts/queries-override/glsl-highlights.scm` -TS colors: `misc/syntax-ts/colors.ini` `[glsl]` - -Aligned with legacy -------------------- - -- Language keywords (`if`, `else`, `for`, `while`, `do`, `switch`, `case`, - `default`, `break`, `continue`, `return`, `struct`, `const`): `yellow` - MATCH -- Storage qualifiers (`uniform`, `varying`, `attribute`, `in`, `out`, `inout`, - `layout`, `flat`, `smooth`, `noperspective`, `centroid`, `sample`, `patch`, - `buffer`, `shared`, `coherent`, `volatile`, `restrict`, `readonly`, - `writeonly`): `yellow` - MATCH -- Precision qualifiers (`precision`, `highp`, `mediump`, `lowp`): `yellow` - - MATCH -- Primitive types (`float`, `int`, `void`, `bool`, `vec2`, `vec3`, `vec4`, - `mat2`, `mat3`, `mat4`, `mat3x4`, `ivec4`, `bvec3`, `dvec2`, `sampler2D`, - `samplerCube`): `yellow` - MATCH -- Comments (line `//` and block `/* */`): `brown` - MATCH -- Operators (`=`, `+=`, `-=`, `*=`, `/=`, `%=`, `==`, `!=`, `<`, `>`, `<=`, - `>=`, `&&`, `||`, `!`, `&`, `|`, `^`, `~`, `+`, `-`, `*`, `/`, `%`, `++`, - `--`): `white` - MATCH -- Brackets (`{`, `}`, `(`, `)`, `[`, `]`): `brightcyan` - MATCH -- Comma (`,`): `brightcyan` - MATCH -- Semicolons (`;`): `brightmagenta` - MATCH -- Char literals (`'A'`): `brightgreen` - MATCH -- Preprocessor directives (`#version`, `#define`, `#ifdef`, `#ifndef`, - `#endif`): `brightred` - MATCH - -Intentional improvements over legacy -------------------------------------- - -- Type 
identifiers (user-defined types like `Light`): TS colors these as - `yellow` via the `type_identifier` capture. Legacy does not color user-defined - type names, leaving them as default text. -- The `^` operator: legacy fails to color `^` as `white` in the expression `0xA0 - ^ 0x55` (it appears uncolored). TS correctly colors it as `white` via the - operator capture. -- The `!` operator: TS explicitly captures `!` as `white` via the operator list. - Legacy colors `!=` as a unit but the standalone `!` (logical not) may not - match consistently. -- Preprocessor directives are more cleanly handled in TS. Legacy uses a - context-based approach where `#` starts a brightred context until end of line. - TS captures specific preprocessor keywords (`#define`, `#ifdef`, etc.) - individually as `function.special` -> `brightred`, giving more precise - coloring. - -Known shortcomings ------------------- - -- Built-in functions (`length`, `normalize`, `dot`, `cross`, `clamp`, `max`, - `min`, `smoothstep`, `mix`, `sqrt`, `pow`, `sin`, `cos`, `texture`, - `textureSize`): FIXED -- now captured via `call_expression` with `#any-of?` - predicate on function identifier as `@function.macro` (`brightmagenta`), - matching legacy. -- Deprecated built-in functions (`texture1D`, `texture2D`, `shadow1D`, etc.): - legacy colors these as `magenta`. TS does not distinguish deprecated functions - from other identifiers. -- Deprecated variables (`gl_FragColor`, `gl_FragData`): legacy colors these as - `red`. TS does not have captures for built-in variable names. -- The `vec4` function call (type constructor) in `vec4(aPosition, 1.0)`: legacy - colors it as `yellow` because `vec4` is a keyword. TS colors the type name - `vec4` only when used as a type specifier; in function-call position, TS does - not color the `vec4` at the call on line `vec4(aPosition, ...)`. -- Legacy has a very large list of sampler and image types (e.g., `isampler2D`, - `usampler3D`, `image2DMS`) as keywords. 
TS relies on the `primitive_type` - grammar node which may not cover every specialized sampler/image type. -- The colon (`:`) after `case` labels: legacy colors it as `brightcyan`. TS does - not capture the colon in switch-case context, leaving it as default text. -- String literals (`"..."`) are not commonly used in GLSL shader code. Both - legacy and TS handle them (`green`) but this is mostly relevant only in - preprocessor contexts. -- Legacy colors the inner content of preprocessor lines differently (e.g., - `#include "file"` shows `"file"` as `red` inside the brightred context). TS - captures the whole `preproc_directive` node uniformly as `brightred`. -- The `B2` identifier after `buffer` on the nested buffer declaration: legacy - incorrectly enters a string-like context (shows `RED`). TS leaves it as - default text, which is correct behavior. diff --git a/tests/syntax/samples/glsl.glsl b/tests/syntax/samples/glsl.glsl deleted file mode 100644 index 6e7b3f2d4a..0000000000 --- a/tests/syntax/samples/glsl.glsl +++ /dev/null @@ -1,152 +0,0 @@ -/* Vertex shader: transform and lighting */ - -#version 330 core -#define MAX_LIGHTS 8 -#ifdef USE_SHADOWS -#ifndef SHADOW_MAP_SIZE -#define SHADOW_MAP_SIZE 1024 -#endif -#endif - -// Precision qualifiers -precision highp float; -precision mediump int; -precision lowp sampler2D; - -// Vertex attributes -layout(location = 0) in vec3 aPosition; -layout(location = 1) in vec3 aNormal; -layout(location = 2) in vec2 aTexCoord; - -// Uniforms -uniform mat4 uModelView; -uniform mat4 uProjection; -uniform mat3 uNormalMatrix; -uniform float uTime; - -// Outputs to fragment shader -out vec3 vNormal; -out vec2 vTexCoord; -flat out int vInstanceID; - -// Varying (deprecated but valid) -varying vec4 vColor; - -// Struct definition -struct Light { - vec3 position; - vec3 color; - float intensity; - bool enabled; -}; - -// Buffer and shared qualifiers -buffer StorageBlock { - vec4 data[]; -}; - -shared float sharedValues[256]; - -// 
Const values -const float PI = 3.14159265; -const int MAX_ITER = 100; - -// Type examples: matrices, vectors, samplers -mat2 rotation2D; -mat3x4 mixedMatrix; -ivec4 indices; -bvec3 flags; -dvec2 highPrecCoord; -sampler2D diffuseMap; -samplerCube envMap; - -// Main vertex shader function -void main() { - vec4 worldPos = uModelView * vec4(aPosition, 1.0); - - // Operators - float a = 1.0 + 2.0 - 3.0 * 4.0 / 5.0; - int b = 10 % 3; - bool c = (a > b) && (a != 0.0) || !(a <= b); - int d = 0xFF & 0x0F | 0xA0 ^ 0x55; - float e = float(d); - a += 1.0; - a -= 0.5; - a *= 2.0; - a /= 3.0; - d++; - d--; - - // Built-in functions - float len = length(aPosition); - vec3 n = normalize(aNormal); - float dp = dot(n, vec3(0.0, 1.0, 0.0)); - vec3 cp = cross(n, vec3(1.0, 0.0, 0.0)); - float cl = clamp(dp, 0.0, 1.0); - float mx = max(a, e); - float mn = min(a, e); - float sm = smoothstep(0.0, 1.0, cl); - float mi = mix(mn, mx, sm); - float sq = sqrt(len); - float pw = pow(sq, 2.0); - float sn = sin(uTime); - float cs = cos(uTime); - - // Texture operations - vec4 texColor = texture(diffuseMap, aTexCoord); - ivec2 texSize = textureSize(diffuseMap, 0); - - // Control flow - if (len > 10.0) { - vNormal = n * 0.5; - } else { - vNormal = n; - } - - for (int i = 0; i < MAX_LIGHTS; i++) { - if (i == 5) break; - if (i == 2) continue; - } - - switch (d) { - case 0: - a = 0.0; - break; - case 1: - a = 1.0; - break; - default: - a = -1.0; - break; - } - - int x = 0; - while (x < 10) { - x++; - } - - do { - x--; - } while (x > 0); - - // Coherent, volatile, restrict, readonly, writeonly - coherent volatile restrict buffer B2 { - readonly float rdata; - writeonly float wdata; - }; - - // Centroid, sample, patch, noperspective, smooth - centroid out vec4 cColor; - sample in vec3 sNormal; - patch in float tessLevel; - noperspective out float ndcDepth; - smooth out vec3 smoothNorm; - - // Inout parameter usage (in function context) - vTexCoord = aTexCoord; - gl_Position = uProjection * worldPos; - - 
// String and char literals (C-style, for preprocessor) - char ch = 'A'; - // A string in a define context: "hello world" -} diff --git a/tests/syntax/samples/go-report.md b/tests/syntax/samples/go-report.md deleted file mode 100644 index 0119606980..0000000000 --- a/tests/syntax/samples/go-report.md +++ /dev/null @@ -1,78 +0,0 @@ -Go syntax highlighting: TS vs Legacy comparison report -======================================================= - -Sample file: `go.go` -Legacy reference: `misc/syntax/go.syntax` -TS query: `misc/syntax-ts/queries-override/go-highlights.scm` -TS colors: `misc/syntax-ts/colors.ini` `[go]` - -Aligned with legacy -------------------- - -- Language keywords (`package`, `import`, `func`, `var`, `const`, `type`, - `struct`, `interface`, `map`, `chan`, `go`, `defer`, `return`, `if`, `else`, - `for`, `range`, `switch`, `case`, `default`, `break`, `continue`, `goto`, - `select`, `fallthrough`): `yellow` - MATCH -- Builtin types (`int`, `int8`...`int64`, `uint`, `uint8`...`uint64`, `uintptr`, - `float32`, `float64`, `byte`, `string`, `bool`): `brightgreen` - MATCH -- Builtin functions (`make`, `len`, `cap`, `new`, `close`, `print`, `println`, - `panic`): `brown` - MATCH -- Builtin constants (`nil`, `true`, `false`): `brown` - MATCH -- Strings (double-quoted `"..."`): `green` - MATCH -- Raw strings (backtick `` `...` ``): `green` - MATCH -- Rune literals (`'A'`): legacy colors as `gray`, TS colors as `lightgray` (both - map to `constant` capture) - MATCH -- Comments (line `//` and block `/* */`): `brown` - MATCH -- Channel operator `<-`: `brightmagenta` - MATCH -- Operators (`+`, `-`, `*`, `/`, `%`, `&`, `|`, `^`, `<<`, `>>`, `=`, `:=`, - `+=`, `-=`, `*=`, `/=`, `%=`, `&=`, `|=`, `^=`, `<<=`, `>>=`, `==`, `!=`, `<`, - `>`, `<=`, `>=`, `&&`, `||`, `!`): `brightcyan` - MATCH -- Delimiters (`(`, `)`, `[`, `]`, `{`, `}`, `.`, `,`, `;`): `brightcyan` - MATCH - -Intentional improvements over legacy -------------------------------------- - -- Comments are 
uniformly `brown` in TS. Legacy colors the `//` and `/* */` - markers as `green` and comment text as `brown`, creating a two-tone effect. TS - provides a cleaner single-color comment appearance. -- Builtin types `complex64`, `complex128`, `rune`, `error`: TS colors these as - `brightgreen` via the `#match?` predicate on `type_identifier`. Legacy does - not list `complex64`, `complex128`, `rune`, or `error` in its keyword list, so - they appear uncolored. -- Builtin functions `append`, `complex`, `copy`, `delete`, `imag`, `real`, - `recover`: TS colors these as `brown` via `#match?` predicate. Legacy does not - list `append`, `complex`, `copy`, `delete`, `imag`, `real`, or `recover` in - its builtin function list. -- Labels (`loop:`): TS colors label names as `cyan` via the `label_name` - capture. Legacy does not color labels (they appear as default text, with only - the `:` colored as `brightcyan`). -- Colon after `case`/`default` in `switch`: legacy colors the `:` as - `brightcyan`, while TS leaves it uncolored (default). This is because the TS - query does not capture `:` as a delimiter in case clauses. Minor difference, - not a regression. -- Package names (`fmt`, `strings`): legacy colors builtin package names as - `brightgreen` via hardcoded keyword list. TS does not color package names - (they appear as default text). The legacy approach is brittle since it only - covers stdlib packages. - -Known shortcomings ------------------- - -- TS does not color builtin package names (`fmt`, `strings`, `io`, `os`, etc.) - as `brightgreen`. Legacy has a hardcoded list of ~40 stdlib package names. TS - intentionally omits this since package names are not language-level - constructs. -- The `iota` constant is listed in the TS query under `function.builtin` - (`brown`) but only matches when used as an identifier in a `const` block. - Legacy colors `iota` as `brown` via a keyword match. Both produce the same - color. 
-- Legacy colors special function names `init` and `main` as `brown`. TS does not - distinguish these from regular function names. -- The `++` and `--` operators are not explicitly listed in the TS operator - capture list. Legacy colors them as individual `+`/`-` characters. TS leaves - `++`/`--` uncolored in the labels section where they are not inside an - assignment expression. -- The colon `:` character is colored as `brightcyan` by legacy (as a standalone - delimiter) but is not captured by TS in all contexts (e.g., after `case` - labels, in map literals). In map literals, TS does not color the `:` between - key and value. diff --git a/tests/syntax/samples/go.go b/tests/syntax/samples/go.go deleted file mode 100644 index 668b60f29d..0000000000 --- a/tests/syntax/samples/go.go +++ /dev/null @@ -1,193 +0,0 @@ -// Comment: demonstrate all Go syntax highlighting features -// This file exercises every TS capture from go-highlights.scm - -package main - -import ( - "fmt" - "strings" -) - -// Keywords -const Pi = 3.14 -var counter int - -type Shape interface { - Area() float64 -} - -type Circle struct { - Radius float64 -} - -func (c Circle) Area() float64 { - return Pi * c.Radius * c.Radius -} - -// Map and channel usage -func process() { - m := map[string]int{"one": 1, "two": 2} - ch := make(chan int, 10) - - go func() { - for k, v := range m { - ch <- v - _ = k - } - close(ch) - }() - - for val := range ch { - switch { - case val == 1: - continue - default: - break - } - } -} - -// Builtin types -func types() { - var a int - var b int8 - var c int16 - var d int32 - var e int64 - var f uint - var g uint8 - var h uint16 - var i uint32 - var j uint64 - var k uintptr - var l float32 - var m float64 - var n complex64 - var o complex128 - var p byte - var q rune - var r string - var s bool - var t error - _, _, _, _, _, _, _, _, _, _ = a, b, c, d, e, f, g, h, i, j - _, _, _, _, _, _, _, _, _, _ = k, l, m, n, o, p, q, r, s, t -} - -// Builtin functions and constants 
-func builtins() { - s := make([]int, 0, 10) - s = append(s, 1, 2, 3) - n := len(s) - c := cap(s) - _ = n + c - - m := new(Circle) - _ = m - - x := complex(1.0, 2.0) - _ = real(x) - _ = imag(x) - - dst := make([]int, 3) - copy(dst, s) - - mp := map[string]int{"a": 1} - delete(mp, "a") - - print("debug") - println("debug") - - if false { - panic("error") - } - - defer func() { - recover() - }() -} - -// nil, true, false, iota -func constants() { - var p *int = nil - b := true - c := !false - _ = p - _ = b - _ = c -} - -// Strings and rune literals -func stringLiterals() { - s1 := "hello world" - s2 := `raw string literal -with newlines` - r1 := 'A' - _ = s1 - _ = s2 - _ = r1 -} - -// Labels and goto -func labels() { - i := 0 -loop: - if i < 10 { - i++ - goto loop - } -} - -// Channel operator <- -func channelOp() { - ch := make(chan int) - go func() { ch <- 42 }() - val := <-ch - _ = val -} - -// Operators -func operators() { - a, b := 10, 20 - _ = a + b - _ = a - b - _ = a * b - _ = a / b - _ = a % b - _ = a & b - _ = a | b - _ = a ^ b - _ = a << 2 - _ = a >> 2 - _ = a == b - _ = a != b - _ = a < b - _ = a > b - _ = a <= b - _ = a >= b - _ = !false && true || false - - c := 0 - c += 1 - c -= 1 - c *= 2 - c /= 2 - c %= 3 - c &= 0xFF - c |= 0x01 - c ^= 0x10 - c <<= 1 - c >>= 1 - - _ = c -} - -// Delimiters: brackets, parens, braces, dots, semicolons, commas -func delimiters() { - arr := [3]int{1, 2, 3} - _ = arr[0] - fmt.Println(arr) - _ = strings.Contains("abc", "a") -} - -/* Block comment - spanning multiple lines */ diff --git a/tests/syntax/samples/gotmpl-report.md b/tests/syntax/samples/gotmpl-report.md deleted file mode 100644 index 50726ab61e..0000000000 --- a/tests/syntax/samples/gotmpl-report.md +++ /dev/null @@ -1,100 +0,0 @@ -Go Template syntax highlighting: TS report -=========================================== - -Sample file: `gotmpl.tmpl` -TS query: `misc/syntax-ts/queries-override/gotmpl-highlights.scm` -TS colors: `misc/syntax-ts/colors.ini` 
`[gotmpl]` -Grammar: `ngalaiko/tree-sitter-go-template` (language: `gotmpl`) -Legacy reference: none (no `misc/syntax/*.syntax` file for Go templates) - -Note: There is no legacy syntax highlighting for Go templates in MC. This report -documents the TS highlighting choices only. - -Color assignments ------------------ - -- Keywords (`if`, `else`, `range`, `with`, `end`, `template`, `define`, `block`, - `break`, `continue`): `yellow`. -- Comments (`/* ... */`): `brown` -- standard MC comment color. -- Builtin functions (Go template + Sprig + Helm, 240 total): `yellow` - (function.builtin) -- distinguished from comments (`brown`) and from user - functions (`brightcyan`). - Go template builtins: `and`, `call`, `eq`, `ge`, - `gt`, `html`, `index`, `js`, `le`, `len`, `lt`, `ne`, `not`, `or`, `print`, - `printf`, `println`, `urlquery`. - Sprig categories: strings (`upper`, - `lower`, `trim`, `trunc`, `replace`, `quote`, etc.), lists (`list`, `first`, - `last`, `append`, `sortAlpha`, etc.), dicts (`dict`, `get`, `set`, `keys`, - `merge`, `deepCopy`, etc.), math (`add`, `sub`, `mul`, `div`, `ceil`, `floor`, - `round`, etc.), dates (`now`, `date`, `dateModify`, `ago`, `unixEpoch`, etc.), - crypto (`sha256sum`, `b64enc`, `uuidv4`, etc.), regex (`regexMatch`, - `regexFind`, `regexReplaceAll`, etc.), paths (`base`, `dir`, `ext`, `clean`, - `isAbs`), conversions (`toString`, `toJson`, `fromJson`, `toDate`, `kindOf`, - `typeOf`, `default`, `empty`, `coalesce`, `ternary`), and flow control - (`fail`, `until`, `seq`). - Helm-specific: `include`, `lookup`, `required`, - `tpl`, `toYaml`, `toYamlPretty`, `fromYaml`, `fromYamlArray`, `fromJsonArray`, - `toToml`, `mustToToml`. -- User function calls (custom functions not in the builtin list): `brightcyan` - (function) -- clearly distinguished from builtins. -- Method calls (chained selectors): `brightcyan` (function). 
-- Fields (`.Values`, `.Release`, `.Name`, `.Items`): `white` (property) -- data - references stand out from delimiters. -- Variables (`$name`, `$pool`, `$index`, `$`): `brightred` (variable.builtin) -- - matches bash convention for `$`-prefixed variables, clearly distinct from - fields and delimiters. -- Strings (double-quoted, raw backtick, rune literals): `green`. -- Escape sequences (`\n`, `\t`, `\\`, `\"`): `brightgreen`. -- Numbers (integers, floats, imaginary): `lightgray`. -- Boolean and nil constants (`true`, `false`, `nil`): `brightmagenta` - (constant.builtin). -- Pipe operator (`|`): `brightcyan` (operator). -- Assignment operators (`:=`, `=`): `brightcyan` (operator). -- Delimiters (`{{`, `}}`, `{{-`, `-}}`, `(`, `)`, `.`, `,`): `brightcyan` -- - template delimiters are visually prominent. -- Text outside template actions: DEFAULT -- plain text is uncolored as it could - be any host language. - -Design decisions ----------------- - -- Keywords and builtin functions share the same color (`yellow`). They are - structurally distinguishable: keywords appear as `{{ if`, `{{ range`, - `{{ end }}` while builtins appear as `{{ len`, `{{ default`, `| trunc`. Using - `yellow` for builtins instead of `brown` distinguishes them from comments. -- Fields and variables have distinct colors (`white` vs `brightred`) to - differentiate data access patterns: `.Values.name` (context data, white) vs - `$name` (local variable, brightred). -- Dot (`.`) is colored as `brightcyan` (delimiter) in all contexts. In Go - templates, dot serves double duty as the context accessor (`.Name`) and the - current context value (`.`). Both uses benefit from being visually distinct. -- Template names in `{{ template "name" }}` and `{{ define "name" }}` are - colored as `green` (string) since they are string literals. - -Wrapper grammar features ------------------------- - -The `gotmpl` grammar is configured as a wrapper grammar in the `wrappers` config -file. 
This enables two automatic features: - -- **ERROR fallback**: when a `.yaml` file (or `.json`, `.toml`, `.html`, `.xml`, - `.css`) fails to parse with its native grammar (e.g. due to `{{ }}` Go - template syntax), the system automatically tries `gotmpl` as a fallback. If - successful, the original grammar is injected into `gotmpl`'s `text` nodes, - providing host language highlighting alongside Go template highlighting. -- **Compound extensions**: files like `README.md.gotmpl` or `values.yaml.tmpl` - automatically get the inner extension's grammar injected into `text` nodes. - The outer extension (`.gotmpl`, `.tmpl`, `.tpl`) determines the gotmpl - grammar, and the inner extension (`.md`, `.yaml`) determines the host language - injection. - -Recursive injection (up to 3 levels) is supported. For example, a `.md.gotmpl` -file gets: gotmpl -> markdown -> markdown_inline, and fenced code blocks within -the markdown get language-specific highlighting (e.g. gotmpl -> markdown -> -python). - -Known shortcomings ------------------- - -- No language injection for text outside `{{ }}` in plain `.tmpl` files. Without - a compound extension or ERROR fallback, the `text` nodes remain uncolored. -- The builtin function list is hardcoded in the query override file. New Sprig - or Helm releases may add functions that require manual additions to the - `#any-of?` predicate. diff --git a/tests/syntax/samples/gotmpl.tmpl b/tests/syntax/samples/gotmpl.tmpl deleted file mode 100644 index 2dbadb3415..0000000000 --- a/tests/syntax/samples/gotmpl.tmpl +++ /dev/null @@ -1,234 +0,0 @@ -{{/* Comment: demonstrate Go template features */}} - -{{/* Simple variable output */}} -Hello, {{ .Name }}! - -{{/* Conditional */}} -{{ if .IsAdmin }} -

Admin Dashboard

-{{ else if .IsModerator }} -

Moderator View

-{{ else }} -

User View

-{{ end }} - -{{/* Range loop */}} -{{ range .Items }} -
  • {{ .Title }} - {{ .Price }}
  • -{{ end }} - -{{/* Range with variables */}} -{{ range $index, $item := .Items }} -
  • {{ $index }}: {{ $item.Name }}
  • -{{ end }} - -{{/* With block */}} -{{ with .Footer }} -
    {{ . }}
    -{{ end }} - -{{/* Variable assignment */}} -{{ $name := "World" }} -{{ $name = "Hello World" }} -{{ $count := 42 }} -{{ $pi := 3.14 }} -{{ $complex := 1i }} -{{ $char := 'A' }} -{{ $raw := `raw string` }} - -{{/* Go template builtin functions */}} -{{ len .Items }} -{{ index .Items 0 }} -{{ slice .Items 1 3 }} -{{ printf "%s has %d items" .Name (len .Items) }} -{{ println "debug output" }} -{{ html .Content }} -{{ js .Script }} -{{ urlquery .SearchTerm }} - -{{/* Comparison functions */}} -{{ if eq .Status "active" }}active{{ end }} -{{ if ne .Count 0 }}not zero{{ end }} -{{ if lt .Price 10.0 }}cheap{{ end }} -{{ if gt .Score 90 }}excellent{{ end }} -{{ if le .Age 18 }}minor{{ end }} -{{ if ge .Rating 4 }}good{{ end }} - -{{/* Logical functions */}} -{{ if and .IsActive .IsVerified }}verified active user{{ end }} -{{ if or .IsAdmin .IsModerator }}privileged user{{ end }} -{{ if not .IsBlocked }}allowed{{ end }} - -{{/* Sprig string functions */}} -{{ .Name | upper }} -{{ .Name | lower }} -{{ .Name | title }} -{{ .Name | trim }} -{{ .Name | trimSuffix "-" }} -{{ .Name | trimPrefix "v" }} -{{ .Name | trunc 63 }} -{{ .Name | quote }} -{{ .Name | squote }} -{{ .Name | nospace }} -{{ .Name | camelcase }} -{{ .Name | snakecase }} -{{ .Name | kebabcase }} -{{ .Name | replace "old" "new" }} -{{ .Name | repeat 3 }} -{{ .Name | contains "sub" }} -{{ .Name | hasPrefix "v" }} -{{ .Name | hasSuffix "-" }} -{{ .Name | substr 0 5 }} -{{ indent 4 .Content }} -{{ nindent 2 .Content }} - -{{/* Sprig list functions */}} -{{ list 1 2 3 }} -{{ .Items | first }} -{{ .Items | last }} -{{ .Items | rest }} -{{ .Items | initial }} -{{ .Items | reverse }} -{{ .Items | uniq }} -{{ .Items | sortAlpha }} -{{ append .Items "new" }} -{{ prepend .Items "first" }} -{{ without .Items "remove" }} -{{ has "item" .Items }} -{{ .Items | compact }} -{{ concat .ListA .ListB }} -{{ chunk 3 .Items }} - -{{/* Sprig dict functions */}} -{{ dict "key1" "val1" "key2" "val2" }} -{{ .Map | keys }} -{{ .Map | 
values }} -{{ get .Map "key" }} -{{ set .Map "key" "value" }} -{{ unset .Map "key" }} -{{ hasKey .Map "key" }} -{{ merge .MapA .MapB }} -{{ mergeOverwrite .MapA .MapB }} -{{ pick .Map "key1" "key2" }} -{{ omit .Map "key1" }} -{{ deepCopy .Map }} -{{ deepEqual .MapA .MapB }} -{{ pluck "key" .MapA .MapB }} -{{ dig "a" "b" "c" .Nested }} - -{{/* Sprig type and conversion functions */}} -{{ .Value | toString }} -{{ .Value | toJson }} -{{ .Value | toPrettyJson }} -{{ .Value | toRawJson }} -{{ .Value | fromJson }} -{{ .Value | toDate "2006-01-02" }} -{{ kindOf .Value }} -{{ typeOf .Value }} -{{ typeIs "string" .Value }} -{{ default "fallback" .Value }} -{{ empty .Value }} -{{ coalesce .A .B .C }} -{{ ternary "yes" "no" .Cond }} - -{{/* Sprig math functions */}} -{{ add 1 2 }} -{{ sub 10 3 }} -{{ mul 4 5 }} -{{ div 10 2 }} -{{ mod 10 3 }} -{{ max 1 2 3 }} -{{ min 1 2 3 }} -{{ ceil 1.5 }} -{{ floor 1.5 }} -{{ round 3.14159 2 }} - -{{/* Sprig crypto functions */}} -{{ sha256sum "data" }} -{{ sha1sum "data" }} -{{ b64enc "data" }} -{{ b64dec .Encoded }} -{{ uuidv4 }} - -{{/* Sprig date functions */}} -{{ now }} -{{ now | date "2006-01-02" }} -{{ now | dateModify "+1h" }} -{{ now | ago }} -{{ now | unixEpoch }} -{{ .Timestamp | duration }} -{{ .Timestamp | durationRound }} - -{{/* Sprig regex functions */}} -{{ regexMatch "^test" .Value }} -{{ regexFind "\\d+" .Value }} -{{ regexFindAll "\\d+" .Value -1 }} -{{ regexReplaceAll "old" .Value "new" }} -{{ regexSplit "," .Value -1 }} - -{{/* Sprig path functions */}} -{{ base "/path/to/file.txt" }} -{{ dir "/path/to/file.txt" }} -{{ ext "/path/to/file.txt" }} -{{ clean "/path/../to/file.txt" }} -{{ isAbs "/path/to/file.txt" }} - -{{/* Sprig flow control functions */}} -{{ fail "something went wrong" }} -{{ until 5 }} -{{ untilStep 0 10 2 }} -{{ seq 1 5 }} - -{{/* Pipeline with pipe operator */}} -{{ .Name | printf "Hello, %s" | html }} -{{ .Items | len }} - -{{/* Boolean and nil constants */}} -{{ if true }}always{{ end }} -{{ if 
false }}never{{ end }} -{{ if nil }}nil value{{ end }} - -{{/* Template calls */}} -{{ template "header" . }} -{{ template "footer" }} - -{{/* Define a template */}} -{{ define "greeting" }} - Hello, {{ .Name }}! -{{ end }} - -{{/* Block (define with default) */}} -{{ block "sidebar" . }} - -{{ end }} - -{{/* Method call */}} -{{ .User.FullName }} -{{ .Items.Sort.First }} - -{{/* Trim whitespace markers */}} -{{- .Name -}} -{{- if .Show -}} - visible -{{- end -}} - -{{/* Nested pipelines */}} -{{ if eq (len .Items) 0 }} - No items found. -{{ end }} - -{{/* Escape sequences in strings */}} -{{ printf "line1\nline2\ttab\\backslash" }} -{{ printf "quote: \"hello\"" }} - -{{/* Range with break and continue */}} -{{ range .Items }} - {{ if eq .Status "skip" }}{{ continue }}{{ end }} - {{ if eq .Status "stop" }}{{ break }}{{ end }} - {{ .Name }} -{{ end }} - -{{/* User-defined (non-builtin) function calls */}} -{{ include "mytemplate" . }} -{{ toYaml .Values }} -{{ mustMergeOverwrite (deepCopy .Defaults) .Overrides | toYaml | nindent 2 }} diff --git a/tests/syntax/samples/haskell-report.md b/tests/syntax/samples/haskell-report.md deleted file mode 100644 index b9d4c3b3a8..0000000000 --- a/tests/syntax/samples/haskell-report.md +++ /dev/null @@ -1,77 +0,0 @@ -Haskell syntax highlighting: TS vs Legacy comparison report -============================================================ - -Sample file: `haskell.hs` -Legacy reference: `misc/syntax/haskell.syntax` -TS query: `misc/syntax-ts/queries-override/haskell-highlights.scm` -TS colors: `misc/syntax-ts/colors.ini` `[haskell]` - -Aligned with legacy -------------------- - -- Language keywords (`module`, `where`, `import`, `qualified`, `as`, `hiding`, - `data`, `newtype`, `type`, `class`, `instance`, `deriving`, `do`, `let`, `in`, - `case`, `of`, `if`, `then`, `else`, `infixl`, `infixr`, `forall`): `yellow` - - MATCH -- Constructors and type names (`Tree`, `Leaf`, `Node`, `Shape`, `Circle`, - `Rectangle`, `Person`, `Wrapper`, 
`Int`, `String`, `Double`, `Float`, `Char`, - `Bool`, `IO`, `True`, `False`, `Show`, `Eq`): `white` - MATCH -- Strings (double-quoted `"..."`): `green` - MATCH -- Char literals (`'a'`, `'\n'`, `'\\'`): `brightgreen` - MATCH -- Comments (line `--` and block `{- -}`): `brown` - MATCH -- Operators (`=`, `->`, `<-`): `yellow` - MATCH -- Symbol operators (`::`, `=>`, `|`, `\`, `@`, `~`, `&&`, `||`, `++`, `.`, `$`): - `white` - MATCH -- Commas (`,`): `brightcyan` - MATCH - -Intentional improvements over legacy -------------------------------------- - -- Pragmas (`{-# LANGUAGE GADTs #-}`): TS colors these as `brightgreen` via - `comment.special`. Legacy colors them as `green` (the `{-# ... #-}` context). - Both are green-family colors; TS uses a slightly brighter shade. -- Function signatures (`area`, `classify`, `describe`, `depth`, `processIO`, - `transform`, `numbers`, `hexAndOctal`, `charExamples`, `greeting`, `ops`, - `evens`, `identity`, `main`): TS colors function names in type signatures as - `brightcyan` via the `function` capture. Legacy does not distinguish function - names from other identifiers. -- Numeric literals (`42`, `3.14`, `1.0e-5`, `0xFF`, `0o77`): TS colors these as - `brightgreen` via `number.builtin`. Legacy also colors digits as `brightgreen` - but uses regex patterns that may miss some formats. TS handles `0o77` (octal) - which legacy does not match. -- The `_` wildcard pattern: legacy colors it as `brightmagenta` (matching the - `_\[...\]` pattern for identifiers starting with underscore). TS leaves `_` as - default text, which is more appropriate since `_` is a pattern wildcard, not a - warning. -- Float literals like `3.14` and `5.0`: legacy splits these at the `.` - character, coloring digits as `brightgreen` and `.` as `white`. TS colors the - entire float literal uniformly as `brightgreen`. 
- -Known shortcomings ------------------- - -- Module names in imports (`Data.List`, `Data.Map`, `Data.Maybe`): legacy colors - these as `white` because uppercase identifiers match the constructor/type - pattern. TS does not color module names in import statements, leaving them as - default text. -- Brackets and parentheses (`(`, `)`, `[`, `]`): legacy colors these as - `brightcyan`. TS does not capture brackets as delimiters, leaving them as - default text. -- Curly braces (`{`, `}`): legacy colors these as `white`. TS does not capture - them, leaving as default text. -- The semicolons (`;`): legacy colors them as `yellow`. TS colors them as - `brightcyan` via the `delimiter` capture. Minor color difference. -- The backtick infix operator (`` `mod` ``): legacy colors the content between - backticks as `white` via an exclusive context. TS does not have a special - capture for backtick-quoted infix operators, so `mod` appears as default text. -- The `$` operator: legacy colors it as `yellow`. TS colors it as `white` via - `keyword.other` (grouped with other symbol operators). Minor color difference. -- The `<` and `>` operators: legacy colors them as `yellow`. TS may not capture - standalone `<` and `>` in all expression contexts since they are not in the - explicit operator list of the TS query. -- String escape sequences (`\n`, `\t`, `\0`): legacy colors escape sequences - inside strings as `brightgreen`. TS colors the entire string uniformly as - `green`, not distinguishing escape sequences. -- The Haskell `where` block indentation-sensitive bindings and `do` notation are - structurally parsed by tree-sitter, giving TS an advantage in correctness over - legacy regex matching. 
diff --git a/tests/syntax/samples/haskell.hs b/tests/syntax/samples/haskell.hs deleted file mode 100644 index d5a0015529..0000000000 --- a/tests/syntax/samples/haskell.hs +++ /dev/null @@ -1,138 +0,0 @@ -{-# LANGUAGE GADTs #-} -{-# LANGUAGE TypeFamilies #-} - --- | Module demonstrating Haskell syntax features -module Sample.Haskell - ( Tree(..) - , Shape(..) - , Functor(..) - , main - ) where - -import Data.List (sort, nub) -import qualified Data.Map as Map -import Data.Maybe hiding (fromJust) - --- Data type with constructors -data Tree a - = Leaf - | Node a (Tree a) (Tree a) - deriving (Show, Eq) - --- Newtype -newtype Wrapper a = Wrapper { unwrap :: a } - --- Record syntax -data Person = Person - { firstName :: String - , lastName :: String - , age :: Int - } deriving (Show) - --- Type alias -type Name = String -type Mapping k v = Map.Map k v - --- Type class definition -class Container f where - empty :: f a - insert :: a -> f a -> f a - toList :: f a -> [a] - --- Instance -instance Container [] where - empty = [] - insert = (:) - toList = id - --- GADT-style data -data Shape where - Circle :: Double -> Shape - Rectangle :: Double -> Double -> Shape - --- Function with type signature -area :: Shape -> Double -area (Circle r) = pi * r * r -area (Rectangle w h) = w * h - --- Guards and where clause -classify :: Int -> String -classify n - | n < 0 = "negative" - | n == 0 = "zero" - | n < 10 = "small" - | otherwise = "large" - where - _ = n + 1 - --- Pattern matching with case -describe :: Tree a -> String -describe t = case t of - Leaf -> "empty" - Node _ l r -> "node with " ++ show (depth l) - ++ " and " ++ show (depth r) - --- Recursive function -depth :: Tree a -> Int -depth Leaf = 0 -depth (Node _ l r) = 1 + max (depth l) (depth r) - --- Do notation and let/in -processIO :: IO () -processIO = do - putStrLn "Enter a number:" - input <- getLine - let n = read input :: Int - doubled = n * 2 - if n > 0 - then putStrLn $ "Positive: " ++ show doubled - else putStrLn 
"Not positive" - --- Lambda and higher-order functions -transform :: [Int] -> [Int] -transform = map (\x -> x * 2 + 1) . filter (> 0) - --- Infix operators and fixity -infixl 6 |+| -(|+|) :: Int -> Int -> Int -x |+| y = x + y + 1 - -infixr 5 |:| -(|:|) :: a -> [a] -> [a] -x |:| xs = x : xs - --- Numeric literals -numbers :: (Int, Float, Double) -numbers = (42, 3.14, 1.0e-5) - -hexAndOctal :: (Int, Int) -hexAndOctal = (0xFF, 0o77) - --- Char literals -charExamples :: (Char, Char, Char) -charExamples = ('a', '\n', '\\') - --- String with escapes -greeting :: String -greeting = "Hello,\n\tworld!\0" - --- Operators: comparison, logical, arithmetic -ops :: Bool -ops = (10 > 5) && (3 /= 4) || not (True == False) - --- List comprehension with guards -evens :: [Int] -> [Int] -evens xs = [x | x <- xs, x `mod` 2 == 0] - --- Forall (with extension) -identity :: forall a. a -> a -identity x = x - --- Main function with do notation -main :: IO () -main = do - let p = Person "John" "Doe" 30 - putStrLn $ firstName p ++ " " ++ lastName p - print $ area (Circle 5.0) - print $ classify (-3) - processIO diff --git a/tests/syntax/samples/hcl-report.md b/tests/syntax/samples/hcl-report.md deleted file mode 100644 index d37444cfed..0000000000 --- a/tests/syntax/samples/hcl-report.md +++ /dev/null @@ -1,63 +0,0 @@ -HCL syntax highlighting: TS report -==================================== - -Sample file: `hcl.hcl` -TS query: `misc/syntax-ts/queries-override/hcl-highlights.scm` -TS colors: `misc/syntax-ts/colors.ini` `[hcl]` -Grammar: `hcl` (language: `hcl`) -Legacy reference: none (legacy produces no highlighting for `.hcl` -files) - -Note: There is no legacy syntax highlighting for generic HCL files -in MC. This report documents the TS highlighting choices only. - -Color assignments ------------------ - -- Top-level block names (`service`, `job`, `locals`, `output`, - `variable`, etc.): `brightmagenta` (keyword.directive) — any - identifier that starts a top-level block gets this color. 
HCL is - a generic language with no reserved block names. -- Nested block names (`group`, `task`, `config`, `resources`, etc.): - DEFAULT — inner blocks are not colored to avoid visual noise. -- Control flow keywords (`if`, `else`, `endif`, `for`, `endfor`, - `in`): `yellow`. -- Comments (`#`, `//`, `/* */`): `brown`. -- Strings (quoted, heredocs): `green`. -- Numbers: `lightgray`. -- Booleans (`true`, `false`): `lightgray` (constant). -- Null (`null`): `lightgray` (constant). -- Type references (`string`, `number`, `bool`, `object`, `tuple`, - `list`, `map`, `set`, `any`): `yellow` (type). -- Operators (`=`, `+`, `-`, `*`, `/`, `%`, `!`, `==`, `!=`, `<`, - `>`, `<=`, `>=`, `&&`, `||`, `?`, `=>`, `:`): `brightcyan`. -- Delimiters (`{`, `}`, `[`, `]`, `(`, `)`, `,`, `.`, `.*`, - `[*]`): `brightcyan`. -- Template interpolation (`${`, `}`): `brightcyan` (operator). -- Heredoc identifiers: `green`. -- Splat expressions (`[*]`, `.*`): `brightcyan` (delimiter). - -Design decisions ----------------- - -- HCL is a generic configuration language with no reserved block - names. All top-level block identifiers are colored uniformly as - `brightmagenta` regardless of their name. This distinguishes - structural blocks from attribute keys without hardcoding any - specific keyword list. -- Nested block names are left as DEFAULT to avoid confusion with - variable references and attribute keys, which also appear as - plain identifiers. -- No variable reference prefixes are colored (unlike Terraform's - `var.`, `local.`, etc.) since HCL itself has no such convention. - Applications that embed HCL may define their own prefixes. - -Known shortcomings ------------------- - -- Legacy MC has no highlighting for `.hcl` files at all, so there - is nothing to compare against. -- Function calls in expressions (e.g. `join()`, `upper()`) are not - colored. 
The HCL grammar parses them as `function_call` nodes but - the query does not capture them to keep the highlighting minimal - for a generic language. diff --git a/tests/syntax/samples/hcl.hcl b/tests/syntax/samples/hcl.hcl deleted file mode 100644 index 4ba30fda48..0000000000 --- a/tests/syntax/samples/hcl.hcl +++ /dev/null @@ -1,130 +0,0 @@ -# Generic HCL sample: demonstrate all syntax features - -# Simple block -service "web" { - port = 8080 - address = "0.0.0.0" -} - -# Nested blocks -job "example" { - group "cache" { - task "redis" { - driver = "docker" - - config { - image = "redis:7" - ports = ["redis"] - } - - resources { - cpu = 500 - memory = 256 - } - } - } -} - -# Variables and expressions -locals { - environment = "production" - region = "us-east-1" - enabled = true - count = 3 - ratio = 0.75 - nothing = null -} - -# Conditional expression -output "message" { - value = local.enabled ? "yes" : "no" -} - -# For expression -output "names" { - value = [for item in local.items : item.name] -} - -output "filtered" { - value = {for k, v in local.map : k => v if v != ""} -} - -# String interpolation -output "greeting" { - value = "Hello, ${local.environment}!" 
-} - -# Heredoc -output "script" { - value = <<-EOT - #!/bin/bash - echo "Hello" - echo "World" - EOT -} - -# Operators -locals { - sum = 1 + 2 - diff = 10 - 3 - product = 4 * 5 - quotient = 10 / 3 - remainder = 10 % 3 - negative = -1 - not_true = !true - and_result = true && false - or_result = true || false - eq = 1 == 1 - neq = 1 != 2 - gt = 3 > 2 - gte = 3 >= 3 - lt = 1 < 2 - lte = 1 <= 1 -} - -# Type references -variable "config" { - type = object({ - name = string - count = number - enabled = bool - tags = map(string) - ports = list(number) - addrs = set(string) - data = tuple([string, number]) - extra = any - }) -} - -# Splat and index expressions -output "all_ids" { - value = resource.example[*].id -} - -output "first" { - value = resource.example[0].id -} - -# Collection values -locals { - list_val = [1, 2, 3] - map_val = {key1 = "val1", key2 = "val2"} - empty_map = {} -} - -# Block with multiple labels -provisioner "remote-exec" "setup" { - command = "echo hello" -} - -# Comments -# Line comment - -// Another line comment - -/* Block comment */ - -/* - Multi-line - block comment -*/ diff --git a/tests/syntax/samples/html-report.md b/tests/syntax/samples/html-report.md deleted file mode 100644 index 0dd952ddf2..0000000000 --- a/tests/syntax/samples/html-report.md +++ /dev/null @@ -1,60 +0,0 @@ -HTML syntax highlighting: TS vs Legacy comparison report -========================================================= - -Sample file: `tests/syntax/samples/html.html` -Legacy reference: `misc/syntax/html.syntax` -TS query: `misc/syntax-ts/queries-override/html-highlights.scm` -TS colors: `misc/syntax-ts/colors.ini` `[html]` - -Aligned with legacy -------------------- - -- Tag names (``, ``, `
    `, etc.): `brightcyan` - MATCH -- Angle brackets (`<`, `>`, ``): `brightcyan` - MATCH -- Known attribute names (`class`, `id`, `href`, `src`, etc.): `yellow` - MATCH -- Equals sign in attributes: `brightred` - MATCH -- Quoted attribute values: `cyan` - MATCH -- Entity references (`&amp;`, `&lt;`, `&copy;`, etc.): `brightgreen` - MATCH -- Comments (`<!-- ... -->`): `brown` - MATCH -- DOCTYPE declaration: `brightred` - MATCH -- Closing tags (`
    `, ``): `brightcyan` - MATCH - -Intentional improvements over legacy -------------------------------------- - -- TS highlights ALL attribute names uniformly as `yellow` via `@property.key`, - while legacy only recognizes a fixed list of known attributes per tag (e.g. - `class`, `id`, `href`). Unrecognized attributes in legacy fall to the tag - context color (brightcyan). TS correctly colors `data-theme`, `aria-label`, - `loading`, `placeholder`, `frameborder`, `autoplay`, `muted`, and all custom - or newer HTML5 attributes. -- TS handles all tags uniformly regardless of tag name, while legacy has - separate `context` blocks for each known HTML tag. Unknown tags in legacy - fall to a generic `< >` context colored `cyan` instead of `brightcyan`. TS - treats `