Skip to content

Commit 2db41e0

Browse files
committed
Syntax: id/operator regexes update, some fixes
Update and simplify a little the regexes used in detecting identifiers and operators. Might help performance a little. Also minor fixes to a few regexes (euler, quotable...)
1 parent 7de2dac commit 2db41e0

File tree

1 file changed

+60
-43
lines changed

1 file changed

+60
-43
lines changed

syntax/julia.vim

Lines changed: 60 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -29,45 +29,60 @@ let s:julia_spellcheck_comments = get(g:, "julia_spellcheck_comments", 1)
2929

3030
let s:julia_highlight_operators = get(g:, "julia_highlight_operators", 1)
3131

32-
" characters which cannot be used in identifiers. This list is very incomplete:
33-
" 1) it only cares about charactes below 256
34-
" 2) it doesn't distinguish between what's allowed as the 1st char vs in the
35-
" rest of an identifier (e.g. digits and `!`)
36-
" Despite these shortcomings, it seems to do a decent job.
37-
" note: \U5B and \U5D are '[' and ']'
38-
let s:nonid_chars = "\U01-\U07" . "\U0E-\U1F" .
39-
\ "?\"#$'(,.:;=@`\\U5B{" .
40-
\ "\U80-\UA1" . "\UA7\UA8\UAB\UAD\UAF\UB4" . "\UB6-\UB8" . "\UBB\UBF"
41-
42-
let s:nonidS_chars = "[:space:])\\U5D}" . s:nonid_chars
43-
44-
" the following excludes '!' since it can be used as an identifier,
45-
" and '$' since it can be used in interpolations
46-
" note that \U2D is '-'
47-
let s:uniop_chars = "+\\U2D~¬√∛∜⋆"
48-
49-
let s:binop_chars = "=+\\U2D*/\\%÷^&|⊻<>≤≥≡≠≢∈∉⋅×∪∩⊆⊈⊂⊄⊊←→∋∌⊕⊖⊞⊟∘∧⊗⊘↑↓∨⊠±⟂⋆"
50-
51-
" the following is a list of all remainig valid operator chars,
52-
" but it's more efficient when expressed with ranges (see below)
53-
" let s:binop_chars_extra = "↔↚↛↠↣↦↮⇎⇏⇒⇔⇴⇶⇷⇸⇹⇺⇻⇼⇽⇾⇿⟵⟶⟷⟷⟹⟺⟻⟼⟽⟾⟿⤀⤁⤂⤃⤄⤅⤆⤇⤌⤍⤎⤏⤐⤑⤔⤕⤖⤗⤘⤝⤞⤟⤠⥄⥅⥆⥇⥈⥊⥋⥎⥐⥒⥓⥖⥗⥚⥛⥞⥟⥢⥤⥦⥧⥨⥩⥪⥫⥬⥭⥰⧴⬱⬰⬲⬳⬴⬵⬶⬷⬸⬹⬺⬻⬼⬽⬾⬿⭀⭁⭂⭃⭄⭇⭈⭉⭊⭋⭌←→" .
54-
" \ "∝∊∍∥∦∷∺∻∽∾≁≃≄≅≆≇≈≉≊≋≌≍≎≐≑≒≓≔≕≖≗≘≙≚≛≜≝≞≟≣≦≧≨≩≪≫≬≭≮≯≰≱≲≳≴≵≶≷≸≹≺≻≼≽≾≿⊀⊁⊃⊅⊇⊉⊋⊏⊐⊑⊒⊜⊩⊬⊮⊰⊱⊲⊳⊴⊵⊶⊷⋍⋐⋑⋕⋖⋗⋘⋙⋚⋛⋜⋝⋞⋟⋠⋡⋢⋣⋤⋥⋦⋧⋨⋩⋪⋫⋬⋭⋲⋳⋴⋵⋶⋷⋸⋹⋺⋻⋼⋽⋾⋿⟈⟉⟒⦷⧀⧁⧡⧣⧤⧥⩦⩧⩪⩫⩬⩭⩮⩯⩰⩱⩲⩳⩴⩵⩶⩷⩸⩹⩺⩻⩼⩽⩾⩿⪀⪁⪂⪃⪄⪅⪆⪇⪈⪉⪊⪋⪌⪍⪎⪏⪐⪑⪒⪓⪔⪕⪖⪗⪘⪙⪚⪛⪜⪝⪞⪟⪠⪡⪢⪣⪤⪥⪦⪧⪨⪩⪪⪫⪬⪭⪮⪯⪰⪱⪲⪳⪴⪵⪶⪷⪸⪹⪺⪻⪼⪽⪾⪿⫀⫁⫂⫃⫄⫅⫆⫇⫈⫉⫊⫋⫌⫍⫎⫏⫐⫑⫒⫓⫔⫕⫖⫗⫘⫙⫷⫸⫹⫺⊢⊣" .
55-
" \ "⊔∓∔∸≂≏⊎⊽⋎⋓⧺⧻⨈⨢⨣⨤⨥⨦⨧⨨⨩⨪⨫⨬⨭⨮⨹⨺⩁⩂⩅⩊⩌⩏⩐⩒⩔⩖⩗⩛⩝⩡⩢⩣" .
56-
" \ "⊙⊚⊛⊡⊓∗∙∤⅋≀⊼⋄⋇⋉⋊⋋⋌⋏⋒⟑⦸⦼⦾⦿⧶⧷⨇⨰⨱⨲⨳⨴⨵⨶⨷⨸⨻⨼⨽⩀⩃⩄⩋⩍⩎⩑⩓⩕⩘⩚⩜⩞⩟⩠⫛⊍▷⨝⟕⟖⟗" .
57-
" \ "⇵⟰⟱⤈⤉⤊⤋⤒⤓⥉⥌⥍⥏⥑⥔⥕⥘⥙⥜⥝⥠⥡⥣⥥⥮⥯↑↓"
58-
59-
" same as above, but with character ranges, for performance
60-
let s:binop_chars_extra = "\\U214B\\U2190-\\U2194\\U219A\\U219B\\U21A0\\U21A3\\U21A6\\U21AE\\U21CE\\U21CF\\U21D2\\U21D4\\U21F4-\\U21FF\\U2208-\\U220D\\U2213\\U2214\\U2217-\\U2219\\U221D\\U2224-\\U222A\\U2237\\U2238\\U223A\\U223B\\U223D\\U223E\\U2240-\\U228B\\U228D-\\U229C\\U229E-\\U22A3\\U22A9\\U22AC\\U22AE\\U22B0-\\U22B7\\U22BB-\\U22BD\\U22C4-\\U22C7\\U22C9-\\U22D3\\U22D5-\\U22ED\\U22F2-\\U22FF\\U25B7\\U27C8\\U27C9\\U27D1\\U27D2\\U27D5-\\U27D7\\U27F0\\U27F1\\U27F5-\\U27F7\\U27F7\\U27F9-\\U27FF\\U2900-\\U2918\\U291D-\\U2920\\U2944-\\U2970\\U29B7\\U29B8\\U29BC\\U29BE-\\U29C1\\U29E1\\U29E3-\\U29E5\\U29F4\\U29F6\\U29F7\\U29FA\\U29FB\\U2A07\\U2A08\\U2A1D\\U2A22-\\U2A2E\\U2A30-\\U2A3D\\U2A40-\\U2A45\\U2A4A-\\U2A58\\U2A5A-\\U2A63\\U2A66\\U2A67\\U2A6A-\\U2AD9\\U2ADB\\U2AF7-\\U2AFA\\U2B30-\\U2B44\\U2B47-\\U2B4C\\UFFE9-\\UFFEC"
61-
62-
" a Julia identifier, sort of
63-
let s:idregex = '\%([^' . s:nonidS_chars . '0-9!' . s:uniop_chars . s:binop_chars . '][^' . s:nonidS_chars . s:uniop_chars . s:binop_chars . s:binop_chars_extra . ']*\)'
64-
65-
let s:operators = '\%(' . '\.\%([-+*/^÷%|&!]\|//\|\\\|<<\|>>>\?\)\?=' .
66-
\ '\|' . '[:$<>]=\|||\|&&\||>\|<|\|<:\|>:\|::\|<<\|>>>\?\|//\|[-=]>\|\.\{3\}' .
67-
\ '\|' . '\.\?[' . s:uniop_chars . '!]' .
68-
\ '\|' . '\.\?[' . s:binop_chars . s:binop_chars_extra . ']' .
32+
" List of characters, up to \UFF, which cannot be used in identifiers.
33+
" (It includes operator characters; we don't consider them identifiers.)
34+
" This is used mostly in lookbehinds with `\@<=`, e.g. when we need to check
35+
" that that we're not in the middle of an identifier.
36+
" It doesn't include a few characters (spaces and all closing parentheses)
37+
" because those may or may not be valid in the lookbehind on a case-by-case
38+
" basis.
39+
let s:nonid_chars = '\U00-\U08' . '\U0A-\U1F' .
40+
\ '\U21-\U28' . '\U2A-\U2F' . '\U3A-\U40' . '\U5B-\U5E' . '\U60' . '\U7B\U7C' .
41+
\ '\U7E-\UA1' . '\UA7\UA8' . '\UAB-\UAD' . '\UAF\UB1\UB4' . '\UB6-\UB8' . '\UBB\UBF' . '\UD7\UF7'
42+
43+
" The complete list
44+
let s:nonidS_chars = '[:space:])\U5D}' . s:nonid_chars
45+
46+
47+
" List of all valid operator chars up to \UFF (NOTE: they must all be included
48+
" in s:nonidS_chars, so that if we include that, then this is redundant)
49+
" It does not include '!' since it can be used in an identifier.
50+
" The list contains the following characters: '%&*+-/<=>\\^|~¬±×÷'
51+
let s:op_chars = '\U25\U26\U2A\U2B\U2D\U2F\U3C-\U3E\U5C\U5E\U7C\U7E\UAC\UB1\UD7\UF7'
52+
53+
" List of all valid operator chars above \UFF
54+
" Written with ranges for performance reasons
55+
" The list contains the following characters: '…⁝⅋←↑→↓↔↚↛↜↝↞↠↢↣↤↦↩↪↫↬↮↶↷↺↻↼↽⇀⇁⇄⇆⇇⇉⇋⇌⇍⇎⇏⇐⇒⇔⇚⇛⇜⇝⇠⇢⇴⇵⇶⇷⇸⇹⇺⇻⇼⇽⇾⇿∈∉∊∋∌∍∓∔∗∘∙√∛∜∝∤∥∦∧∨∩∪∷∸∺∻∽∾≀≁≂≃≄≅≆≇≈≉≊≋≌≍≎≏≐≑≒≓≔≕≖≗≘≙≚≛≜≝≞≟≠≡≢≣≤≥≦≧≨≩≪≫≬≭≮≯≰≱≲≳≴≵≶≷≸≹≺≻≼≽≾≿⊀⊁⊂⊃⊄⊅⊆⊇⊈⊉⊊⊋⊍⊎⊏⊐⊑⊒⊓⊔⊕⊖⊗⊘⊙⊚⊛⊜⊞⊟⊠⊡⊢⊣⊩⊬⊮⊰⊱⊲⊳⊴⊵⊶⊷⊻⊼⊽⋄⋅⋆⋇⋉⋊⋋⋌⋍⋎⋏⋐⋑⋒⋓⋕⋖⋗⋘⋙⋚⋛⋜⋝⋞⋟⋠⋡⋢⋣⋤⋥⋦⋧⋨⋩⋪⋫⋬⋭⋮⋯⋰⋱⋲⋳⋴⋵⋶⋷⋸⋹⋺⋻⋼⋽⋾⋿⌿▷⟂⟈⟉⟑⟒⟕⟖⟗⟰⟱⟵⟶⟷⟹⟺⟻⟼⟽⟾⟿⤀⤁⤂⤃⤄⤅⤆⤇⤈⤉⤊⤋⤌⤍⤎⤏⤐⤑⤒⤓⤔⤕⤖⤗⤘⤝⤞⤟⤠⥄⥅⥆⥇⥈⥉⥊⥋⥌⥍⥎⥏⥐⥑⥒⥓⥔⥕⥖⥗⥘⥙⥚⥛⥜⥝⥞⥟⥠⥡⥢⥣⥤⥥⥦⥧⥨⥩⥪⥫⥬⥭⥮⥯⥰⦷⦸⦼⦾⦿⧀⧁⧡⧣⧤⧥⧴⧶⧷⧺⧻⨇⨈⨝⨟⨢⨣⨤⨥⨦⨧⨨⨩⨪⨫⨬⨭⨮⨰⨱⨲⨳⨴⨵⨶⨷⨸⨹⨺⨻⨼⨽⩀⩁⩂⩃⩄⩅⩊⩋⩌⩍⩎⩏⩐⩑⩒⩓⩔⩕⩖⩗⩘⩚⩛⩜⩝⩞⩟⩠⩡⩢⩣⩦⩧⩪⩫⩬⩭⩮⩯⩰⩱⩲⩳⩴⩵⩶⩷⩸⩹⩺⩻⩼⩽⩾⩿⪀⪁⪂⪃⪄⪅⪆⪇⪈⪉⪊⪋⪌⪍⪎⪏⪐⪑⪒⪓⪔⪕⪖⪗⪘⪙⪚⪛⪜⪝⪞⪟⪠⪡⪢⪣⪤⪥⪦⪧⪨⪩⪪⪫⪬⪭⪮⪯⪰⪱⪲⪳⪴⪵⪶⪷⪸⪹⪺⪻⪼⪽⪾⪿⫀⫁⫂⫃⫄⫅⫆⫇⫈⫉⫊⫋⫌⫍⫎⫏⫐⫑⫒⫓⫔⫕⫖⫗⫘⫙⫛⫷⫸⫹⫺⬰⬱⬲⬳⬴⬵⬶⬷⬸⬹⬺⬻⬼⬽⬾⬿⭀⭁⭂⭃⭄⭇⭈⭉⭊⭋⭌←↑→↓'
56+
let s:op_chars_wc = '\U2026\U205D\U214B\U2190-\U2194\U219A-\U219E\U21A0\U21A2-\U21A4\U21A6\U21A9-\U21AC\U21AE\U21B6\U21B7\U21BA-\U21BD\U21C0\U21C1\U21C4\U21C6\U21C7\U21C9\U21CB-\U21D0\U21D2\U21D4\U21DA-\U21DD\U21E0\U21E2\U21F4-\U21FF\U2208-\U220D\U2213\U2214\U2217-\U221D\U2224-\U222A\U2237\U2238\U223A\U223B\U223D\U223E\U2240-\U228B\U228D-\U229C\U229E-\U22A3\U22A9\U22AC\U22AE\U22B0-\U22B7\U22BB-\U22BD\U22C4-\U22C7\U22C9-\U22D3\U22D5-\U22FF\U233F\U25B7\U27C2\U27C8\U27C9\U27D1\U27D2\U27D5-\U27D7\U27F0\U27F1\U27F5-\U27F7\U27F9-\U27FF\U2900-\U2918\U291D-\U2920\U2944-\U2970\U29B7\U29B8\U29BC\U29BE-\U29C1\U29E1\U29E3-\U29E5\U29F4\U29F6\U29F7\U29FA\U29FB\U2A07\U2A08\U2A1D\U2A1F\U2A22-\U2A2E\U2A30-\U2A3D\U2A40-\U2A45\U2A4A-\U2A58\U2A5A-\U2A63\U2A66\U2A67\U2A6A-\U2AD9\U2ADB\U2AF7-\U2AFA\U2B30-\U2B44\U2B47-\U2B4C\UFFE9-\UFFEC'
57+
58+
" Operator characters (including '!' this time). When highlighting, we mostly
59+
" can just recognize one character at a time, since longer operators are
60+
" basically made up of characters from this list. An exception is the dot
61+
" character which must be treated a bit specially.
62+
let s:operators1 = '\.\?[!' . s:op_chars . s:op_chars_wc . ']'
63+
let s:operatorsd = '\.\.\.\?'
64+
65+
" Full operators regex, to be used when we actually need to recognize when an
66+
" operator has ended (e.g. in quoted symbols)
67+
let s:operators = '\%(' . '\.\%([-+*/^÷%|&⊻]\|//\|\\\|>>\|>>>\?\)\?=' .
68+
\ '\|' . '[:<>]=\|||\|&&\||>\|<|\|[<>:]:\|<<\|>>>\?\|//\|[-=]>\|\.\.\.\?' .
69+
\ '\|' . s:operators1 .
6970
\ '\)'
7071

72+
73+
" Characters that can be used to start an identifier. Above \UBF we don't
74+
" bother checking. (If a UTF8 operator is used, it will take precedence anyway.)
75+
let s:id_charsH = '\%([A-Za-z_\UA2-\UA6\UA9\UAA\UAE\UB0\UB5\UBA]\|[^\U00-\UBF]\)'
76+
" Characters that can appear in an identifier, starting in 2nd position. Above
77+
" \UBF we check for operators since we need to stop the identifier if one
78+
" appears. We don't check for invalid characters though.
79+
let s:id_charsW = '\%([0-9A-Za-z_!\UA2-\UA6\UA9\UAA\UAE-\UB0\UB2-\UB5\UB8-\UBA\UBC-\UBE]\|[^\U00-\UBF]\@=[^' . s:op_chars_wc . ']\)'
80+
81+
" A valid julia identifier, more or less
82+
let s:idregex = '\%(' . s:id_charsH . s:id_charsW . '*\)'
83+
84+
85+
7186
syn case match
7287

7388
syntax cluster juliaExpressions contains=@juliaParItems,@juliaStringItems,@juliaKeywordItems,@juliaBlocksItems,@juliaTypesItems,@juliaConstItems,@juliaMacroItems,@juliaSymbolItems,@juliaOperatorItems,@juliaNumberItems,@juliaCommentItems,@juliaErrorItems,@juliaSyntaxRegions
@@ -208,7 +223,7 @@ syntax match juliaConstNum display "\%(\<\%(\%(NaN\|Inf\)\%(16\|32\|64\)\?\|p
208223
" (This also tries to detect preceding number constants; it does so in a crude
209224
" way.)
210225
syntax match juliaPossibleEuler "" contains=juliaEuler
211-
exec 'syntax match juliaEuler contained "\%(\%(^\|[' . s:nonidS_chars . ']\|' . s:operators . '\)\%([.0-9eEf_]*\d\)\?\)\@'.s:d(80).'<=ℯ\ze\%($\|[' . s:nonidS_chars . ']\|' . s:operators . '\)"'
226+
exec 'syntax match juliaEuler contained "\%(\%(^\|[' . s:nonidS_chars . s:op_chars_wc . ']\)\%(.\?[0-9][.0-9eEf_]*\d\)\?\)\@'.s:d(80).'<=ℯ\ze[' . s:nonidS_chars . s:op_chars_wc . ']"'
212227
syntax match juliaConstBool display "\<\%(true\|false\)\>"
213228
syntax match juliaConstEnv display "\<\%(ARGS\|ENV\|ENDIAN_BOM\|LOAD_PATH\|VERSION\|PROGRAM_FILE\|DEPOT_PATH\)\>"
214229
syntax match juliaConstIO display "\<\%(std\%(out\|in\|err\)\|devnull\)\>"
@@ -266,10 +281,12 @@ exec 'syntax match juliaFloat contained "' . s:float_regex . '" contains=juli
266281
syntax match juliaComplexUnit display contained "\<im\>"
267282

268283
syntax match juliaRangeOperator display ":"
269-
exec 'syntax match juliaOperator "' . s:operators . '"'
284+
exec 'syntax match juliaOperator display "' . s:operators1 . '"'
285+
exec 'syntax match juliaOperator "' . s:operatorsd . '"'
286+
270287
exec 'syntax region juliaTernaryRegion matchgroup=juliaTernaryOperator start="\s\zs?\ze\s" skip="\%(:\(:\|[^:[:space:]'."'".'"({[]\+\s*\ze:\)\|\%(?\s*\)\@'.s:d(6).'<=:(\)" end=":" contains=@juliaExpressions,juliaErrorSemicol'
271288

272-
let s:interp_dollar = '\([' . s:nonidS_chars . s:uniop_chars . s:binop_chars . '!]\|^\)\@'.s:d(1).'<=\$'
289+
let s:interp_dollar = '\([' . s:nonidS_chars . s:op_chars_wc . '!]\|^\)\@'.s:d(1).'<=\$'
273290

274291
exec 'syntax match juliaDollarVar display contained "' . s:interp_dollar . s:idregex . '"'
275292
exec 'syntax region juliaDollarPar matchgroup=juliaDollarVar contained start="' .s:interp_dollar . '(" end=")" contains=@juliaExpressions'
@@ -281,7 +298,7 @@ syntax match juliaChar display "'\\x\x\{2\}'" contains=juliaHexEscapeChar
281298
syntax match juliaChar display "'\\u\x\{1,4\}'" contains=juliaUniCharSmall
282299
syntax match juliaChar display "'\\U\x\{1,8\}'" contains=juliaUniCharLarge
283300

284-
exec 'syntax match juliaCTransOperator "[[:space:]}' . s:nonid_chars . s:uniop_chars . s:binop_chars . '!]\@'.s:d(1).'<!\.\?' . "'" . 'ᵀ\?"'
301+
exec 'syntax match juliaCTransOperator "[[:space:]}' . s:nonid_chars . s:op_chars_wc . '!]\@'.s:d(1).'<!\.\?' . "'" . 'ᵀ\?"'
285302

286303
" TODO: some of these might be specialized; the rest could be just left to the
287304
" generic juliaStringPrefixed fallback
@@ -332,8 +349,8 @@ syntax match juliaPrintfFmt display contained "\\%%"hs=s+1
332349
" (for performance reasons)
333350
syntax match juliaPossibleSymbol transparent ":\ze[^:]" contains=juliaSymbol,juliaQuotedParBlock,juliaQuotedQMarkPar,juliaColon
334351

335-
let s:quotable = '\%(' . s:idregex . '\|?\|' . s:operators . '\|' . s:float_regex . '\|' . s:int_regex . '\)'
336-
let s:quoting_colon = '\%(\%(^\s*\|\s\{6,\}\|[' . s:nonid_chars . s:uniop_chars . s:binop_chars . ']\s*\)\@'.s:d(6).'<=\|\%(\<\%(return\|if\|else\%(if\)\?\|while\|try\|begin\)\s\+\)\@'.s:d(9).'<=\)\zs:'
352+
let s:quotable = '\%(' . s:idregex . '\|' . s:operators . '\|[?.]\|' . s:float_regex . '\|' . s:int_regex . '\)'
353+
let s:quoting_colon = '\%(\%(^\s*\|\s\{6,\}\|[' . s:nonid_chars . s:op_chars_wc . ']\s*\)\@'.s:d(6).'<=\|\%(\<\%(return\|if\|else\%(if\)\?\|while\|try\|begin\)\s\+\)\@'.s:d(9).'<=\)\zs:'
337354
let s:quoting_colonS = '\s\@'.s:d(1).'<=:'
338355

339356
" note: juliaSymbolS only works within whitespace-sensitive contexts,

0 commit comments

Comments
 (0)