diff --git a/src/main/java/com/hubspot/jinjava/LegacyOverrides.java b/src/main/java/com/hubspot/jinjava/LegacyOverrides.java index bd3732455..b158ef918 100644 --- a/src/main/java/com/hubspot/jinjava/LegacyOverrides.java +++ b/src/main/java/com/hubspot/jinjava/LegacyOverrides.java @@ -32,6 +32,7 @@ public interface LegacyOverrides extends WithLegacyOverrides { .withAllowAdjacentTextNodes(true) .withUseTrimmingForNotesAndExpressions(true) .withKeepNullableLoopValues(true) + .withHandleBackslashInQuotesOnly(true) .build(); @Value.Default @@ -79,6 +80,23 @@ default boolean isKeepNullableLoopValues() { return false; } + /** + * When {@code true}, the token scanner treats backslash as an escape character + * only inside quoted string literals, leaving bare backslashes outside quotes + * untouched for the expression parser (JUEL) to handle. This matches the + * behaviour of Python's Jinja2, where the template scanner is not responsible + * for backslash interpretation at all. + * + *
When {@code false} (the default), the scanner consumes a backslash and + * the following character unconditionally, regardless of quote context. This + * is the legacy Jinjava behaviour, which prevents closing delimiters from + * being recognized after a backslash but diverges from Jinja2. + */ + @Value.Default + default boolean isHandleBackslashInQuotesOnly() { + return false; + } + class Builder extends ImmutableLegacyOverrides.Builder {} static Builder newBuilder() { diff --git a/src/main/java/com/hubspot/jinjava/tree/parse/ExpressionToken.java b/src/main/java/com/hubspot/jinjava/tree/parse/ExpressionToken.java index d8d9996d5..1c0d679c0 100644 --- a/src/main/java/com/hubspot/jinjava/tree/parse/ExpressionToken.java +++ b/src/main/java/com/hubspot/jinjava/tree/parse/ExpressionToken.java @@ -54,7 +54,14 @@ public int getType() { @Override protected void parse() { - this.expr = WhitespaceUtils.unwrap(image, "{{", "}}"); + // Use the symbols-derived delimiter strings instead of the hardcoded "{{" / "}}" + // so that custom delimiters (e.g. "\VAR{" / "}") are stripped correctly. 
+ this.expr = + WhitespaceUtils.unwrap( + image, + getSymbols().getExpressionStart(), + getSymbols().getExpressionEnd() + ); this.expr = handleTrim(expr); this.expr = StringUtils.trimToEmpty(this.expr); } diff --git a/src/main/java/com/hubspot/jinjava/tree/parse/NoteToken.java b/src/main/java/com/hubspot/jinjava/tree/parse/NoteToken.java index 3f5360e67..450f9ccbd 100644 --- a/src/main/java/com/hubspot/jinjava/tree/parse/NoteToken.java +++ b/src/main/java/com/hubspot/jinjava/tree/parse/NoteToken.java @@ -48,8 +48,11 @@ public int getType() { */ @Override protected void parse() { - if (image.length() > 4) { // {# #} - handleTrim(image.substring(2, image.length() - 2)); + int startLen = getSymbols().getCommentStartLength(); + int endLen = getSymbols().getCommentEndLength(); + + if (image.length() > startLen + endLen) { + handleTrim(image.substring(startLen, image.length() - endLen)); } content = ""; } diff --git a/src/main/java/com/hubspot/jinjava/tree/parse/StringTokenScannerSymbols.java b/src/main/java/com/hubspot/jinjava/tree/parse/StringTokenScannerSymbols.java new file mode 100644 index 000000000..242abd241 --- /dev/null +++ b/src/main/java/com/hubspot/jinjava/tree/parse/StringTokenScannerSymbols.java @@ -0,0 +1,269 @@ +/********************************************************************** + * Copyright (c) 2014 HubSpot Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ **********************************************************************/ +package com.hubspot.jinjava.tree.parse; + +/** + * A {@link TokenScannerSymbols} implementation that supports arbitrary multi-character + * delimiter strings, addressing + * issue #195. + * + *
+ * <p>Unlike {@link DefaultTokenScannerSymbols}, which is constrained to single-character
+ * prefixes and postfixes, this class allows any non-empty string for each of the six
+ * delimiter roles. The delimiters do not need to share a common prefix character.
+ *
+ *
+ * <p>{@link TokenScanner} detects this class via {@link #isStringBased()} and activates
+ * a string-matching scan path. {@link ExpressionToken} unwraps its content using the
+ * delimiter strings themselves ({@link #getExpressionStart()} and
+ * {@link #getExpressionEnd()}), while {@link TagToken} and {@link NoteToken} use the
+ * length accessors on {@link TokenScannerSymbols} (e.g. {@link #getTagStartLength()}),
+ * so delimiters are stripped correctly regardless of length.
+ *
+ *
+ * <p>The single-character abstract methods inherited from {@link TokenScannerSymbols}
+ * return Unicode Private-Use-Area sentinel values, except {@link #getNewlineChar()} and
+ * {@link #getTrimChar()}, which return the real characters. The sentinels are used only
+ * as token-kind discriminators inside {@link Token#newToken} and must never be used for
+ * scanning template text.
+ *
+ *
+ * <pre>{@code
+ * JinjavaConfig config = JinjavaConfig.newBuilder()
+ *     .withTokenScannerSymbols(StringTokenScannerSymbols.builder()
+ *         .withVariableStartString("\\VAR{")
+ *         .withVariableEndString("}")
+ *         .withBlockStartString("\\BLOCK{")
+ *         .withBlockEndString("}")
+ *         .withCommentStartString("\\#{")
+ *         .withCommentEndString("}")
+ *         .build())
+ *     .build();
+ * }</pre>
+ */
+public class StringTokenScannerSymbols extends TokenScannerSymbols {
+
+  private static final long serialVersionUID = 1L;
+
+  // ── Internal sentinel chars ────────────────────────────────────────────────
+  // Unicode Private Use Area values handed out by the single-char accessors so
+  // that Token.newToken()'s if-chain dispatches to the right Token subclass.
+  // They identify token kinds only: they are not the configured delimiters and
+  // are not meant to be matched against template text.
+  static final char SENTINEL_FIXED = '\uE000';
+  static final char SENTINEL_NOTE = '\uE001';
+  static final char SENTINEL_TAG = '\uE002';
+  static final char SENTINEL_EXPR_START = '\uE003';
+  static final char SENTINEL_EXPR_END = '\uE004';
+  static final char SENTINEL_PREFIX = '\uE005'; // unused for scanning
+  static final char SENTINEL_POSTFIX = '\uE006'; // unused for scanning
+  // Despite the SENTINEL_ prefix, the next two are real characters.
+  static final char SENTINEL_NEWLINE = '\n'; // real newline for line tracking
+  static final char SENTINEL_TRIM = '-'; // real trim char
+
+  // ── The configured string delimiters ──────────────────────────────────────
+  private final String variableStartString;
+  private final String variableEndString;
+  private final String blockStartString;
+  private final String blockEndString;
+  private final String commentStartString;
+  private final String commentEndString;
+  // Optional; null means disabled. Never empty (enforced by the Builder).
+  private final String lineStatementPrefix;
+  private final String lineCommentPrefix;
+
+  /**
+   * Copies the delimiter configuration out of {@code builder}. Instances are
+   * created only through {@link Builder#build()}.
+   */
+  private StringTokenScannerSymbols(Builder builder) {
+    this.variableStartString = builder.variableStartString;
+    this.variableEndString = builder.variableEndString;
+    this.blockStartString = builder.blockStartString;
+    this.blockEndString = builder.blockEndString;
+    this.commentStartString = builder.commentStartString;
+    this.commentEndString = builder.commentEndString;
+    this.lineStatementPrefix = builder.lineStatementPrefix;
+    this.lineCommentPrefix = builder.lineCommentPrefix;
+  }
+
+  // ── Abstract char contract — returns sentinels only ───────────────────────
+
+  /** Returns a placeholder; this class has no single prefix character. */
+  @Override
+  public char getPrefixChar() {
+    return SENTINEL_PREFIX;
+  }
+
+  /** Returns a placeholder; this class has no single postfix character. */
+  @Override
+  public char getPostfixChar() {
+    return SENTINEL_POSTFIX;
+  }
+
+  /** Returns the kind discriminator for plain-text tokens. */
+  @Override
+  public char getFixedChar() {
+    return SENTINEL_FIXED;
+  }
+
+  /** Returns the kind discriminator for comment (note) tokens. */
+  @Override
+  public char getNoteChar() {
+    return SENTINEL_NOTE;
+  }
+
+  /** Returns the kind discriminator for block (tag) tokens. */
+  @Override
+  public char getTagChar() {
+    return SENTINEL_TAG;
+  }
+
+  /** Returns the kind discriminator for an expression opening. */
+  @Override
+  public char getExprStartChar() {
+    return SENTINEL_EXPR_START;
+  }
+
+  /** Returns the kind discriminator for an expression closing. */
+  @Override
+  public char getExprEndChar() {
+    return SENTINEL_EXPR_END;
+  }
+
+  /** Returns the real newline character ({@code '\n'}), not a placeholder. */
+  @Override
+  public char getNewlineChar() {
+    return SENTINEL_NEWLINE;
+  }
+
+  /** Returns the real whitespace-trim modifier ({@code '-'}), not a placeholder. */
+  @Override
+  public char getTrimChar() {
+    return SENTINEL_TRIM;
+  }
+
+  // ── String-level getters: MUST override the base-class lazy cache ──────────
+  // The base class builds these from the char methods above, which would produce
+  // garbage sentinel strings. We override them to return the real delimiters so
+  // that ExpressionToken, TagToken, and NoteToken strip content correctly.
+
+  /** Returns the configured variable opening delimiter (default {@code "{{"}). */
+  @Override
+  public String getExpressionStart() {
+    return variableStartString;
+  }
+
+  /** Returns the configured variable closing delimiter (default {@code "}}"}). */
+  @Override
+  public String getExpressionEnd() {
+    return variableEndString;
+  }
+
+  /** Returns the configured block opening delimiter (default {@code "{%"}). */
+  @Override
+  public String getExpressionStartWithTag() {
+    return blockStartString;
+  }
+
+  /** Returns the configured block closing delimiter (default {@code "%}"}). */
+  @Override
+  public String getExpressionEndWithTag() {
+    return blockEndString;
+  }
+
+  /** Returns the configured comment opening delimiter (default {@code "{#"}). */
+  @Override
+  public String getOpeningComment() {
+    return commentStartString;
+  }
+
+  /** Returns the configured comment closing delimiter (default {@code "#}"}). */
+  @Override
+  public String getClosingComment() {
+    return commentEndString;
+  }
+
+  /** Returns the line statement prefix, or {@code null} when the feature is disabled. */
+  @Override
+  public String getLineStatementPrefix() {
+    return lineStatementPrefix;
+  }
+
+  /** Returns the line comment prefix, or {@code null} when the feature is disabled. */
+  @Override
+  public String getLineCommentPrefix() {
+    return lineCommentPrefix;
+  }
+
+  // ── isStringBased flag ────────────────────────────────────────────────────
+
+  /** Always {@code true}: this implementation carries string delimiters. */
+  @Override
+  public boolean isStringBased() {
+    return true;
+  }
+
+  // ── Builder ────────────────────────────────────────────────────────────────
+
+  /** Creates a builder pre-populated with the standard Jinja2 delimiters. */
+  public static Builder builder() {
+    return new Builder();
+  }
+
+  /**
+   * Fluent builder for {@link StringTokenScannerSymbols}. Each of the six
+   * delimiter setters rejects {@code null} and empty strings; the two optional
+   * line-prefix setters accept {@code null} (disabled) but reject empty strings.
+   */
+  public static final class Builder {
+
+    // Defaults mirror the standard Jinja2 delimiters, so building with no
+    // overrides behaves identically to DefaultTokenScannerSymbols.
+    private String variableStartString = "{{";
+    private String variableEndString = "}}";
+    private String blockStartString = "{%";
+    private String blockEndString = "%}";
+    private String commentStartString = "{#";
+    private String commentEndString = "#}";
+    private String lineStatementPrefix = null; // disabled by default
+    private String lineCommentPrefix = null; // disabled by default
+
+    /**
+     * Sets the variable opening delimiter.
+     *
+     * @throws IllegalArgumentException if {@code s} is {@code null} or empty
+     */
+    public Builder withVariableStartString(String s) {
+      this.variableStartString = requireNonEmpty(s, "variableStartString");
+      return this;
+    }
+
+    /**
+     * Sets the variable closing delimiter.
+     *
+     * @throws IllegalArgumentException if {@code s} is {@code null} or empty
+     */
+    public Builder withVariableEndString(String s) {
+      this.variableEndString = requireNonEmpty(s, "variableEndString");
+      return this;
+    }
+
+    /**
+     * Sets the block opening delimiter.
+     *
+     * @throws IllegalArgumentException if {@code s} is {@code null} or empty
+     */
+    public Builder withBlockStartString(String s) {
+      this.blockStartString = requireNonEmpty(s, "blockStartString");
+      return this;
+    }
+
+    /**
+     * Sets the block closing delimiter.
+     *
+     * @throws IllegalArgumentException if {@code s} is {@code null} or empty
+     */
+    public Builder withBlockEndString(String s) {
+      this.blockEndString = requireNonEmpty(s, "blockEndString");
+      return this;
+    }
+
+    /**
+     * Sets the comment opening delimiter.
+     *
+     * @throws IllegalArgumentException if {@code s} is {@code null} or empty
+     */
+    public Builder withCommentStartString(String s) {
+      this.commentStartString = requireNonEmpty(s, "commentStartString");
+      return this;
+    }
+
+    /**
+     * Sets the comment closing delimiter.
+     *
+     * @throws IllegalArgumentException if {@code s} is {@code null} or empty
+     */
+    public Builder withCommentEndString(String s) {
+      this.commentEndString = requireNonEmpty(s, "commentEndString");
+      return this;
+    }
+
+    /**
+     * Sets the line statement prefix (e.g. {@code "%%"}). A line beginning with
+     * this prefix is treated as a block tag, equivalent to wrapping its content
+     * in the configured block delimiters. Pass {@code null} to disable (default).
+     *
+     * @throws IllegalArgumentException if {@code s} is the empty string; an
+     *     empty prefix would trivially match at the start of every line
+     */
+    public Builder withLineStatementPrefix(String s) {
+      this.lineStatementPrefix = requireNullOrNonEmpty(s, "lineStatementPrefix");
+      return this;
+    }
+
+    /**
+     * Sets the line comment prefix (e.g. {@code "%#"}). A line beginning with
+     * this prefix is stripped entirely from the output. Pass {@code null} to
+     * disable (default).
+     *
+     * @throws IllegalArgumentException if {@code s} is the empty string; an
+     *     empty prefix would trivially match at the start of every line
+     */
+    public Builder withLineCommentPrefix(String s) {
+      this.lineCommentPrefix = requireNullOrNonEmpty(s, "lineCommentPrefix");
+      return this;
+    }
+
+    /** Builds an immutable symbols instance from the current settings. */
+    public StringTokenScannerSymbols build() {
+      return new StringTokenScannerSymbols(this);
+    }
+
+    /** Rejects {@code null} and {@code ""}; returns {@code value} otherwise. */
+    private static String requireNonEmpty(String value, String name) {
+      if (value == null || value.isEmpty()) {
+        throw new IllegalArgumentException(name + " must not be null or empty");
+      }
+      return value;
+    }
+
+    /**
+     * Validates an optional prefix: {@code null} is accepted (feature disabled),
+     * any other value must be non-empty.
+     */
+    private static String requireNullOrNonEmpty(String value, String name) {
+      if (value != null && value.isEmpty()) {
+        throw new IllegalArgumentException(
+          name + " must be null (disabled) or a non-empty string"
+        );
+      }
+      return value;
+    }
+  }
+}
diff --git a/src/main/java/com/hubspot/jinjava/tree/parse/TagToken.java b/src/main/java/com/hubspot/jinjava/tree/parse/TagToken.java
index a737dd96c..0c500c145 100644
--- a/src/main/java/com/hubspot/jinjava/tree/parse/TagToken.java
+++ b/src/main/java/com/hubspot/jinjava/tree/parse/TagToken.java
@@ -54,7 +54,10 @@ public int getType() {
*/
@Override
protected void parse() {
- if (image.length() < 4) {
+ int startLen = getSymbols().getTagStartLength();
+ int endLen = getSymbols().getTagEndLength();
+
+ if (image.length() < startLen + endLen) {
throw new TemplateSyntaxException(
image,
"Malformed tag token",
@@ -63,7 +66,7 @@ protected void parse() {
);
}
- content = image.substring(2, image.length() - 2);
+ content = image.substring(startLen, image.length() - endLen);
content = handleTrim(content);
int nameStart = -1, pos = 0, len = content.length();
diff --git a/src/main/java/com/hubspot/jinjava/tree/parse/TokenScanner.java b/src/main/java/com/hubspot/jinjava/tree/parse/TokenScanner.java
index 7e53b295a..56be51c60 100644
--- a/src/main/java/com/hubspot/jinjava/tree/parse/TokenScanner.java
+++ b/src/main/java/com/hubspot/jinjava/tree/parse/TokenScanner.java
@@ -42,6 +42,29 @@ public class TokenScanner extends AbstractIteratorMatches Python Jinja2 semantics exactly: + *
Neither form affects the newline that ended the preceding line. + */ + private Token handleLineComment() { + int afterPrefix = currPost + lineCommentPrefix.length; + boolean hasTrimModifier = + afterPrefix < length && is[afterPrefix] == symbols.getTrimChar(); + + // Flush buffered text up to (but not including) the current line's indentation. + // The preceding newline is always preserved regardless of the trim modifier. + Token pending = flushTextBefore(lineIndentStart(currPost)); + + // Advance past the comment content to the end of the line. + int end = afterPrefix; + while (end < length && is[end] != '\n') { + end++; + } + + if (hasTrimModifier) { + // %#- : strip trailing \n too, leaving no blank line. + int next = end; + if (next < length && is[next] == '\n') { + next++; + currLine++; + lastNewlinePos = next; + } + tokenStart = next; + currPost = next; + } else { + // %# : leave the trailing \n in place so it renders as a blank line. + tokenStart = end; + currPost = end; + } + + return (pending != null) ? pending : DELIMITER_MATCHED; + } + + /** + * Returns the position of the first character of the indentation on the line + * containing {@code pos} — i.e. the position just after the preceding newline + * (or 0 if at the start of input). Used to exclude leading horizontal whitespace + * from the text token flushed before a line prefix match. + */ + private int lineIndentStart(int pos) { + int p = pos - 1; + while (p >= 0 && (is[p] == ' ' || is[p] == '\t')) { + p--; + } + // p is now at the newline before the indentation, or at -1. + return p + 1; + } + + // ── One-slot stash for the synthetic tag after a line-statement ───────── + // When a line-statement prefix is found and there is pending text to flush + // first, we return the text token immediately and stash the synthetic tag + // here so computeNext() picks it up on the very next call. 
+ private Token pendingToken = null; + + @Override + protected Token computeNext() { + // Drain any stashed token first. + if (pendingToken != null) { + Token t = pendingToken; + pendingToken = null; + return t; + } + + Token t = getNextToken(); + if (t == null) { + return endOfData(); + } + return t; + } + + // ── Helpers ─────────────────────────────────────────────────────────────── + + /** + * Returns true when {@code pos} is at the start of a line — i.e. it is either + * the very first character of the input, or the character immediately after a + * newline (accounting for any leading whitespace that lstripBlocks may allow). + */ + private boolean isStartOfLine(int pos) { + if (pos == 0) { + return true; + } + // Walk backwards past any horizontal whitespace (spaces/tabs). + int p = pos - 1; + while (p >= 0 && (is[p] == ' ' || is[p] == '\t')) { + p--; + } + // True if we hit the beginning of the input or a newline. + return p < 0 || is[p] == '\n'; + } + + /** + * If {@code is[tokenStart..upTo)} contains un-emitted plain text, captures it + * as a TEXT token and returns it. Returns {@code null} for zero-length regions. + * Does NOT update {@code tokenStart} — the caller sets it after returning. + */ + private Token flushTextBefore(int upTo) { + int textLen = upTo - tokenStart; + if (textLen <= 0) { + return null; + } + lastStart = tokenStart; + tokenLength = textLen; + return emitStringToken(symbols.getFixed()); + } + + /** Returns the closing delimiter for the currently open block kind. */ + private char[] closingDelimFor(int currentKind) { + if (currentKind == symbols.getExprStart()) { + return varEnd; + } + if (currentKind == symbols.getTag()) { + return blkEnd; + } + if (currentKind == symbols.getNote()) { + return cmtEnd; + } + return null; + } + + /** + * Constructs a token from {@code lastStart}/{@code tokenLength}, then applies + * trimBlocks and raw-mode post-processing identical to the char-based path. 
+ */ + private Token emitStringToken(int kind) { + Token t = Token.newToken( + kind, + symbols, + whitespaceControlParser, + String.valueOf(is, lastStart, tokenLength), + currLine, + lastStart - lastNewlinePos + 1 + ); + + if ( + (t instanceof TagToken || t instanceof NoteToken) && + config.isTrimBlocks() && + currPost < length && + is[currPost] == '\n' + ) { + lastNewlinePos = currPost + 1; + ++currPost; + ++tokenStart; + } + + if (t instanceof TagToken) { + TagToken tt = (TagToken) t; + if ("raw".equals(tt.getTagName())) { + inRaw = 1; + return tt; + } else if ("endraw".equals(tt.getTagName())) { + inRaw = 0; + return tt; + } + } + + if (inRaw > 0 && t.getType() != symbols.getFixed()) { + return Token.newToken( + symbols.getFixed(), + symbols, + whitespaceControlParser, + t.image, + currLine, + lastStart - lastNewlinePos + 1 + ); + } + + return t; + } + + /** + * Emits whatever remains at end-of-input. + * Advances {@code tokenStart = currPost} so subsequent calls return null. + */ + private Token getEndTokenStringBased() { + tokenLength = currPost - tokenStart; + lastStart = tokenStart; + tokenStart = currPost; + int type = symbols.getFixed(); + if (inComment > 0) { + type = symbols.getNote(); + } else if (inBlock > 0) { + return new UnclosedToken( + String.valueOf(is, lastStart, tokenLength), + currLine, + lastStart - lastNewlinePos + 1, + symbols, + whitespaceControlParser + ); + } + return Token.newToken( + type, + symbols, + whitespaceControlParser, + String.valueOf(is, lastStart, tokenLength), + currLine, + lastStart - lastNewlinePos + 1 + ); + } + + /** Returns true if {@code is[pos..]} starts with {@code pattern}. 
*/ + private boolean regionMatches(int pos, char[] pattern) { + if (pos + pattern.length > length) { + return false; + } + for (int i = 0; i < pattern.length; i++) { + if (is[pos + i] != pattern[i]) { + return false; + } + } + return true; + } + + // ── Original char-based scanning path (completely unchanged) ────────────── + + private Token getNextTokenCharBased() { char c; while (currPost < length) { c = is[currPost++]; @@ -75,10 +631,14 @@ private Token getNextToken() { } if (inBlock > 0) { - if (c == '\\') { + if (c == '\\' && !backslashInQuotesOnly) { ++currPost; continue; } else if (inQuote != 0) { + if (c == '\\') { + ++currPost; + continue; + } if (inQuote == c) { inQuote = 0; } @@ -311,15 +871,4 @@ private boolean matchToken(char kind) { return kind == tokenKind; } } - - @Override - protected Token computeNext() { - Token t = getNextToken(); - - if (t == null) { - return endOfData(); - } - - return t; - } } diff --git a/src/main/java/com/hubspot/jinjava/tree/parse/TokenScannerSymbols.java b/src/main/java/com/hubspot/jinjava/tree/parse/TokenScannerSymbols.java index 771dbda41..638220853 100644 --- a/src/main/java/com/hubspot/jinjava/tree/parse/TokenScannerSymbols.java +++ b/src/main/java/com/hubspot/jinjava/tree/parse/TokenScannerSymbols.java @@ -129,4 +129,90 @@ public static boolean isNoteTagOrExprChar(TokenScannerSymbols symbols, char c) { c == symbols.getNote() || c == symbols.getTag() || c == symbols.getExprStartChar() ); } + + // ── New API ──────────────────────────────────────────────────────────────── + + /** + * Returns {@code true} if this instance uses arbitrary string delimiters that + * require the string-matching scan path in {@link TokenScanner}. + * + *
The default returns {@code false}, so all existing subclasses are unaffected. + * {@link StringTokenScannerSymbols} overrides this to return {@code true}. + */ + public boolean isStringBased() { + return false; + } + + /** + * Length of the variable/expression opening delimiter (e.g. 2 for {@code "{{"}), + * used by {@link ExpressionToken#parse()} instead of the hardcoded constant 2. + */ + public int getExpressionStartLength() { + return getExpressionStart().length(); + } + + /** + * Length of the variable/expression closing delimiter (e.g. 2 for {@code "}}"}), + * used by {@link ExpressionToken#parse()} instead of the hardcoded constant 2. + */ + public int getExpressionEndLength() { + return getExpressionEnd().length(); + } + + /** + * Length of the block/tag opening delimiter (e.g. 2 for {@code "{%"}), + * used by {@link TagToken#parse()} instead of the hardcoded constant 2. + */ + public int getTagStartLength() { + return getExpressionStartWithTag().length(); + } + + /** + * Length of the block/tag closing delimiter (e.g. 2 for {@code "%}"}), + * used by {@link TagToken#parse()} instead of the hardcoded constant 2. + */ + public int getTagEndLength() { + return getExpressionEndWithTag().length(); + } + + /** + * Length of the comment opening delimiter (e.g. 2 for {@code "{#"}), + * used by {@link NoteToken#parse()} instead of the hardcoded constant 2. + */ + public int getCommentStartLength() { + return getOpeningComment().length(); + } + + /** + * Length of the comment closing delimiter (e.g. 2 for {@code "#}"}), + * used by {@link NoteToken#parse()} instead of the hardcoded constant 2. + */ + public int getCommentEndLength() { + return getClosingComment().length(); + } + + /** + * Optional line statement prefix (e.g. {@code "%%"}). When non-null, any line + * that begins with this prefix (after optional horizontal whitespace) is treated + * as a block tag statement, equivalent to wrapping its content in the block + * delimiters. 
Returns {@code null} by default (feature disabled). + * + *
Only used by {@link StringTokenScannerSymbols}; has no effect in the + * char-based path. + */ + public String getLineStatementPrefix() { + return null; + } + + /** + * Optional line comment prefix (e.g. {@code "%#"}). When non-null, any line + * that begins with this prefix (after optional horizontal whitespace) is stripped + * entirely from the output. Returns {@code null} by default (feature disabled). + * + *
Only used by {@link StringTokenScannerSymbols}; has no effect in the
+ * char-based path.
+ */
+ public String getLineCommentPrefix() {
+ return null; // base default: no line-comment prefix, so the feature stays off
+ }
}
diff --git a/src/test/java/com/hubspot/jinjava/tree/parse/BackslashHandlingTest.java b/src/test/java/com/hubspot/jinjava/tree/parse/BackslashHandlingTest.java
new file mode 100644
index 000000000..de5c9cc7b
--- /dev/null
+++ b/src/test/java/com/hubspot/jinjava/tree/parse/BackslashHandlingTest.java
@@ -0,0 +1,238 @@
+package com.hubspot.jinjava.tree.parse;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+import com.google.common.collect.ImmutableMap;
+import com.hubspot.jinjava.Jinjava;
+import com.hubspot.jinjava.JinjavaConfig;
+import com.hubspot.jinjava.LegacyOverrides;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import org.junit.Test;
+
+/**
+ * Tests for backslash handling inside block/variable/comment delimiters,
+ * covering both the char-based (DefaultTokenScannerSymbols) and string-based
+ * (StringTokenScannerSymbols) scanning paths, with the
+ * {@link LegacyOverrides#isHandleBackslashInQuotesOnly()} flag both off (legacy)
+ * and on (Jinja2-compatible).
+ */
+public class BackslashHandlingTest {
+
+ // ── Jinjava instances ──────────────────────────────────────────────────────
+
+  /**
+   * Default (char-based) scanner with an untouched {@link LegacyOverrides}
+   * builder, i.e. legacy backslash behaviour (flag = false).
+   */
+  private static Jinjava charLegacy() {
+    LegacyOverrides overrides = LegacyOverrides.newBuilder().build();
+    JinjavaConfig config = JinjavaConfig.newBuilder().withLegacyOverrides(overrides).build();
+    return new Jinjava(config);
+  }
+
+  /**
+   * Default (char-based) scanner with {@code handleBackslashInQuotesOnly}
+   * enabled, i.e. Jinja2-compatible backslash behaviour (flag = true).
+   */
+  private static Jinjava charNew() {
+    LegacyOverrides overrides = LegacyOverrides
+      .newBuilder()
+      .withHandleBackslashInQuotesOnly(true)
+      .build();
+    return new Jinjava(JinjavaConfig.newBuilder().withLegacyOverrides(overrides).build());
+  }
+
+  /**
+   * String-based scanner (default delimiters), legacy backslash behaviour (flag = false).
+   */
+  private static Jinjava stringLegacy() {
+    StringTokenScannerSymbols symbols = StringTokenScannerSymbols.builder().build();
+    LegacyOverrides overrides = LegacyOverrides.newBuilder().build();
+    JinjavaConfig config = JinjavaConfig.newBuilder().withTokenScannerSymbols(symbols)
+      .withLegacyOverrides(overrides).build();
+    return new Jinjava(config);
+  }
+
+  /**
+   * String-based scanner (default delimiters), Jinja2-compatible backslash behaviour
+   * (flag = true).
+   */
+  private static Jinjava stringNew() {
+    StringTokenScannerSymbols symbols = StringTokenScannerSymbols.builder().build();
+    LegacyOverrides overrides = LegacyOverrides.newBuilder()
+      .withHandleBackslashInQuotesOnly(true).build();
+    JinjavaConfig config = JinjavaConfig.newBuilder().withTokenScannerSymbols(symbols)
+      .withLegacyOverrides(overrides).build();
+    return new Jinjava(config);
+  }
+
+ // ── Backslash inside a quoted string ──────────────────────────────────────
+ //
+ // Both legacy and new behaviour must handle escaped quotes inside strings
+ // correctly — \" should not close the string.
+
+  @Test
+  public void charLegacy_escapedQuoteInsideString() {
+    String out = charLegacy().render("{{ \"he said \\\"hi\\\"\" }}", new HashMap<>());
+    assertThat(out).isEqualTo("he said \"hi\""); // \" did not close the literal
+  }
+
+  @Test
+  public void charNew_escapedQuoteInsideString() {
+    String out = charNew().render("{{ \"he said \\\"hi\\\"\" }}", new HashMap<>());
+    assertThat(out).isEqualTo("he said \"hi\""); // \" did not close the literal
+  }
+
+  @Test
+  public void stringLegacy_escapedQuoteInsideString() {
+    String out = stringLegacy().render("{{ \"he said \\\"hi\\\"\" }}", new HashMap<>());
+    assertThat(out).isEqualTo("he said \"hi\""); // \" did not close the literal
+  }
+
+  @Test
+  public void stringNew_escapedQuoteInsideString() {
+    String out = stringNew().render("{{ \"he said \\\"hi\\\"\" }}", new HashMap<>());
+    assertThat(out).isEqualTo("he said \"hi\""); // \" did not close the literal
+  }
+
+ // ── Backslash outside a quoted string ─────────────────────────────────────
+ //
+ // Template under test: "prefix {{ x \}} suffix }}"
+ //
+ // We test the scanner token structure directly rather than going through
+ // render(), because the expression "x \..." is always a JUEL lexical error
+ // regardless of mode. What differs between modes is which token boundaries
+ // the scanner produces — and that is what we assert on.
+ //
+ // Legacy (backslashInQuotesOnly = false):
+ // Scanner consumes '\' and skips the following '}'. The first '}}' is not
+ // recognized as a closer. The block runs until the second '}}', so the
+ // token sequence is:
+ // TEXT "prefix " | EXPR "{{ x \}} suffix }}"
+ //
+ // New (backslashInQuotesOnly = true):
+ // Scanner leaves '\' untouched. The first '}}' is recognized as the closer.
+ // The token sequence is:
+ // TEXT "prefix " | EXPR "{{ x \}}" | TEXT " suffix }}"
+
+ private static final String BACKSLASH_TEMPLATE = "prefix {{ x \\}} suffix }}";
+
+ @Test
+ public void charLegacy_backslashConsumesOneDelimiterChar_blockRunsToSecondCloser() {
+ List