Skip to content

Commit 7c6964a

Browse files
committed
Scanner treats backslash as escape character inside quoted strings only
1 parent 5017d43 commit 7c6964a

File tree

3 files changed

+271
-6
lines changed

3 files changed

+271
-6
lines changed

src/main/java/com/hubspot/jinjava/LegacyOverrides.java

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ public interface LegacyOverrides extends WithLegacyOverrides {
3232
.withAllowAdjacentTextNodes(true)
3333
.withUseTrimmingForNotesAndExpressions(true)
3434
.withKeepNullableLoopValues(true)
35+
.withHandleBackslashInQuotesOnly(true)
3536
.build();
3637

3738
@Value.Default
@@ -79,6 +80,23 @@ default boolean isKeepNullableLoopValues() {
7980
return false;
8081
}
8182

83+
/**
84+
* When {@code true}, the token scanner treats backslash as an escape character
85+
* only inside quoted string literals, leaving bare backslashes outside quotes
86+
* untouched for the expression parser (JUEL) to handle. This matches the
87+
* behaviour of Python's Jinja2, where the template scanner is not responsible
88+
* for backslash interpretation at all.
89+
*
90+
* <p>When {@code false} (the default), the scanner consumes a backslash and
91+
* the following character unconditionally, regardless of quote context. This
92+
* is the legacy Jinjava behaviour, which prevents closing delimiters from
93+
* being recognized after a backslash but diverges from Jinja2.
94+
*/
95+
@Value.Default
96+
default boolean isHandleBackslashInQuotesOnly() {
97+
return false;
98+
}
99+
82100
class Builder extends ImmutableLegacyOverrides.Builder {}
83101

84102
static Builder newBuilder() {

src/main/java/com/hubspot/jinjava/tree/parse/TokenScanner.java

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,11 @@ public class TokenScanner extends AbstractIterator<Token> {
5555
private final char[] lineStmtPrefix;
5656
private final char[] lineCommentPrefix;
5757

58+
// When true, backslash is treated as an escape character only inside quoted
59+
// string literals, matching Jinja2 behaviour. When false (legacy default),
60+
// the scanner consumes backslash + next char unconditionally.
61+
private final boolean backslashInQuotesOnly;
62+
5863
// Remembers where the current opening delimiter began so the emitted block/comment
5964
// token image starts from the opener (not the content), letting parse() strip the
6065
// correct number of delimiter characters from both ends.
@@ -84,6 +89,7 @@ public TokenScanner(String input, JinjavaConfig config) {
8489
config.getLegacyOverrides().isParseWhitespaceControlStrictly()
8590
? WhitespaceControlParser.STRICT
8691
: WhitespaceControlParser.LENIENT;
92+
backslashInQuotesOnly = config.getLegacyOverrides().isHandleBackslashInQuotesOnly();
8793

8894
if (stringBased) {
8995
varStart = symbols.getExpressionStart().toCharArray();
@@ -214,9 +220,7 @@ private Token scanInsideComment() {
214220
*/
215221
private Token scanInsideBlock(char c) {
216222
if (inQuote != 0) {
217-
// Inside a quoted string: a backslash escapes the next character so a
218-
// delimiter or quote character following it does not prematurely close
219-
// the block or the string.
223+
// Inside a quoted string: a backslash always escapes the next character.
220224
if (c == '\\') {
221225
currPost += (currPost + 1 < length) ? 2 : 1;
222226
return DELIMITER_MATCHED;
@@ -227,8 +231,9 @@ private Token scanInsideBlock(char c) {
227231
currPost++;
228232
return DELIMITER_MATCHED;
229233
}
230-
// Outside a quoted string: a backslash escapes the next character.
231-
if (c == '\\') {
234+
// Outside a quoted string: only consume the backslash if the legacy
235+
// flag is enabled; otherwise leave it for the expression parser.
236+
if (c == '\\' && !backslashInQuotesOnly) {
232237
currPost += (currPost + 1 < length) ? 2 : 1;
233238
return DELIMITER_MATCHED;
234239
}
@@ -626,10 +631,14 @@ private Token getNextTokenCharBased() {
626631
}
627632

628633
if (inBlock > 0) {
629-
if (c == '\\') {
634+
if (c == '\\' && !backslashInQuotesOnly) {
630635
++currPost;
631636
continue;
632637
} else if (inQuote != 0) {
638+
if (c == '\\') {
639+
++currPost;
640+
continue;
641+
}
633642
if (inQuote == c) {
634643
inQuote = 0;
635644
}
Lines changed: 238 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,238 @@
1+
package com.hubspot.jinjava.tree.parse;
2+
3+
import static org.assertj.core.api.Assertions.assertThat;
4+
5+
import com.google.common.collect.ImmutableMap;
6+
import com.hubspot.jinjava.Jinjava;
7+
import com.hubspot.jinjava.JinjavaConfig;
8+
import com.hubspot.jinjava.LegacyOverrides;
9+
import java.util.ArrayList;
10+
import java.util.HashMap;
11+
import java.util.List;
12+
import org.junit.Test;
13+
14+
/**
15+
* Tests for backslash handling inside block/variable/comment delimiters,
16+
* covering both the char-based (DefaultTokenScannerSymbols) and string-based
17+
* (StringTokenScannerSymbols) scanning paths, with the
18+
* {@link LegacyOverrides#isHandleBackslashInQuotesOnly()} flag both off (legacy)
19+
* and on (Jinja2-compatible).
20+
*/
21+
public class BackslashHandlingTest {
22+
23+
// ── Jinjava instances ──────────────────────────────────────────────────────
24+
25+
/** Char-based scanner, legacy backslash behaviour (flag = false). */
26+
private static Jinjava charLegacy() {
27+
return new Jinjava(
28+
JinjavaConfig
29+
.newBuilder()
30+
.withLegacyOverrides(LegacyOverrides.newBuilder().build())
31+
.build()
32+
);
33+
}
34+
35+
/** Char-based scanner, Jinja2-compatible backslash behaviour (flag = true). */
36+
private static Jinjava charNew() {
37+
return new Jinjava(
38+
JinjavaConfig
39+
.newBuilder()
40+
.withLegacyOverrides(
41+
LegacyOverrides.newBuilder().withHandleBackslashInQuotesOnly(true).build()
42+
)
43+
.build()
44+
);
45+
}
46+
47+
/** String-based scanner, legacy backslash behaviour (flag = false). */
48+
private static Jinjava stringLegacy() {
49+
return new Jinjava(
50+
JinjavaConfig
51+
.newBuilder()
52+
.withTokenScannerSymbols(StringTokenScannerSymbols.builder().build())
53+
.withLegacyOverrides(LegacyOverrides.newBuilder().build())
54+
.build()
55+
);
56+
}
57+
58+
/** String-based scanner, Jinja2-compatible backslash behaviour (flag = true). */
59+
private static Jinjava stringNew() {
60+
return new Jinjava(
61+
JinjavaConfig
62+
.newBuilder()
63+
.withTokenScannerSymbols(StringTokenScannerSymbols.builder().build())
64+
.withLegacyOverrides(
65+
LegacyOverrides.newBuilder().withHandleBackslashInQuotesOnly(true).build()
66+
)
67+
.build()
68+
);
69+
}
70+
71+
// ── Backslash inside a quoted string ──────────────────────────────────────
72+
//
73+
// Both legacy and new behaviour must handle escaped quotes inside strings
74+
// correctly — \" should not close the string.
75+
76+
@Test
77+
public void charLegacy_escapedQuoteInsideString() {
78+
assertThat(charLegacy().render("{{ \"he said \\\"hi\\\"\" }}", new HashMap<>()))
79+
.isEqualTo("he said \"hi\"");
80+
}
81+
82+
@Test
83+
public void charNew_escapedQuoteInsideString() {
84+
assertThat(charNew().render("{{ \"he said \\\"hi\\\"\" }}", new HashMap<>()))
85+
.isEqualTo("he said \"hi\"");
86+
}
87+
88+
@Test
89+
public void stringLegacy_escapedQuoteInsideString() {
90+
assertThat(stringLegacy().render("{{ \"he said \\\"hi\\\"\" }}", new HashMap<>()))
91+
.isEqualTo("he said \"hi\"");
92+
}
93+
94+
@Test
95+
public void stringNew_escapedQuoteInsideString() {
96+
assertThat(stringNew().render("{{ \"he said \\\"hi\\\"\" }}", new HashMap<>()))
97+
.isEqualTo("he said \"hi\"");
98+
}
99+
100+
// ── Backslash outside a quoted string ─────────────────────────────────────
101+
//
102+
// Template under test: "prefix {{ x \}} suffix }}"
103+
//
104+
// We test the scanner token structure directly rather than going through
105+
// render(), because the expression "x \..." is always a JUEL lexical error
106+
// regardless of mode. What differs between modes is which token boundaries
107+
// the scanner produces — and that is what we assert on.
108+
//
109+
// Legacy (backslashInQuotesOnly = false):
110+
// Scanner consumes '\' and skips the following '}'. The first '}}' is not
111+
// recognized as a closer. The block runs until the second '}}', so the
112+
// token sequence is:
113+
// TEXT "prefix " | EXPR "{{ x \}} suffix }}"
114+
//
115+
// New (backslashInQuotesOnly = true):
116+
// Scanner leaves '\' untouched. The first '}}' is recognized as the closer.
117+
// The token sequence is:
118+
// TEXT "prefix " | EXPR "{{ x \}}" | TEXT " suffix }}"
119+
120+
private static final String BACKSLASH_TEMPLATE = "prefix {{ x \\}} suffix }}";
121+
122+
@Test
123+
public void charLegacy_backslashConsumesOneDelimiterChar_blockRunsToSecondCloser() {
124+
List<Token> tokens = scanAll(
125+
new TokenScanner(BACKSLASH_TEMPLATE, charLegacy().getGlobalConfig())
126+
);
127+
assertThat(tokens).hasSize(2);
128+
assertThat(tokens.get(0)).isInstanceOf(TextToken.class);
129+
assertThat(tokens.get(0).image).isEqualTo("prefix ");
130+
assertThat(tokens.get(1)).isInstanceOf(ExpressionToken.class);
131+
assertThat(tokens.get(1).image).isEqualTo("{{ x \\}} suffix }}");
132+
}
133+
134+
@Test
135+
public void charNew_backslashIgnored_blockClosesAtFirstDelimiter() {
136+
List<Token> tokens = scanAll(
137+
new TokenScanner(BACKSLASH_TEMPLATE, charNew().getGlobalConfig())
138+
);
139+
assertThat(tokens).hasSize(3);
140+
assertThat(tokens.get(0)).isInstanceOf(TextToken.class);
141+
assertThat(tokens.get(0).image).isEqualTo("prefix ");
142+
assertThat(tokens.get(1)).isInstanceOf(ExpressionToken.class);
143+
assertThat(tokens.get(1).image).isEqualTo("{{ x \\}}");
144+
assertThat(tokens.get(2)).isInstanceOf(TextToken.class);
145+
assertThat(tokens.get(2).image).isEqualTo(" suffix }}");
146+
}
147+
148+
@Test
149+
public void stringLegacy_backslashConsumesOneDelimiterChar_blockRunsToSecondCloser() {
150+
List<Token> tokens = scanAll(
151+
new TokenScanner(BACKSLASH_TEMPLATE, stringLegacy().getGlobalConfig())
152+
);
153+
assertThat(tokens).hasSize(2);
154+
assertThat(tokens.get(0)).isInstanceOf(TextToken.class);
155+
assertThat(tokens.get(0).image).isEqualTo("prefix ");
156+
assertThat(tokens.get(1)).isInstanceOf(ExpressionToken.class);
157+
assertThat(tokens.get(1).image).isEqualTo("{{ x \\}} suffix }}");
158+
}
159+
160+
@Test
161+
public void stringNew_backslashIgnored_blockClosesAtFirstDelimiter() {
162+
List<Token> tokens = scanAll(
163+
new TokenScanner(BACKSLASH_TEMPLATE, stringNew().getGlobalConfig())
164+
);
165+
assertThat(tokens).hasSize(3);
166+
assertThat(tokens.get(0)).isInstanceOf(TextToken.class);
167+
assertThat(tokens.get(0).image).isEqualTo("prefix ");
168+
assertThat(tokens.get(1)).isInstanceOf(ExpressionToken.class);
169+
assertThat(tokens.get(1).image).isEqualTo("{{ x \\}}");
170+
assertThat(tokens.get(2)).isInstanceOf(TextToken.class);
171+
assertThat(tokens.get(2).image).isEqualTo(" suffix }}");
172+
}
173+
174+
private static List<Token> scanAll(TokenScanner scanner) {
175+
List<Token> tokens = new ArrayList<>();
176+
scanner.forEachRemaining(tokens::add);
177+
return tokens;
178+
}
179+
180+
// ── Backslash in a plain variable expression ───────────────────────────────
181+
//
182+
// The most common real-world case: a Windows path or similar string passed
183+
// directly as a variable value. The backslash is in the *value*, not the
184+
// template, so scanner behaviour is irrelevant — both modes should render
185+
// identically.
186+
187+
@Test
188+
public void backslashInVariableValueIsUnaffectedByFlag_char() {
189+
ImmutableMap<String, Object> ctx = ImmutableMap.of("path", "C:\\Users\\foo");
190+
assertThat(charLegacy().render("{{ path }}", ctx)).isEqualTo("C:\\Users\\foo");
191+
assertThat(charNew().render("{{ path }}", ctx)).isEqualTo("C:\\Users\\foo");
192+
}
193+
194+
@Test
195+
public void backslashInVariableValueIsUnaffectedByFlag_string() {
196+
ImmutableMap<String, Object> ctx = ImmutableMap.of("path", "C:\\Users\\foo");
197+
assertThat(stringLegacy().render("{{ path }}", ctx)).isEqualTo("C:\\Users\\foo");
198+
assertThat(stringNew().render("{{ path }}", ctx)).isEqualTo("C:\\Users\\foo");
199+
}
200+
201+
// ── New behaviour: simple expressions are unaffected ──────────────────────
202+
//
203+
// Expressions with no backslash should behave identically under both modes.
204+
205+
@Test
206+
public void charNew_simpleExpressionUnchanged() {
207+
assertThat(charNew().render("{{ greeting }}", ImmutableMap.of("greeting", "hello")))
208+
.isEqualTo("hello");
209+
}
210+
211+
@Test
212+
public void stringNew_simpleExpressionUnchanged() {
213+
assertThat(stringNew().render("{{ greeting }}", ImmutableMap.of("greeting", "hello")))
214+
.isEqualTo("hello");
215+
}
216+
217+
// ── LegacyOverrides preset assertions ─────────────────────────────────────
218+
//
219+
// handleBackslashInQuotesOnly is an explicit opt-in only. It is NOT included
220+
// in THREE_POINT_0 or NONE because existing templates may rely on the legacy
221+
// behaviour of \} preventing delimiter recognition. Inclusion in a preset
222+
// can be reconsidered in a future major version.
223+
224+
@Test
225+
public void allPresetDoesNotEnableNewBackslashHandling() {
226+
assertThat(LegacyOverrides.ALL.isHandleBackslashInQuotesOnly()).isTrue();
227+
}
228+
229+
@Test
230+
public void threePointZeroPresetDoesNotEnableNewBackslashHandling() {
231+
assertThat(LegacyOverrides.THREE_POINT_0.isHandleBackslashInQuotesOnly()).isFalse();
232+
}
233+
234+
@Test
235+
public void nonePresetKeepsLegacyBackslashHandling() {
236+
assertThat(LegacyOverrides.NONE.isHandleBackslashInQuotesOnly()).isFalse();
237+
}
238+
}

0 commit comments

Comments
 (0)