diff --git a/src/main/java/com/hubspot/jinjava/tree/parse/ExpressionToken.java b/src/main/java/com/hubspot/jinjava/tree/parse/ExpressionToken.java index d8d9996d5..1c0d679c0 100644 --- a/src/main/java/com/hubspot/jinjava/tree/parse/ExpressionToken.java +++ b/src/main/java/com/hubspot/jinjava/tree/parse/ExpressionToken.java @@ -54,7 +54,14 @@ public int getType() { @Override protected void parse() { - this.expr = WhitespaceUtils.unwrap(image, "{{", "}}"); + // Use the symbols-derived delimiter strings instead of the hardcoded "{{" / "}}" + // so that custom delimiters (e.g. "\VAR{" / "}") are stripped correctly. + this.expr = + WhitespaceUtils.unwrap( + image, + getSymbols().getExpressionStart(), + getSymbols().getExpressionEnd() + ); this.expr = handleTrim(expr); this.expr = StringUtils.trimToEmpty(this.expr); } diff --git a/src/main/java/com/hubspot/jinjava/tree/parse/NoteToken.java b/src/main/java/com/hubspot/jinjava/tree/parse/NoteToken.java index 3f5360e67..450f9ccbd 100644 --- a/src/main/java/com/hubspot/jinjava/tree/parse/NoteToken.java +++ b/src/main/java/com/hubspot/jinjava/tree/parse/NoteToken.java @@ -48,8 +48,11 @@ public int getType() { */ @Override protected void parse() { - if (image.length() > 4) { // {# #} - handleTrim(image.substring(2, image.length() - 2)); + int startLen = getSymbols().getCommentStartLength(); + int endLen = getSymbols().getCommentEndLength(); + + if (image.length() > startLen + endLen) { + handleTrim(image.substring(startLen, image.length() - endLen)); } content = ""; } diff --git a/src/main/java/com/hubspot/jinjava/tree/parse/StringTokenScannerSymbols.java b/src/main/java/com/hubspot/jinjava/tree/parse/StringTokenScannerSymbols.java new file mode 100644 index 000000000..242abd241 --- /dev/null +++ b/src/main/java/com/hubspot/jinjava/tree/parse/StringTokenScannerSymbols.java @@ -0,0 +1,269 @@ +/********************************************************************** + * Copyright (c) 2014 HubSpot Inc. 
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ **********************************************************************/
+package com.hubspot.jinjava.tree.parse;
+
+/**
+ * A {@link TokenScannerSymbols} implementation that supports arbitrary multi-character
+ * delimiter strings, addressing issue #195.
+ *
+ * <p>Unlike {@link DefaultTokenScannerSymbols}, which is constrained to single-character
+ * prefixes and postfixes, this class allows any non-empty string for each of the six
+ * delimiter roles. The delimiters do not need to share a common prefix character.
+ *
+ * <p>{@link TokenScanner} detects this class via {@link #isStringBased()} and activates
+ * a string-matching scan path. {@link TagToken} and {@link NoteToken} use the length
+ * accessors on {@link TokenScannerSymbols} (e.g. {@link #getTagStartLength()}) to strip
+ * delimiters correctly regardless of length, while {@link ExpressionToken} unwraps its
+ * image with {@link #getExpressionStart()} and {@link #getExpressionEnd()}.
+ *
+ * <p>The single-character abstract methods inherited from {@link TokenScannerSymbols}
+ * return Unicode Private-Use-Area sentinel values, except {@link #getNewlineChar()} and
+ * {@link #getTrimChar()}, which return the real newline and {@code '-'} characters.
+ * The sentinels are used only as token-kind discriminators inside {@link Token#newToken}
+ * and must never be used for scanning template text.
+ *
+ * <pre>{@code
+ * JinjavaConfig config = JinjavaConfig.newBuilder()
+ *   .withTokenScannerSymbols(StringTokenScannerSymbols.builder()
+ *     .withVariableStartString("\\VAR{")
+ *     .withVariableEndString("}")
+ *     .withBlockStartString("\\BLOCK{")
+ *     .withBlockEndString("}")
+ *     .withCommentStartString("\\#{")
+ *     .withCommentEndString("}")
+ *     .build())
+ *   .build();
+ * }</pre>
+ */
+public class StringTokenScannerSymbols extends TokenScannerSymbols {
+
+  private static final long serialVersionUID = 1L;
+
+  // ── Internal sentinel chars ────────────────────────────────────────────────
+  // Unicode Private Use Area values. They are placeholders, not delimiters:
+  // ordinary template text is very unlikely to contain them (nothing strictly
+  // forbids it, though), and they give Token.newToken()'s if-chain distinct
+  // values to dispatch on.
+  static final char SENTINEL_FIXED = '\uE000';
+  static final char SENTINEL_NOTE = '\uE001';
+  static final char SENTINEL_TAG = '\uE002';
+  static final char SENTINEL_EXPR_START = '\uE003';
+  static final char SENTINEL_EXPR_END = '\uE004';
+  static final char SENTINEL_PREFIX = '\uE005'; // unused for scanning
+  static final char SENTINEL_POSTFIX = '\uE006'; // unused for scanning
+  // The next two are real characters rather than placeholders, despite the prefix.
+  static final char SENTINEL_NEWLINE = '\n'; // real newline for line tracking
+  static final char SENTINEL_TRIM = '-'; // real trim char
+
+  // ── The configured string delimiters ──────────────────────────────────────
+  private final String variableStartString;
+  private final String variableEndString;
+  private final String blockStartString;
+  private final String blockEndString;
+  private final String commentStartString;
+  private final String commentEndString;
+  // Optional; null means disabled. Never empty (enforced by the Builder).
+  private final String lineStatementPrefix;
+  private final String lineCommentPrefix;
+
+  /** Copies the (already validated) delimiter strings out of the builder. */
+  private StringTokenScannerSymbols(Builder builder) {
+    this.variableStartString = builder.variableStartString;
+    this.variableEndString = builder.variableEndString;
+    this.blockStartString = builder.blockStartString;
+    this.blockEndString = builder.blockEndString;
+    this.commentStartString = builder.commentStartString;
+    this.commentEndString = builder.commentEndString;
+    this.lineStatementPrefix = builder.lineStatementPrefix;
+    this.lineCommentPrefix = builder.lineCommentPrefix;
+  }
+
+  // ── Abstract char contract — returns sentinels only ───────────────────────
+
+  /** @return the placeholder {@link #SENTINEL_PREFIX}; not a real delimiter character */
+  @Override
+  public char getPrefixChar() {
+    return SENTINEL_PREFIX;
+  }
+
+  /** @return the placeholder {@link #SENTINEL_POSTFIX}; not a real delimiter character */
+  @Override
+  public char getPostfixChar() {
+    return SENTINEL_POSTFIX;
+  }
+
+  /** @return the placeholder {@link #SENTINEL_FIXED} */
+  @Override
+  public char getFixedChar() {
+    return SENTINEL_FIXED;
+  }
+
+  /** @return the placeholder {@link #SENTINEL_NOTE} */
+  @Override
+  public char getNoteChar() {
+    return SENTINEL_NOTE;
+  }
+
+  /** @return the placeholder {@link #SENTINEL_TAG} */
+  @Override
+  public char getTagChar() {
+    return SENTINEL_TAG;
+  }
+
+  /** @return the placeholder {@link #SENTINEL_EXPR_START} */
+  @Override
+  public char getExprStartChar() {
+    return SENTINEL_EXPR_START;
+  }
+
+  /** @return the placeholder {@link #SENTINEL_EXPR_END} */
+  @Override
+  public char getExprEndChar() {
+    return SENTINEL_EXPR_END;
+  }
+
+  /** @return the real newline character (not a placeholder) */
+  @Override
+  public char getNewlineChar() {
+    return SENTINEL_NEWLINE;
+  }
+
+  /** @return the real trim character {@code '-'} (not a placeholder) */
+  @Override
+  public char getTrimChar() {
+    return SENTINEL_TRIM;
+  }
+
+  // ── String-level getters: MUST override the base-class lazy cache ──────────
+  // The base class builds these from the char methods above, which would produce
+  // garbage sentinel strings. We override them to return the real delimiters so
+  // that ExpressionToken, TagToken, and NoteToken strip content correctly.
+
+  /** @return the configured variable/expression opening delimiter */
+  @Override
+  public String getExpressionStart() {
+    return variableStartString;
+  }
+
+  /** @return the configured variable/expression closing delimiter */
+  @Override
+  public String getExpressionEnd() {
+    return variableEndString;
+  }
+
+  /** @return the configured block/tag opening delimiter */
+  @Override
+  public String getExpressionStartWithTag() {
+    return blockStartString;
+  }
+
+  /** @return the configured block/tag closing delimiter */
+  @Override
+  public String getExpressionEndWithTag() {
+    return blockEndString;
+  }
+
+  /** @return the configured comment opening delimiter */
+  @Override
+  public String getOpeningComment() {
+    return commentStartString;
+  }
+
+  /** @return the configured comment closing delimiter */
+  @Override
+  public String getClosingComment() {
+    return commentEndString;
+  }
+
+  /** @return the line statement prefix, or {@code null} when the feature is disabled */
+  @Override
+  public String getLineStatementPrefix() {
+    return lineStatementPrefix;
+  }
+
+  /** @return the line comment prefix, or {@code null} when the feature is disabled */
+  @Override
+  public String getLineCommentPrefix() {
+    return lineCommentPrefix;
+  }
+
+  // ── isStringBased flag ────────────────────────────────────────────────────
+
+  /** @return always {@code true}: this implementation carries string delimiters */
+  @Override
+  public boolean isStringBased() {
+    return true;
+  }
+
+  // ── Builder ────────────────────────────────────────────────────────────────
+
+  /** @return a new builder pre-populated with the standard Jinja2 delimiters */
+  public static Builder builder() {
+    return new Builder();
+  }
+
+  /**
+   * Fluent builder for {@link StringTokenScannerSymbols}. Every setter validates its
+   * argument immediately, so {@link #build()} never produces an instance holding an
+   * empty delimiter or prefix.
+   */
+  public static final class Builder {
+
+    // Defaults mirror the standard Jinja2 delimiters, so building with no
+    // overrides behaves identically to DefaultTokenScannerSymbols.
+    private String variableStartString = "{{";
+    private String variableEndString = "}}";
+    private String blockStartString = "{%";
+    private String blockEndString = "%}";
+    private String commentStartString = "{#";
+    private String commentEndString = "#}";
+    private String lineStatementPrefix = null; // disabled by default
+    private String lineCommentPrefix = null; // disabled by default
+
+    /**
+     * Sets the variable/expression opening delimiter.
+     *
+     * @throws IllegalArgumentException if {@code s} is null or empty
+     */
+    public Builder withVariableStartString(String s) {
+      this.variableStartString = requireNonEmpty(s, "variableStartString");
+      return this;
+    }
+
+    /**
+     * Sets the variable/expression closing delimiter.
+     *
+     * @throws IllegalArgumentException if {@code s} is null or empty
+     */
+    public Builder withVariableEndString(String s) {
+      this.variableEndString = requireNonEmpty(s, "variableEndString");
+      return this;
+    }
+
+    /**
+     * Sets the block/tag opening delimiter.
+     *
+     * @throws IllegalArgumentException if {@code s} is null or empty
+     */
+    public Builder withBlockStartString(String s) {
+      this.blockStartString = requireNonEmpty(s, "blockStartString");
+      return this;
+    }
+
+    /**
+     * Sets the block/tag closing delimiter.
+     *
+     * @throws IllegalArgumentException if {@code s} is null or empty
+     */
+    public Builder withBlockEndString(String s) {
+      this.blockEndString = requireNonEmpty(s, "blockEndString");
+      return this;
+    }
+
+    /**
+     * Sets the comment opening delimiter.
+     *
+     * @throws IllegalArgumentException if {@code s} is null or empty
+     */
+    public Builder withCommentStartString(String s) {
+      this.commentStartString = requireNonEmpty(s, "commentStartString");
+      return this;
+    }
+
+    /**
+     * Sets the comment closing delimiter.
+     *
+     * @throws IllegalArgumentException if {@code s} is null or empty
+     */
+    public Builder withCommentEndString(String s) {
+      this.commentEndString = requireNonEmpty(s, "commentEndString");
+      return this;
+    }
+
+    /**
+     * Sets the line statement prefix (e.g. {@code "%%"}). A line beginning with
+     * this prefix is treated as a block tag, equivalent to wrapping its content
+     * in the configured block delimiters. Pass {@code null} to disable (default).
+     *
+     * @throws IllegalArgumentException if {@code s} is the empty string; every line
+     *     starts with the empty string, so it can never act as a meaningful prefix
+     */
+    public Builder withLineStatementPrefix(String s) {
+      this.lineStatementPrefix = requireNullOrNonEmpty(s, "lineStatementPrefix");
+      return this;
+    }
+
+    /**
+     * Sets the line comment prefix (e.g. {@code "%#"}). A line beginning with
+     * this prefix is stripped entirely from the output. Pass {@code null} to
+     * disable (default).
+     *
+     * @throws IllegalArgumentException if {@code s} is the empty string; every line
+     *     starts with the empty string, so it can never act as a meaningful prefix
+     */
+    public Builder withLineCommentPrefix(String s) {
+      this.lineCommentPrefix = requireNullOrNonEmpty(s, "lineCommentPrefix");
+      return this;
+    }
+
+    /** @return a new immutable symbols instance holding the current builder values */
+    public StringTokenScannerSymbols build() {
+      return new StringTokenScannerSymbols(this);
+    }
+
+    /** Rejects null and empty values; used for the six mandatory delimiters. */
+    private static String requireNonEmpty(String value, String name) {
+      if (value == null || value.isEmpty()) {
+        throw new IllegalArgumentException(name + " must not be null or empty");
+      }
+      return value;
+    }
+
+    /** Accepts null (feature disabled) but rejects the empty string; used for the optional prefixes. */
+    private static String requireNullOrNonEmpty(String value, String name) {
+      if (value != null && value.isEmpty()) {
+        throw new IllegalArgumentException(
+          name + " must not be empty; pass null to disable it"
+        );
+      }
+      return value;
+    }
+  }
+}
diff --git a/src/main/java/com/hubspot/jinjava/tree/parse/TagToken.java b/src/main/java/com/hubspot/jinjava/tree/parse/TagToken.java
index a737dd96c..0c500c145 100644
--- a/src/main/java/com/hubspot/jinjava/tree/parse/TagToken.java
+++ b/src/main/java/com/hubspot/jinjava/tree/parse/TagToken.java
@@ -54,7 +54,10 @@ public int getType() {
*/
@Override
protected void parse() {
- if (image.length() < 4) {
+ int startLen = getSymbols().getTagStartLength();
+ int endLen = getSymbols().getTagEndLength();
+
+ if (image.length() < startLen + endLen) {
throw new TemplateSyntaxException(
image,
"Malformed tag token",
@@ -63,7 +66,7 @@ protected void parse() {
);
}
- content = image.substring(2, image.length() - 2);
+ content = image.substring(startLen, image.length() - endLen);
content = handleTrim(content);
int nameStart = -1, pos = 0, len = content.length();
diff --git a/src/main/java/com/hubspot/jinjava/tree/parse/TokenScanner.java b/src/main/java/com/hubspot/jinjava/tree/parse/TokenScanner.java
index 7e53b295a..fc203ef21 100644
--- a/src/main/java/com/hubspot/jinjava/tree/parse/TokenScanner.java
+++ b/src/main/java/com/hubspot/jinjava/tree/parse/TokenScanner.java
@@ -42,6 +42,24 @@ public class TokenScanner extends AbstractIteratorThe default returns {@code false}, so all existing subclasses are unaffected. + * {@link StringTokenScannerSymbols} overrides this to return {@code true}. + */ + public boolean isStringBased() { + return false; + } + + /** + * Length of the variable/expression opening delimiter (e.g. 2 for {@code "{{"}), + * used by {@link ExpressionToken#parse()} instead of the hardcoded constant 2. + */ + public int getExpressionStartLength() { + return getExpressionStart().length(); + } + + /** + * Length of the variable/expression closing delimiter (e.g. 2 for {@code "}}"}), + * used by {@link ExpressionToken#parse()} instead of the hardcoded constant 2. + */ + public int getExpressionEndLength() { + return getExpressionEnd().length(); + } + + /** + * Length of the block/tag opening delimiter (e.g. 2 for {@code "{%"}), + * used by {@link TagToken#parse()} instead of the hardcoded constant 2. + */ + public int getTagStartLength() { + return getExpressionStartWithTag().length(); + } + + /** + * Length of the block/tag closing delimiter (e.g. 2 for {@code "%}"}), + * used by {@link TagToken#parse()} instead of the hardcoded constant 2. + */ + public int getTagEndLength() { + return getExpressionEndWithTag().length(); + } + + /** + * Length of the comment opening delimiter (e.g. 2 for {@code "{#"}), + * used by {@link NoteToken#parse()} instead of the hardcoded constant 2. + */ + public int getCommentStartLength() { + return getOpeningComment().length(); + } + + /** + * Length of the comment closing delimiter (e.g. 2 for {@code "#}"}), + * used by {@link NoteToken#parse()} instead of the hardcoded constant 2. + */ + public int getCommentEndLength() { + return getClosingComment().length(); + } + + /** + * Optional line statement prefix (e.g. {@code "%%"}). 
When non-null, any line + * that begins with this prefix (after optional horizontal whitespace) is treated + * as a block tag statement, equivalent to wrapping its content in the block + * delimiters. Returns {@code null} by default (feature disabled). + * + *
Only used by {@link StringTokenScannerSymbols}; has no effect in the + * char-based path. + */ + public String getLineStatementPrefix() { + return null; + } + + /** + * Optional line comment prefix (e.g. {@code "%#"}). When non-null, any line + * that begins with this prefix (after optional horizontal whitespace) is stripped + * entirely from the output. Returns {@code null} by default (feature disabled). + * + *
Only used by {@link StringTokenScannerSymbols}; has no effect in the
+ * char-based path.
+ */
+ public String getLineCommentPrefix() {
+ // Base-class default: no line comment prefix is configured. Subclasses that
+ // support the feature (e.g. StringTokenScannerSymbols) override this method.
+ return null;
+ }
}
diff --git a/src/test/java/com/hubspot/jinjava/tree/parse/StringTokenScannerSymbolsTest.java b/src/test/java/com/hubspot/jinjava/tree/parse/StringTokenScannerSymbolsTest.java
new file mode 100644
index 000000000..50affae8c
--- /dev/null
+++ b/src/test/java/com/hubspot/jinjava/tree/parse/StringTokenScannerSymbolsTest.java
@@ -0,0 +1,346 @@
+package com.hubspot.jinjava.tree.parse;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.assertj.core.api.Assertions.assertThatThrownBy;
+
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Lists;
+import com.hubspot.jinjava.BaseJinjavaTest;
+import com.hubspot.jinjava.Jinjava;
+import com.hubspot.jinjava.JinjavaConfig;
+import com.hubspot.jinjava.lib.filter.JoinFilterTest.User;
+import java.util.HashMap;
+import org.junit.Before;
+import org.junit.Test;
+
+public class StringTokenScannerSymbolsTest {
+
+ // ── Shared symbol configurations ───────────────────────────────────────────
+
+ /** LaTeX-style delimiters as used in the original issue #195 example. */
+ private static final StringTokenScannerSymbols LATEX_SYMBOLS = StringTokenScannerSymbols
+ .builder()
+ .withVariableStartString("\\VAR{")
+ .withVariableEndString("}")
+ .withBlockStartString("\\BLOCK{")
+ .withBlockEndString("}")
+ .withCommentStartString("\\#{")
+ .withCommentEndString("}")
+ .build();
+
+ /** Angle-bracket style — same delimiters as the existing CustomTokenScannerSymbolsTest. */
+ private static final StringTokenScannerSymbols ANGLE_SYMBOLS = StringTokenScannerSymbols
+ .builder()
+ .withVariableStartString("<<")
+ .withVariableEndString(">>")
+ .withBlockStartString("<%")
+ .withBlockEndString("%>")
+ .withCommentStartString("<#")
+ .withCommentEndString("#>")
+ .build();
+
+ private Jinjava latexJinjava;
+ private Jinjava angleJinjava;
+
+ @Before
+ public void setup() {
+ latexJinjava =
+ new Jinjava(
+ BaseJinjavaTest.newConfigBuilder().withTokenScannerSymbols(LATEX_SYMBOLS).build()
+ );
+ latexJinjava
+ .getGlobalContext()
+ .put("numbers", Lists.newArrayList(1L, 2L, 3L, 4L, 5L));
+
+ angleJinjava =
+ new Jinjava(
+ BaseJinjavaTest.newConfigBuilder().withTokenScannerSymbols(ANGLE_SYMBOLS).build()
+ );
+ angleJinjava
+ .getGlobalContext()
+ .put("numbers", Lists.newArrayList(1L, 2L, 3L, 4L, 5L));
+ }
+
+ // ── Plain text ─────────────────────────────────────────────────────────────
+
+  /** Text containing no delimiters at all is returned unchanged by both interpreters. */
+  @Test
+  public void itRendersPlainText() {
+    String plainText = "jinjava interpreter works correctly";
+    // Latex-style interpreter first, then angle-style, as two independent renders.
+    for (Jinjava interpreter : new Jinjava[] { latexJinjava, angleJinjava }) {
+      String rendered = interpreter.render(plainText, new HashMap<>());
+      assertThat(rendered).isEqualTo(plainText);
+    }
+  }
+
+ // ── Variable expressions ───────────────────────────────────────────────────
+
+ @Test
+ public void itRendersVariablesWithLatexSymbols() {
+ assertThat(latexJinjava.render("\\VAR{ name }", ImmutableMap.of("name", "World")))
+ .isEqualTo("World");
+ }
+
+ @Test
+ public void itRendersVariablesWithAngleSymbols() {
+ assertThat(angleJinjava.render("<< name >>", ImmutableMap.of("name", "World")))
+ .isEqualTo("World");
+ }
+
+ // ── Default delimiters pass through as literal text ────────────────────────
+
+ @Test
+ public void itPassesThroughDefaultCurlyBracesAsLiteralText() {
+ // With custom delimiters, {{ }} must be treated as plain text, not expressions.
+ assertThat(
+ latexJinjava.render(
+ "{{ not a variable }} \\VAR{ name }",
+ ImmutableMap.of("name", "Jorge")
+ )
+ )
+ .isEqualTo("{{ not a variable }} Jorge");
+
+ assertThat(
+ angleJinjava.render(
+ "{{ not a variable }} << name >>",
+ ImmutableMap.of("name", "Jorge")
+ )
+ )
+ .isEqualTo("{{ not a variable }} Jorge");
+ }
+
+ // ── Block tags ─────────────────────────────────────────────────────────────
+
+ @Test
+ public void itRendersIfBlockWithLatexSymbols() {
+ assertThat(
+ latexJinjava.render(
+ "\\BLOCK{ if show }hello\\BLOCK{ endif }",
+ ImmutableMap.of("show", true)
+ )
+ )
+ .isEqualTo("hello");
+
+ assertThat(
+ latexJinjava.render(
+ "\\BLOCK{ if show }hello\\BLOCK{ endif }",
+ ImmutableMap.of("show", false)
+ )
+ )
+ .isEqualTo("");
+ }
+
+ @Test
+ public void itRendersSetBlockWithAngleSymbols() {
+ assertThat(
+ angleJinjava.render(
+ "<% set d=d | default(\"some random value\") %><< d >>",
+ new HashMap<>()
+ )
+ )
+ .isEqualTo("some random value");
+ }
+
+ // ── Comments ───────────────────────────────────────────────────────────────
+
+ @Test
+ public void itStripsCommentsWithLatexSymbols() {
+ assertThat(latexJinjava.render("before\\#{ this is ignored }after", new HashMap<>()))
+ .isEqualTo("beforeafter");
+ }
+
+ @Test
+ public void itStripsCommentsWithAngleSymbols() {
+ assertThat(angleJinjava.render("before<# this is ignored #>after", new HashMap<>()))
+ .isEqualTo("beforeafter");
+ }
+
+ // ── Filters ────────────────────────────────────────────────────────────────
+
+ @Test
+ public void itRendersFiltersWithLatexSymbols() {
+ assertThat(latexJinjava.render("\\VAR{ [1, 2, 3, 3]|union(null) }", new HashMap<>()))
+ .isEqualTo("[1, 2, 3]");
+ assertThat(
+ latexJinjava.render("\\VAR{ numbers|select('equalto', 3) }", new HashMap<>())
+ )
+ .isEqualTo("[3]");
+ }
+
+ @Test
+ public void itRendersFiltersWithAngleSymbols() {
+ assertThat(angleJinjava.render("<< [1, 2, 3, 3]|union(null) >>", new HashMap<>()))
+ .isEqualTo("[1, 2, 3]");
+ assertThat(angleJinjava.render("<< numbers|select('equalto', 3) >>", new HashMap<>()))
+ .isEqualTo("[3]");
+ }
+
+ @Test
+ public void itRendersMapFilterWithLatexSymbols() {
+ assertThat(
+ latexJinjava.render(
+ "\\VAR{ users|map(attribute='username')|join(', ') }",
+ ImmutableMap.of(
+ "users",
+ (Object) Lists.newArrayList(new User("foo"), new User("bar"))
+ )
+ )
+ )
+ .isEqualTo("foo, bar");
+ }
+
+ @Test
+ public void itRendersMapFilterWithAngleSymbols() {
+ assertThat(
+ angleJinjava.render(
+ "<< users|map(attribute='username')|join(', ') >>",
+ ImmutableMap.of(
+ "users",
+ (Object) Lists.newArrayList(new User("foo"), new User("bar"))
+ )
+ )
+ )
+ .isEqualTo("foo, bar");
+ }
+
+ // ── Delimiter characters inside string literals in expressions ─────────────
+
+ @Test
+ public void itHandlesClosingDelimiterInsideQuotedString() {
+ // The "}" inside the default string must not prematurely close \VAR{
+ assertThat(latexJinjava.render("\\VAR{ name | default(\"}\") }", new HashMap<>()))
+ .isEqualTo("}");
+ }
+
+ @Test
+ public void itHandlesClosingDelimiterInsideQuotedStringAngle() {
+ // ">>" inside a quoted string must not close the << expression
+ assertThat(angleJinjava.render("<< name | default(\">>\") >>", new HashMap<>()))
+ .isEqualTo(">>");
+ }
+
+ // ── Builder defaults produce same behaviour as DefaultTokenScannerSymbols ──
+
+ @Test
+ public void defaultBuilderBehavesLikeDefaultSymbols() {
+ Jinjava defaultJinjava = new Jinjava();
+ Jinjava stringBasedDefaultJinjava = new Jinjava(
+ JinjavaConfig
+ .newBuilder()
+ .withTokenScannerSymbols(StringTokenScannerSymbols.builder().build())
+ .build()
+ );
+ String template = "{{ greeting }}, {{ name }}!";
+ ImmutableMap