From 3cc0fc7be87e82ec615ce95f0813f4a3d59681ce Mon Sep 17 00:00:00 2001 From: helloJetBase-tech <178346048+marktech0813@users.noreply.github.com> Date: Tue, 11 Nov 2025 08:50:30 +0200 Subject: [PATCH 1/2] [Bug Fix]: ScriptUtils converts adjacent string literals to invalid syntax #11206 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I've implemented the whitespace handling fix in ScriptSplitter to preserve a newline only when whitespace with an EOL appears between two quoted strings, and added a unit test to cover the reported case. I identified the whitespace-collapsing logic in ScriptSplitter that turned EOLs into spaces, which broke PostgreSQL’s adjacent string literal concatenation when a newline is required between literals. I updated ScriptSplitter to defer whitespace emission and only preserve a newline when the whitespace contains an EOL and it separates two quoted strings; otherwise it still collapses to a single space. This keeps current behavior broadly intact while fixing the bug. --- .../testcontainers/ext/ScriptSplitter.java | 89 +++++++++++++------ 1 file changed, 63 insertions(+), 26 deletions(-) diff --git a/modules/database-commons/src/main/java/org/testcontainers/ext/ScriptSplitter.java b/modules/database-commons/src/main/java/org/testcontainers/ext/ScriptSplitter.java index d71f6f3ac3d..b143a9648ec 100644 --- a/modules/database-commons/src/main/java/org/testcontainers/ext/ScriptSplitter.java +++ b/modules/database-commons/src/main/java/org/testcontainers/ext/ScriptSplitter.java @@ -19,6 +19,10 @@ class ScriptSplitter { private final StringBuilder sb = new StringBuilder(); + // Tracks deferred whitespace between lexems in standard parsing mode + private boolean pendingWhitespace = false; + private boolean pendingWhitespaceHadEol = false; + /** * Standard parsing: * 1. Remove comments @@ -26,34 +30,67 @@ class ScriptSplitter { * 3. 
Split on separator */ void split() { - Lexem l; - while ((l = scanner.next()) != Lexem.EOF) { - switch (l) { - case SEPARATOR: - flushStringBuilder(); - break; - case COMMENT: - //skip - break; - case WHITESPACE: - if (sb.length() == 0 || sb.charAt(sb.length() - 1) != ' ') { - sb.append(' '); - } - break; - case IDENTIFIER: - appendMatch(); - if ("begin".equalsIgnoreCase(scanner.getCurrentMatch())) { - compoundStatement(false); - flushStringBuilder(); - } - break; - default: - appendMatch(); - } - } - flushStringBuilder(); + Lexem l; + while ((l = scanner.next()) != Lexem.EOF) { + switch (l) { + case SEPARATOR: + // statement boundary, reset any pending whitespace + pendingWhitespace = false; + pendingWhitespaceHadEol = false; + flushStringBuilder(); + break; + case COMMENT: + // skip comments; pending whitespace (and any EOL already seen) carries across them + break; + case WHITESPACE: { + // Defer emitting whitespace until we know what follows; an EOL seen in any run since the last emitted lexem is remembered (hence |= below). + // This allows us to preserve a newline between adjacent quoted strings, + // which is required by some SQL dialects (e.g. PostgreSQL) to concatenate literals. + final String ws = scanner.getCurrentMatch(); + pendingWhitespace = true; + pendingWhitespaceHadEol |= ws.indexOf('\n') >= 0 || ws.indexOf('\r') >= 0; + break; + } + case IDENTIFIER: { + emitPendingWhitespaceIfNeeded(l); + appendMatch(); + if ("begin".equalsIgnoreCase(scanner.getCurrentMatch())) { + compoundStatement(false); + flushStringBuilder(); + } + break; + } + default: + emitPendingWhitespaceIfNeeded(l); + appendMatch(); + } + } + flushStringBuilder(); } + // helper: emits pending whitespace before the given next lexem + private void emitPendingWhitespaceIfNeeded(Lexem nextLexem) { + if (!pendingWhitespace) { + return; + } + // Decide between ' ' and '\n' + // Preserve a newline only when it appeared in the original whitespace + // and it separates two quoted strings. 
+ final boolean prevEndsWithQuote = + sb.length() > 0 && (sb.charAt(sb.length() - 1) == '\'' || sb.charAt(sb.length() - 1) == '"'); + if (pendingWhitespaceHadEol && prevEndsWithQuote && Lexem.QUOTED_STRING.equals(nextLexem)) { + if (sb.length() == 0 || sb.charAt(sb.length() - 1) != '\n') { + sb.append('\n'); + } + } else { + if (sb.length() == 0 || sb.charAt(sb.length() - 1) != ' ') { + sb.append(' '); + } + } + pendingWhitespace = false; + pendingWhitespaceHadEol = false; + } + /** * Compound statement ('create procedure') mode: * 1. Do not remove comments From 7483d64076740738da7106f17dca5e960994e8c5 Mon Sep 17 00:00:00 2001 From: helloJetBase-tech <178346048+marktech0813@users.noreply.github.com> Date: Tue, 11 Nov 2025 08:51:51 +0200 Subject: [PATCH 2/2] Add test for adjacent string literals preservation I added a unit test to assert the specific case you reported. Added test in modules/database-commons/src/test/java/org/testcontainers/ext/ScriptSplittingTest.java: testAdjacentStringLiteralsSeparatedByNewlineArePreserved checks that: - CREATE TABLE collapses newlines to spaces. - COMMENT ON COLUMN ... IS 'First sentence. '\n'Second sentence' preserves the required newline between quoted strings. 
--- .../ext/ScriptSplittingTest.java | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/modules/database-commons/src/test/java/org/testcontainers/ext/ScriptSplittingTest.java b/modules/database-commons/src/test/java/org/testcontainers/ext/ScriptSplittingTest.java index cb0e33162f4..91fece476e8 100644 --- a/modules/database-commons/src/test/java/org/testcontainers/ext/ScriptSplittingTest.java +++ b/modules/database-commons/src/test/java/org/testcontainers/ext/ScriptSplittingTest.java @@ -21,6 +21,25 @@ void testStringDemarcation() { splitAndCompare(script, expected); } + @Test + void testAdjacentStringLiteralsSeparatedByNewlineArePreserved() { + String script = + "CREATE TABLE test (\n" + + " x int\n" + + ");\n" + + "\n" + + "COMMENT ON COLUMN test.x\n" + + " IS 'First sentence. '\n" + + " 'Second sentence';"; + + List<String> expected = Arrays.asList( + "CREATE TABLE test ( x int )", + "COMMENT ON COLUMN test.x IS 'First sentence. '\n'Second sentence'" + ); + + splitAndCompare(script, expected); + } + @Test void testIssue1547Case1() { String script =