From 5ae2b06afc1bb5adf0b19bd4f0fe9744898599af Mon Sep 17 00:00:00 2001 From: Simeon Widdis Date: Mon, 10 Nov 2025 19:07:47 +0000 Subject: [PATCH 1/4] Add workflow for SQL CLI integration tests Signed-off-by: Simeon Widdis --- .../workflows/sql-cli-integration-test.yml | 103 ++++++++++++++++++ 1 file changed, 103 insertions(+) create mode 100644 .github/workflows/sql-cli-integration-test.yml diff --git a/.github/workflows/sql-cli-integration-test.yml b/.github/workflows/sql-cli-integration-test.yml new file mode 100644 index 0000000000..8ea621c71f --- /dev/null +++ b/.github/workflows/sql-cli-integration-test.yml @@ -0,0 +1,103 @@ +name: SQL CLI Integration Test + +# This workflow tests sql-cli against the current SQL changes +# to catch breaking changes before they're published + +on: + pull_request: + paths: + - 'api/**' + - 'sql/**' + - 'ppl/**' + - 'core/**' + - 'opensearch/**' + - 'common/**' + - 'protocol/**' + - '**/*.gradle' + - '.github/workflows/sql-cli-integration-test.yml' + push: + branches: + - main + - '[0-9]+.[0-9]+' + - '[0-9]+.x' + paths: + - 'api/**' + - 'sql/**' + - 'ppl/**' + - 'core/**' + - 'opensearch/**' + - 'common/**' + - 'protocol/**' + - '**/*.gradle' + - '.github/workflows/sql-cli-integration-test.yml' + workflow_dispatch: + +jobs: + test-sql-cli-integration: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + java: [21] + + steps: + - name: Checkout SQL repository (current changes) + uses: actions/checkout@v4 + with: + path: sql + + - name: Checkout SQL CLI repository (latest main) + uses: actions/checkout@v4 + with: + repository: opensearch-project/sql-cli + path: sql-cli + ref: main + + - name: Set up JDK ${{ matrix.java }} + uses: actions/setup-java@v4 + with: + distribution: 'temurin' + java-version: ${{ matrix.java }} + + - name: Build and publish SQL modules to Maven Local + working-directory: sql + run: | + echo "Building SQL modules from current branch..." + ./gradlew publishToMavenLocal -x test -x integTest + echo "SQL modules published to Maven Local" + + - name: Verify SQL CLI settings.gradle exists + working-directory: sql-cli + run: | + if [ ! -f "settings.gradle" ]; then + echo "ERROR: settings.gradle not found in sql-cli repository" + echo "The composite build configuration may not be merged yet." + echo "This workflow requires the composite build setup to work." + exit 1 + fi + echo "settings.gradle found, proceeding with tests" + + - name: Run SQL CLI tests with local SQL modules + working-directory: sql-cli + run: | + echo "Running SQL CLI tests against local SQL modules..." + ./gradlew clean test -PuseLocalSql=true --info --stacktrace + + - name: Upload SQL CLI test reports + if: always() + uses: actions/upload-artifact@v4 + continue-on-error: true + with: + name: sql-cli-test-reports-java-${{ matrix.java }} + path: | + sql-cli/build/reports/** + sql-cli/build/test-results/** + + - name: Test Summary + if: always() + run: | + echo "## SQL CLI Integration Test Results" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "Tested SQL CLI against SQL changes from: \`${{ github.ref }}\`" >> $GITHUB_STEP_SUMMARY + echo "SQL CLI version: main branch (latest)" >> $GITHUB_STEP_SUMMARY + echo "Java version: ${{ matrix.java }}" >> $GITHUB_STEP_SUMMARY From bc4e06cca16eaa2e5dd426ceaa20a63ee2528f6c Mon Sep 17 00:00:00 2001 From: Simeon Widdis Date: Fri, 14 Nov 2025 21:50:21 +0000 Subject: [PATCH 2/4] The SPath fix Signed-off-by: Simeon Widdis --- .../src/main/java/org/opensearch/sql/ast/tree/SPath.java | 7 +++++-- .../org/opensearch/sql/ppl/utils/SPathRewriteTest.java | 9 +++++++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/SPath.java b/core/src/main/java/org/opensearch/sql/ast/tree/SPath.java index 89eab6cf16..7a507121fa 100644 --- a/core/src/main/java/org/opensearch/sql/ast/tree/SPath.java +++ b/core/src/main/java/org/opensearch/sql/ast/tree/SPath.java @@ -5,6 +5,8 @@ package org.opensearch.sql.ast.tree; +import static org.opensearch.sql.common.utils.StringUtils.unquoteIdentifier; + import com.google.common.collect.ImmutableList; import java.util.List; import lombok.AllArgsConstructor; @@ -48,8 +50,9 @@ public T accept(AbstractNodeVisitor nodeVisitor, C context) { public Eval rewriteAsEval() { String outField = this.outField; + String unquotedPath = unquoteIdentifier(this.path); if (outField == null) { - outField = this.path; + outField = unquotedPath; } return AstDSL.eval( @@ -57,6 +60,6 @@ public Eval rewriteAsEval() { AstDSL.let( AstDSL.field(outField), AstDSL.function( - "json_extract", AstDSL.field(inField), AstDSL.stringLiteral(this.path)))); + "json_extract", AstDSL.field(inField), AstDSL.stringLiteral(unquotedPath)))); } } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/utils/SPathRewriteTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/utils/SPathRewriteTest.java index e97fb51ea9..45e20c8f73 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/utils/SPathRewriteTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/utils/SPathRewriteTest.java @@ -64,4 +64,13 @@ public void testSpathMissingPathArgumentHandling() { public void testSpathArgumentDeshuffle() { assertEquals(plan("source = t | spath path=a input=a"), plan("source = t | spath input=a a")); } + + @Test + public void testSpathEscapedParse() { + SPath sp = + (SPath) plan("source = t | spath input=f output=o path=`attributes.['cluster.name']`"); + Eval ev = (Eval) plan("source = t | eval o=json_extract(f, \"attributes.['cluster.name']\")"); + + assertEquals(ev, sp.rewriteAsEval()); + } } From b5d6996858e85943a1572a063cfd92583a267e72 Mon Sep 17 00:00:00 2001 From: Simeon Widdis Date: Fri, 14 Nov 2025 21:50:32 +0000 Subject: [PATCH 3/4] Revert "Add workflow for SQL CLI integration tests" This reverts commit 5ae2b06afc1bb5adf0b19bd4f0fe9744898599af. Signed-off-by: Simeon Widdis --- .../workflows/sql-cli-integration-test.yml | 103 ------------------ 1 file changed, 103 deletions(-) delete mode 100644 .github/workflows/sql-cli-integration-test.yml diff --git a/.github/workflows/sql-cli-integration-test.yml b/.github/workflows/sql-cli-integration-test.yml deleted file mode 100644 index 8ea621c71f..0000000000 --- a/.github/workflows/sql-cli-integration-test.yml +++ /dev/null @@ -1,103 +0,0 @@ -name: SQL CLI Integration Test - -# This workflow tests sql-cli against the current SQL changes -# to catch breaking changes before they're published - -on: - pull_request: - paths: - - 'api/**' - - 'sql/**' - - 'ppl/**' - - 'core/**' - - 'opensearch/**' - - 'common/**' - - 'protocol/**' - - '**/*.gradle' - - '.github/workflows/sql-cli-integration-test.yml' - push: - branches: - - main - - '[0-9]+.[0-9]+' - - '[0-9]+.x' - paths: - - 'api/**' - - 'sql/**' - - 'ppl/**' - - 'core/**' - - 'opensearch/**' - - 'common/**' - - 'protocol/**' - - '**/*.gradle' - - '.github/workflows/sql-cli-integration-test.yml' - workflow_dispatch: - -jobs: - test-sql-cli-integration: - runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - java: [21] - - steps: - - name: Checkout SQL repository (current changes) - uses: actions/checkout@v4 - with: - path: sql - - - name: Checkout SQL CLI repository (latest main) - uses: actions/checkout@v4 - with: - repository: opensearch-project/sql-cli - path: sql-cli - ref: main - - - name: Set up JDK ${{ matrix.java }} - uses: actions/setup-java@v4 - with: - distribution: 'temurin' - java-version: ${{ matrix.java }} - - - name: Build and publish SQL modules to Maven Local - working-directory: sql - run: | - echo "Building SQL modules from current branch..." - ./gradlew publishToMavenLocal -x test -x integTest - echo "SQL modules published to Maven Local" - - - name: Verify SQL CLI settings.gradle exists - working-directory: sql-cli - run: | - if [ ! -f "settings.gradle" ]; then - echo "ERROR: settings.gradle not found in sql-cli repository" - echo "The composite build configuration may not be merged yet." - echo "This workflow requires the composite build setup to work." - exit 1 - fi - echo "settings.gradle found, proceeding with tests" - - - name: Run SQL CLI tests with local SQL modules - working-directory: sql-cli - run: | - echo "Running SQL CLI tests against local SQL modules..." - ./gradlew clean test -PuseLocalSql=true --info --stacktrace - - - name: Upload SQL CLI test reports - if: always() - uses: actions/upload-artifact@v4 - continue-on-error: true - with: - name: sql-cli-test-reports-java-${{ matrix.java }} - path: | - sql-cli/build/reports/** - sql-cli/build/test-results/** - - - name: Test Summary - if: always() - run: | - echo "## SQL CLI Integration Test Results" >> $GITHUB_STEP_SUMMARY - echo "" >> $GITHUB_STEP_SUMMARY - echo "Tested SQL CLI against SQL changes from: \`${{ github.ref }}\`" >> $GITHUB_STEP_SUMMARY - echo "SQL CLI version: main branch (latest)" >> $GITHUB_STEP_SUMMARY - echo "Java version: ${{ matrix.java }}" >> $GITHUB_STEP_SUMMARY From 7bf8d1ac7715b923e404c208031d88e8396e4a38 Mon Sep 17 00:00:00 2001 From: Simeon Widdis Date: Tue, 18 Nov 2025 21:22:35 +0000 Subject: [PATCH 4/4] Implement string escaping for paths instead of ident escaping Signed-off-by: Simeon Widdis --- .../org/opensearch/sql/ast/tree/SPath.java | 4 +-- docs/category.json | 1 + docs/user/dql/metadata.rst | 3 ++- docs/user/ppl/cmd/spath.rst | 27 +++++++++++++++---- doctest/test_data/structured.json | 3 +++ doctest/test_docs.py | 1 + doctest/test_mapping/structured.json | 20 ++++++++++++++ ppl/src/main/antlr/OpenSearchPPLParser.g4 | 1 + .../sql/ppl/utils/SPathRewriteTest.java | 10 ++++++- 9 files changed, 61 insertions(+), 9 deletions(-) create mode 100644 doctest/test_data/structured.json create mode 100644 doctest/test_mapping/structured.json diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/SPath.java b/core/src/main/java/org/opensearch/sql/ast/tree/SPath.java index 7a507121fa..a1c0c08a15 100644 --- a/core/src/main/java/org/opensearch/sql/ast/tree/SPath.java +++ b/core/src/main/java/org/opensearch/sql/ast/tree/SPath.java @@ -5,7 +5,7 @@ package org.opensearch.sql.ast.tree; -import static org.opensearch.sql.common.utils.StringUtils.unquoteIdentifier; +import static org.opensearch.sql.common.utils.StringUtils.unquoteText; import com.google.common.collect.ImmutableList; import java.util.List; @@ -50,7 +50,7 @@ public T accept(AbstractNodeVisitor nodeVisitor, C context) { public Eval rewriteAsEval() { String outField = this.outField; - String unquotedPath = unquoteIdentifier(this.path); + String unquotedPath = unquoteText(this.path); if (outField == null) { outField = unquotedPath; } diff --git a/docs/category.json b/docs/category.json index f126904da6..f3fe70ecfa 100644 --- a/docs/category.json +++ b/docs/category.json @@ -46,6 +46,7 @@ "user/ppl/cmd/search.rst", "user/ppl/cmd/showdatasources.rst", "user/ppl/cmd/sort.rst", + "user/ppl/cmd/spath.rst", "user/ppl/cmd/stats.rst", "user/ppl/cmd/streamstats.rst", "user/ppl/cmd/subquery.rst", diff --git a/docs/user/dql/metadata.rst b/docs/user/dql/metadata.rst index 7584c72505..e959a69c8b 100644 --- a/docs/user/dql/metadata.rst +++ b/docs/user/dql/metadata.rst @@ -35,7 +35,7 @@ Example 1: Show All Indices Information SQL query:: os> SHOW TABLES LIKE '%' - fetched rows / total rows = 22/22 + fetched rows / total rows = 23/23 +----------------+-------------+-------------------+------------+---------+----------+------------+-----------+---------------------------+----------------+ | TABLE_CAT | TABLE_SCHEM | TABLE_NAME | TABLE_TYPE | REMARKS | TYPE_CAT | TYPE_SCHEM | TYPE_NAME | SELF_REFERENCING_COL_NAME | REF_GENERATION | |----------------+-------------+-------------------+------------+---------+----------+------------+-----------+---------------------------+----------------| @@ -54,6 +54,7 @@ SQL query:: | docTestCluster | null | otellogs | BASE TABLE | null | null | null | null | null | null | | docTestCluster | null | people | BASE TABLE | null | null | null | null | null | null | | docTestCluster | null | state_country | BASE TABLE | null | null | null | null | null | null | + | docTestCluster | null | structured | BASE TABLE | null | null | null | null | null | null | | docTestCluster | null | time_data | BASE TABLE | null | null | null | null | null | null | | docTestCluster | null | time_data2 | BASE TABLE | null | null | null | null | null | null | | docTestCluster | null | time_test | BASE TABLE | null | null | null | null | null | null | diff --git a/docs/user/ppl/cmd/spath.rst b/docs/user/ppl/cmd/spath.rst index 7defb4437f..85ba328c27 100644 --- a/docs/user/ppl/cmd/spath.rst +++ b/docs/user/ppl/cmd/spath.rst @@ -37,10 +37,10 @@ The simplest spath is to extract a single field. This extracts `n` from the `doc PPL query:: - PPL> source=test_spath | spath input=doc n; + os> source=structured | spath input=doc_n n | fields doc_n n; fetched rows / total rows = 3/3 +----------+---+ - | doc | n | + | doc_n | n | |----------+---| | {"n": 1} | 1 | | {"n": 2} | 2 | @@ -54,10 +54,10 @@ These queries demonstrate more JSON path uses, like traversing nested fields and PPL query:: - PPL> source=test_spath | spath input=doc output=first_element list{0} | spath input=doc output=all_elements list{} | spath input=doc output=nested nest_out.nest_in; + os> source=structured | spath input=doc_list output=first_element list{0} | spath input=doc_list output=all_elements list{} | spath input=doc_list output=nested nest_out.nest_in | fields doc_list first_element all_elements nested; fetched rows / total rows = 3/3 +------------------------------------------------------+---------------+--------------+--------+ - | doc | first_element | all_elements | nested | + | doc_list | first_element | all_elements | nested | |------------------------------------------------------+---------------+--------------+--------| | {"list": [1, 2, 3, 4], "nest_out": {"nest_in": "a"}} | 1 | [1,2,3,4] | a | | {"list": [], "nest_out": {"nest_in": "a"}} | null | [] | a | @@ -71,10 +71,27 @@ The example shows extracting an inner field and doing statistics on it, using th PPL query:: - PPL> source=test_spath | spath input=doc n | eval n=cast(n as int) | stats sum(n); + os> source=structured | spath input=doc_n n | eval n=cast(n as int) | stats sum(n) | fields `sum(n)`; fetched rows / total rows = 1/1 +--------+ | sum(n) | |--------| | 6 | +--------+ + +Example 4: Escaped paths +============================ + +`spath` can escape paths with strings to accept any path that `json_extract` does. This includes escaping complex field names as array components. + +PPL query:: + + os> source=structured | spath output=a input=doc_escape "['a fancy field name']" | spath output=b input=doc_escape "['a.b.c']" | fields a b; + fetched rows / total rows = 3/3 + +-------+---+ + | a | b | + |-------+---| + | true | 0 | + | true | 1 | + | false | 2 | + +-------+---+ diff --git a/doctest/test_data/structured.json b/doctest/test_data/structured.json new file mode 100644 index 0000000000..c0717c6f32 --- /dev/null +++ b/doctest/test_data/structured.json @@ -0,0 +1,3 @@ +{"doc_n":"{\"n\": 1}","doc_escape":"{\"a fancy field name\": true,\"a.b.c\": 0}","doc_list":"{\"list\": [1, 2, 3, 4], \"nest_out\": {\"nest_in\": \"a\"}}","obj_field":{"field": "a"}} +{"doc_n":"{\"n\": 2}","doc_escape":"{\"a fancy field name\": true,\"a.b.c\": 1}","doc_list":"{\"list\": [], \"nest_out\": {\"nest_in\": \"a\"}}","obj_field":{"field": "b"}} +{"doc_n":"{\"n\": 3}","doc_escape":"{\"a fancy field name\": false,\"a.b.c\": 2}","doc_list":"{\"list\": [5, 6], \"nest_out\": {\"nest_in\": \"a\"}}","obj_field":{"field": "c"}} \ No newline at end of file diff --git a/doctest/test_docs.py b/doctest/test_docs.py index 4fd9c230ff..d3cea5782b 100644 --- a/doctest/test_docs.py +++ b/doctest/test_docs.py @@ -37,6 +37,7 @@ 'weblogs': 'weblogs.json', 'json_test': 'json_test.json', 'state_country': 'state_country.json', + 'structured': 'structured.json', 'occupation': 'occupation.json', 'worker': 'worker.json', 'work_information': 'work_information.json', diff --git a/doctest/test_mapping/structured.json b/doctest/test_mapping/structured.json new file mode 100644 index 0000000000..5c79e53dc0 --- /dev/null +++ b/doctest/test_mapping/structured.json @@ -0,0 +1,20 @@ +{ + "mappings": { + "properties": { + "doc_n": { + "type": "text" + }, + "doc_list": { + "type": "text" + }, + "doc_escape": { + "type": "text" + }, + "obj_field": { + "properties": { + "field": { "type": "text" } + } + } + } + } +} \ No newline at end of file diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index 494adb1571..6a54265904 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -404,6 +404,7 @@ spathParameter indexablePath : pathElement (DOT pathElement)* + | stringLiteral ; pathElement diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/utils/SPathRewriteTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/utils/SPathRewriteTest.java index 45e20c8f73..73d282d1f6 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/utils/SPathRewriteTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/utils/SPathRewriteTest.java @@ -68,9 +68,17 @@ public void testSpathArgumentDeshuffle() { @Test public void testSpathEscapedParse() { SPath sp = - (SPath) plan("source = t | spath input=f output=o path=`attributes.['cluster.name']`"); + (SPath) plan("source = t | spath input=f output=o path=\"attributes.['cluster.name']\""); Eval ev = (Eval) plan("source = t | eval o=json_extract(f, \"attributes.['cluster.name']\")"); assertEquals(ev, sp.rewriteAsEval()); } + + @Test + public void testSpathEscapedSpaces() { + SPath sp = (SPath) plan("source = t | spath input=f output=o path=\"['abc def ghi']\""); + Eval ev = (Eval) plan("source = t | eval o=json_extract(f, \"['abc def ghi']\")"); + + assertEquals(ev, sp.rewriteAsEval()); + } }