diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/SPath.java b/core/src/main/java/org/opensearch/sql/ast/tree/SPath.java index 89eab6cf166..a1c0c08a15f 100644 --- a/core/src/main/java/org/opensearch/sql/ast/tree/SPath.java +++ b/core/src/main/java/org/opensearch/sql/ast/tree/SPath.java @@ -5,6 +5,8 @@ package org.opensearch.sql.ast.tree; +import static org.opensearch.sql.common.utils.StringUtils.unquoteText; + import com.google.common.collect.ImmutableList; import java.util.List; import lombok.AllArgsConstructor; @@ -48,8 +50,9 @@ public T accept(AbstractNodeVisitor nodeVisitor, C context) { public Eval rewriteAsEval() { String outField = this.outField; + String unquotedPath = unquoteText(this.path); if (outField == null) { - outField = this.path; + outField = unquotedPath; } return AstDSL.eval( @@ -57,6 +60,6 @@ public Eval rewriteAsEval() { AstDSL.let( AstDSL.field(outField), AstDSL.function( - "json_extract", AstDSL.field(inField), AstDSL.stringLiteral(this.path)))); + "json_extract", AstDSL.field(inField), AstDSL.stringLiteral(unquotedPath)))); } } diff --git a/docs/category.json b/docs/category.json index f126904da6a..f3fe70ecfa5 100644 --- a/docs/category.json +++ b/docs/category.json @@ -46,6 +46,7 @@ "user/ppl/cmd/search.rst", "user/ppl/cmd/showdatasources.rst", "user/ppl/cmd/sort.rst", + "user/ppl/cmd/spath.rst", "user/ppl/cmd/stats.rst", "user/ppl/cmd/streamstats.rst", "user/ppl/cmd/subquery.rst", diff --git a/docs/user/dql/metadata.rst b/docs/user/dql/metadata.rst index 7584c72505e..e959a69c8b6 100644 --- a/docs/user/dql/metadata.rst +++ b/docs/user/dql/metadata.rst @@ -35,7 +35,7 @@ Example 1: Show All Indices Information SQL query:: os> SHOW TABLES LIKE '%' - fetched rows / total rows = 22/22 + fetched rows / total rows = 23/23 +----------------+-------------+-------------------+------------+---------+----------+------------+-----------+---------------------------+----------------+ | TABLE_CAT | TABLE_SCHEM | TABLE_NAME | TABLE_TYPE | REMARKS 
| TYPE_CAT | TYPE_SCHEM | TYPE_NAME | SELF_REFERENCING_COL_NAME | REF_GENERATION | |----------------+-------------+-------------------+------------+---------+----------+------------+-----------+---------------------------+----------------| @@ -54,6 +54,7 @@ SQL query:: | docTestCluster | null | otellogs | BASE TABLE | null | null | null | null | null | null | | docTestCluster | null | people | BASE TABLE | null | null | null | null | null | null | | docTestCluster | null | state_country | BASE TABLE | null | null | null | null | null | null | + | docTestCluster | null | structured | BASE TABLE | null | null | null | null | null | null | | docTestCluster | null | time_data | BASE TABLE | null | null | null | null | null | null | | docTestCluster | null | time_data2 | BASE TABLE | null | null | null | null | null | null | | docTestCluster | null | time_test | BASE TABLE | null | null | null | null | null | null | diff --git a/docs/user/ppl/cmd/spath.rst b/docs/user/ppl/cmd/spath.rst index 7defb4437f2..85ba328c27b 100644 --- a/docs/user/ppl/cmd/spath.rst +++ b/docs/user/ppl/cmd/spath.rst @@ -37,10 +37,10 @@ The simplest spath is to extract a single field. 
This extracts `n` from the `doc PPL query:: - PPL> source=test_spath | spath input=doc n; + os> source=structured | spath input=doc_n n | fields doc_n n; fetched rows / total rows = 3/3 +----------+---+ - | doc | n | + | doc_n | n | |----------+---| | {"n": 1} | 1 | | {"n": 2} | 2 | @@ -54,10 +54,10 @@ These queries demonstrate more JSON path uses, like traversing nested fields and PPL query:: - PPL> source=test_spath | spath input=doc output=first_element list{0} | spath input=doc output=all_elements list{} | spath input=doc output=nested nest_out.nest_in; + os> source=structured | spath input=doc_list output=first_element list{0} | spath input=doc_list output=all_elements list{} | spath input=doc_list output=nested nest_out.nest_in | fields doc_list first_element all_elements nested; fetched rows / total rows = 3/3 +------------------------------------------------------+---------------+--------------+--------+ - | doc | first_element | all_elements | nested | + | doc_list | first_element | all_elements | nested | |------------------------------------------------------+---------------+--------------+--------| | {"list": [1, 2, 3, 4], "nest_out": {"nest_in": "a"}} | 1 | [1,2,3,4] | a | | {"list": [], "nest_out": {"nest_in": "a"}} | null | [] | a | @@ -71,10 +71,27 @@ The example shows extracting an inner field and doing statistics on it, using th PPL query:: - PPL> source=test_spath | spath input=doc n | eval n=cast(n as int) | stats sum(n); + os> source=structured | spath input=doc_n n | eval n=cast(n as int) | stats sum(n) | fields `sum(n)`; fetched rows / total rows = 1/1 +--------+ | sum(n) | |--------| | 6 | +--------+ + +Example 4: Escaped paths +============================ + +`spath` accepts a path wrapped in a quoted string, which lets it take any path that `json_extract` does. This includes escaping complex field names (for example, names containing spaces or dots) as array components. 
+ +PPL query:: + + os> source=structured | spath output=a input=doc_escape "['a fancy field name']" | spath output=b input=doc_escape "['a.b.c']" | fields a b; + fetched rows / total rows = 3/3 + +-------+---+ + | a | b | + |-------+---| + | true | 0 | + | true | 1 | + | false | 2 | + +-------+---+ diff --git a/doctest/test_data/structured.json b/doctest/test_data/structured.json new file mode 100644 index 00000000000..c0717c6f328 --- /dev/null +++ b/doctest/test_data/structured.json @@ -0,0 +1,3 @@ +{"doc_n":"{\"n\": 1}","doc_escape":"{\"a fancy field name\": true,\"a.b.c\": 0}","doc_list":"{\"list\": [1, 2, 3, 4], \"nest_out\": {\"nest_in\": \"a\"}}","obj_field":{"field": "a"}} +{"doc_n":"{\"n\": 2}","doc_escape":"{\"a fancy field name\": true,\"a.b.c\": 1}","doc_list":"{\"list\": [], \"nest_out\": {\"nest_in\": \"a\"}}","obj_field":{"field": "b"}} +{"doc_n":"{\"n\": 3}","doc_escape":"{\"a fancy field name\": false,\"a.b.c\": 2}","doc_list":"{\"list\": [5, 6], \"nest_out\": {\"nest_in\": \"a\"}}","obj_field":{"field": "c"}} \ No newline at end of file diff --git a/doctest/test_docs.py b/doctest/test_docs.py index 4fd9c230ff6..d3cea5782b5 100644 --- a/doctest/test_docs.py +++ b/doctest/test_docs.py @@ -37,6 +37,7 @@ 'weblogs': 'weblogs.json', 'json_test': 'json_test.json', 'state_country': 'state_country.json', + 'structured': 'structured.json', 'occupation': 'occupation.json', 'worker': 'worker.json', 'work_information': 'work_information.json', diff --git a/doctest/test_mapping/structured.json b/doctest/test_mapping/structured.json new file mode 100644 index 00000000000..5c79e53dc0a --- /dev/null +++ b/doctest/test_mapping/structured.json @@ -0,0 +1,20 @@ +{ + "mappings": { + "properties": { + "doc_n": { + "type": "text" + }, + "doc_list": { + "type": "text" + }, + "doc_escape": { + "type": "text" + }, + "obj_field": { + "properties": { + "field": { "type": "text" } + } + } + } + } +} \ No newline at end of file diff --git 
a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index 494adb15717..6a542659047 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -404,6 +404,7 @@ spathParameter indexablePath : pathElement (DOT pathElement)* + | stringLiteral ; pathElement diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/utils/SPathRewriteTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/utils/SPathRewriteTest.java index e97fb51ea90..73d282d1f64 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/utils/SPathRewriteTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/utils/SPathRewriteTest.java @@ -64,4 +64,21 @@ public void testSpathMissingPathArgumentHandling() { public void testSpathArgumentDeshuffle() { assertEquals(plan("source = t | spath path=a input=a"), plan("source = t | spath input=a a")); } + + @Test + public void testSpathEscapedParse() { + SPath sp = + (SPath) plan("source = t | spath input=f output=o path=\"attributes.['cluster.name']\""); + Eval ev = (Eval) plan("source = t | eval o=json_extract(f, \"attributes.['cluster.name']\")"); + + assertEquals(ev, sp.rewriteAsEval()); + } + + @Test + public void testSpathEscapedSpaces() { + SPath sp = (SPath) plan("source = t | spath input=f output=o path=\"['abc def ghi']\""); + Eval ev = (Eval) plan("source = t | eval o=json_extract(f, \"['abc def ghi']\")"); + + assertEquals(ev, sp.rewriteAsEval()); + } }