Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 67 additions & 0 deletions build.zig
Original file line number Diff line number Diff line change
Expand Up @@ -1371,6 +1371,73 @@ pub fn build(b: *std.Build) void {
test_xml_mismatched_tags.step.dependOn(b.getInstallStep());
test_step.dependOn(&test_xml_mismatched_tags.step);

// Integration test 133: --json-path navigates single-key nested JSON array
const test_json_path_single = b.addSystemCommand(&.{
"bash", "-c",
\\result=$(printf '{"data":[{"name":"Alice"},{"name":"Bob"}]}' \
\\ | ./zig-out/bin/sql-pipe -I json --json-path data 'SELECT name FROM t ORDER BY name')
\\[ "$result" = "$(printf 'Alice\nBob')" ]
});
test_json_path_single.step.dependOn(b.getInstallStep());
test_step.dependOn(&test_json_path_single.step);

// Integration test 134: --json-path navigates multi-segment nested JSON path
const test_json_path_multi = b.addSystemCommand(&.{
"bash", "-c",
\\result=$(printf '{"results":{"items":[{"id":1},{"id":2}]}}' \
\\ | ./zig-out/bin/sql-pipe -I json --json-path results.items 'SELECT id FROM t ORDER BY id')
\\[ "$result" = "$(printf '1\n2')" ]
});
test_json_path_multi.step.dependOn(b.getInstallStep());
test_step.dependOn(&test_json_path_multi.step);

// Integration test 135: --json-path with missing key exits non-zero
const test_json_path_missing_key = b.addSystemCommand(&.{
"bash", "-c",
\\printf '{"data":[{"name":"Alice"}]}' \
\\ | ./zig-out/bin/sql-pipe -I json --json-path missing 'SELECT * FROM t' 2>/dev/null; test $? -ne 0
});
test_json_path_missing_key.step.dependOn(b.getInstallStep());
test_step.dependOn(&test_json_path_missing_key.step);

// Integration test 136: --json-path targeting a non-array value exits non-zero
const test_json_path_non_array = b.addSystemCommand(&.{
"bash", "-c",
\\printf '{"data":{"name":"Alice"}}' \
\\ | ./zig-out/bin/sql-pipe -I json --json-path data 'SELECT * FROM t' 2>/dev/null; test $? -ne 0
});
test_json_path_non_array.step.dependOn(b.getInstallStep());
test_step.dependOn(&test_json_path_non_array.step);

// Integration test 137: --json-path with --columns lists columns from nested array
const test_json_path_columns = b.addSystemCommand(&.{
"bash", "-c",
\\result=$(printf '{"feed":{"entry":[{"title":"T1","link":"L1"}]}}' \
\\ | ./zig-out/bin/sql-pipe -I json --json-path feed.entry --columns)
\\[ "$result" = "$(printf 'title\nlink')" ]
});
test_json_path_columns.step.dependOn(b.getInstallStep());
test_step.dependOn(&test_json_path_columns.step);

// Integration test 138: --json-path with --validate parses nested JSON array and prints summary
const test_json_path_validate = b.addSystemCommand(&.{
"bash", "-c",
\\result=$(printf '{"data":[{"id":1,"name":"Alice"},{"id":2,"name":"Bob"}]}' \
\\ | ./zig-out/bin/sql-pipe -I json --json-path data --validate)
\\echo "$result" | grep -q "OK: 2 rows"
});
test_json_path_validate.step.dependOn(b.getInstallStep());
test_step.dependOn(&test_json_path_validate.step);

// Integration test 139: --json-path with non-json input format exits non-zero
const test_json_path_format_mismatch = b.addSystemCommand(&.{
"bash", "-c",
\\printf 'name\nAlice\n' \
\\ | ./zig-out/bin/sql-pipe -I csv --json-path data 'SELECT * FROM t' 2>/dev/null; test $? -ne 0
});
test_json_path_format_mismatch.step.dependOn(b.getInstallStep());
test_step.dependOn(&test_json_path_format_mismatch.step);

// Unit tests for the RFC 4180 CSV parser (src/csv.zig)
const unit_tests = b.addTest(.{
.root_module = b.createModule(.{
Expand Down
23 changes: 23 additions & 0 deletions src/args.zig
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,9 @@ pub const SqlPipeError = error{
InvalidInputFormat,
InvalidOutputFormat,
MissingXmlFlagValue,
MissingJsonFlagValue,
InvalidXmlName,
JsonPathRequiresJson,
OpenDbFailed,
EmptyInput,
EmptyColumnName,
Expand Down Expand Up @@ -82,6 +84,8 @@ pub const ParsedArgs = struct {
xml_root_input: ?[]const u8,
/// Row tag filter for XML input; null = accept any direct child element as a row.
xml_row_input: ?[]const u8,
/// Dot-separated path to the JSON array (e.g. "results.items"); null = expect top-level array.
json_path: ?[]const u8,
/// Use a file-backed temporary SQLite database instead of :memory: when true.
/// Enables processing datasets larger than available RAM; also sets PRAGMA temp_store = FILE.
disk: bool,
Expand All @@ -98,6 +102,8 @@ pub const ColumnsArgs = struct {
xml_root_input: ?[]const u8,
/// Row tag filter for XML input; null = accept any direct child element as a row.
xml_row_input: ?[]const u8,
/// Dot-separated path to the JSON array (e.g. "results.items"); null = expect top-level array.
json_path: ?[]const u8,
};

pub const ValidateArgs = struct {
Expand All @@ -111,6 +117,8 @@ pub const ValidateArgs = struct {
xml_root_input: ?[]const u8,
/// Row tag filter for XML input; null = accept any direct child element as a row.
xml_row_input: ?[]const u8,
/// Dot-separated path to the JSON array (e.g. "results.items"); null = expect top-level array.
json_path: ?[]const u8,
};

pub const SampleArgs = struct {
Expand Down Expand Up @@ -173,6 +181,7 @@ pub fn printUsage(writer: *std.Io.Writer) !void {
\\ --output <file> Write results to file instead of stdout
\\ --xml-root <name> Root element name for XML I/O (default: results)
\\ --xml-row <name> Row element name for XML I/O (default: row)
\\ --json-path <path> Dot-separated path to the JSON array for -I json (e.g. results.items)
\\ --disk Use a file-backed temp database instead of :memory:
\\ Enables processing datasets larger than available RAM
\\ Also sets PRAGMA temp_store = FILE for transient structures
Expand Down Expand Up @@ -238,6 +247,7 @@ pub fn parseArgs(args: []const [:0]const u8) SqlPipeError!ArgsResult {
var xml_row: []const u8 = "row";
var xml_root_input: ?[]const u8 = null;
var xml_row_input: ?[]const u8 = null;
var json_path: ?[]const u8 = null;
var sample_mode = false;
var sample_n: usize = 10;
var disk = false;
Expand Down Expand Up @@ -354,6 +364,12 @@ pub fn parseArgs(args: []const [:0]const u8) SqlPipeError!ArgsResult {
} else if (std.mem.startsWith(u8, arg, "--xml-row=")) {
xml_row = arg["--xml-row=".len..];
xml_row_input = arg["--xml-row=".len..];
} else if (std.mem.eql(u8, arg, "--json-path")) {
i += 1;
if (i >= args.len) return error.MissingJsonFlagValue;
json_path = args[i];
} else if (std.mem.startsWith(u8, arg, "--json-path=")) {
json_path = arg["--json-path=".len..];
} else if (std.mem.eql(u8, arg, "--disk")) {
disk = true;
} else {
Expand Down Expand Up @@ -415,6 +431,10 @@ pub fn parseArgs(args: []const [:0]const u8) SqlPipeError!ArgsResult {
return error.InvalidXmlName;
}

// --json-path requires -I json (the flag only applies to JSON object navigation)
if (json_path != null and input_format != .json)
return error.JsonPathRequiresJson;

// --columns mode: list headers and exit
if (list_columns)
return .{ .columns = ColumnsArgs{
Expand All @@ -423,6 +443,7 @@ pub fn parseArgs(args: []const [:0]const u8) SqlPipeError!ArgsResult {
.input_format = input_format,
.xml_root_input = xml_root_input,
.xml_row_input = xml_row_input,
.json_path = json_path,
} };

// --validate mode: parse CSV and print summary
Expand All @@ -433,6 +454,7 @@ pub fn parseArgs(args: []const [:0]const u8) SqlPipeError!ArgsResult {
.input_format = input_format,
.xml_root_input = xml_root_input,
.xml_row_input = xml_row_input,
.json_path = json_path,
} };

// --sample mode: print schema + first n rows and exit
Expand All @@ -459,6 +481,7 @@ pub fn parseArgs(args: []const [:0]const u8) SqlPipeError!ArgsResult {
.xml_row = xml_row,
.xml_root_input = xml_root_input,
.xml_row_input = xml_row_input,
.json_path = json_path,
.disk = disk,
} };
}
45 changes: 42 additions & 3 deletions src/json.zig
Original file line number Diff line number Diff line change
Expand Up @@ -162,9 +162,39 @@ pub fn insertRowFromJson(
if (c.sqlite3_step(stmt) != c.SQLITE_DONE) return error.StepFailed;
}

/// navigateJsonPath(value, path, stderr_writer) → std.json.Value
///
/// Pre: value is a parsed JSON value; path is a non-empty dot-separated key sequence
/// Post: returns the JSON value at the given path
/// fatals if any segment is empty, not found, or if an intermediate value is not an object
pub fn navigateJsonPath(
    value: std.json.Value,
    path: []const u8,
    stderr_writer: *std.Io.Writer,
) std.json.Value {
    var current = value;
    // splitScalar yields every segment between dots, including the empty
    // segment produced by a leading, doubled, OR trailing dot — so all
    // malformed paths are rejected uniformly. (The previous manual scan
    // dropped the empty segment after a trailing dot, so "data." was
    // silently accepted despite the error message claiming otherwise.)
    var segments = std.mem.splitScalar(u8, path, '.');
    // Loop invariant: current is the value at the path prefix consumed so far
    // Bounding function: number of remaining segments (one consumed per iteration)
    while (segments.next()) |key| {
        if (key.len == 0)
            fatal("--json-path '{s}': empty key segment (check for double dots or leading/trailing dots)", stderr_writer, .csv_error, .{path});
        // Each intermediate value must be an object to descend further.
        const obj = switch (current) {
            .object => |o| o,
            else => fatal("--json-path '{s}': '{s}' is not an object", stderr_writer, .csv_error, .{ path, key }),
        };
        current = obj.get(key) orelse
            fatal("--json-path '{s}': key '{s}' not found in JSON document", stderr_writer, .csv_error, .{ path, key });
    }
    return current;
}

// ─── Input loading ────────────────────────────────────

/// loadJsonArray(allocator, reader, db, max_rows, stderr_writer) → usize
/// loadJsonArray(allocator, reader, db, max_rows, json_path, stderr_writer) → usize
///
/// Pre: reader is positioned at the start of a JSON document
/// db is an open, empty SQLite database
Expand All @@ -177,6 +207,7 @@ pub fn loadJsonArray(
reader: *std.Io.Reader,
db: *c.sqlite3,
max_rows: ?usize,
json_path: ?[]const u8,
stderr_writer: *std.Io.Writer,
) usize {
// Read all input into a buffer using block reads instead of byte-by-byte takeByte()
Expand All @@ -193,9 +224,17 @@ pub fn loadJsonArray(
fatal("failed to parse JSON input", stderr_writer, .csv_error, .{});
defer parsed.deinit();

const array = switch (parsed.value) {
const target: std.json.Value = if (json_path) |path|
navigateJsonPath(parsed.value, path, stderr_writer)
else
parsed.value;

const array = switch (target) {
.array => |a| a,
else => fatal("JSON input must be an array of objects", stderr_writer, .csv_error, .{}),
else => if (json_path) |path|
fatal("--json-path '{s}': resolved to a non-array value; expected an array of objects", stderr_writer, .csv_error, .{path})
else
fatal("JSON input must be an array of objects", stderr_writer, .csv_error, .{}),
};

if (array.items.len == 0) fatal("empty JSON array: cannot determine column names", stderr_writer, .csv_error, .{});
Expand Down
4 changes: 3 additions & 1 deletion src/main.zig
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ fn run(
.json => blk: {
var stdin_buf: [4096]u8 = undefined;
var stdin_reader = std.Io.File.reader(std.Io.File.stdin(), io, &stdin_buf);
break :blk json.loadJsonArray(allocator, &stdin_reader.interface, db, parsed.max_rows, stderr_writer);
break :blk json.loadJsonArray(allocator, &stdin_reader.interface, db, parsed.max_rows, parsed.json_path, stderr_writer);
},
.ndjson => blk: {
var stdin_buf: [4096]u8 = undefined;
Expand Down Expand Up @@ -186,6 +186,8 @@ pub fn main(init: std.process.Init.Minimal) void {
error.SampleWithOutput => fatal("--sample cannot be combined with --output", stderr_writer, .usage, .{}),
error.InvalidSampleCount => fatal("--sample requires a positive integer value", stderr_writer, .usage, .{}),
error.MissingXmlFlagValue => fatal("--xml-root and --xml-row require a value", stderr_writer, .usage, .{}),
error.MissingJsonFlagValue => fatal("--json-path requires a value", stderr_writer, .usage, .{}),
error.JsonPathRequiresJson => fatal("--json-path requires -I json", stderr_writer, .usage, .{}),
error.InvalidXmlName => fatal("--xml-root and --xml-row must be valid XML element names (letter/underscore first, then letters/digits/-/._/:)", stderr_writer, .usage, .{}),
else => {},
}
Expand Down
12 changes: 10 additions & 2 deletions src/modes/columns.zig
Original file line number Diff line number Diff line change
Expand Up @@ -98,9 +98,17 @@ pub fn runColumns(
fatal("failed to parse JSON input", stderr_writer, .csv_error, .{});
defer parsed.deinit();

const array = switch (parsed.value) {
const target: std.json.Value = if (args.json_path) |path|
json_mod.navigateJsonPath(parsed.value, path, stderr_writer)
else
parsed.value;

const array = switch (target) {
.array => |a| a,
else => fatal("JSON input must be an array of objects", stderr_writer, .csv_error, .{}),
else => if (args.json_path) |path|
fatal("--json-path '{s}': resolved to a non-array value; expected an array of objects", stderr_writer, .csv_error, .{path})
else
fatal("JSON input must be an array of objects", stderr_writer, .csv_error, .{}),
};
if (array.items.len == 0) fatal("empty JSON array: cannot determine column names", stderr_writer, .csv_error, .{});

Expand Down
12 changes: 10 additions & 2 deletions src/modes/validate.zig
Original file line number Diff line number Diff line change
Expand Up @@ -154,9 +154,17 @@ pub fn runValidate(
fatal("failed to parse JSON input", stderr_writer, .csv_error, .{});
defer parsed.deinit();

const array = switch (parsed.value) {
const target: std.json.Value = if (args.json_path) |path|
json_mod.navigateJsonPath(parsed.value, path, stderr_writer)
else
parsed.value;

const array = switch (target) {
.array => |a| a,
else => fatal("JSON input must be an array of objects", stderr_writer, .csv_error, .{}),
else => if (args.json_path) |path|
fatal("--json-path '{s}': resolved to a non-array value; expected an array of objects", stderr_writer, .csv_error, .{path})
else
fatal("JSON input must be an array of objects", stderr_writer, .csv_error, .{}),
};
if (array.items.len == 0) fatal("empty JSON array: cannot determine column names", stderr_writer, .csv_error, .{});

Expand Down
Loading