diff --git a/build.zig b/build.zig
index 63badc2..87d7bb2 100644
--- a/build.zig
+++ b/build.zig
@@ -1362,6 +1362,15 @@ pub fn build(b: *std.Build) void {
test_disk_output.step.dependOn(b.getInstallStep());
test_step.dependOn(&test_disk_output.step);
+ // Integration test 132: mismatched nested closing tag in XML column content → non-zero exit
+ const test_xml_mismatched_tags = b.addSystemCommand(&.{
+ "bash", "-c",
+ \\printf 'text
' \
+ \\ | ./zig-out/bin/sql-pipe -I xml 'SELECT * FROM t' 2>/dev/null; test $? -ne 0
+ });
+ test_xml_mismatched_tags.step.dependOn(b.getInstallStep());
+ test_step.dependOn(&test_xml_mismatched_tags.step);
+
// Unit tests for the RFC 4180 CSV parser (src/csv.zig)
const unit_tests = b.addTest(.{
.root_module = b.createModule(.{
diff --git a/src/xml.zig b/src/xml.zig
index 525b5f6..619628a 100644
--- a/src/xml.zig
+++ b/src/xml.zig
@@ -393,6 +393,10 @@ pub const XmlParser = struct {
const start = self.pos;
var depth: usize = 0;
var has_nested = false;
+ // Stack of open nested element names (slices into self.data — no allocation per entry).
+ // Invariant: tag_stack.items.len == depth at all times.
+ var tag_stack: std.ArrayList([]const u8) = .empty;
+ defer tag_stack.deinit(allocator);
// Loop invariant: depth = number of unclosed nested elements
// Bounding function: self.data.len - self.pos (finite input)
@@ -428,20 +432,26 @@ pub const XmlParser = struct {
if (!has_nested) return decodeEntities(allocator, raw);
return allocator.dupe(u8, raw);
}
- // Closing tag of a nested element
+ // Closing tag of a nested element — verify name matches the open tag on the stack
depth -= 1;
self.advance();
self.advance(); // ""
- _ = self.readName(err_writer);
+ const close_name = self.readName(err_writer);
self.skipWs();
if (self.peek() == '>') self.advance();
+ const expected = tag_stack.pop().?; // safe: every closing tag at depth>0 was preceded by an opening push
+ if (!std.mem.eql(u8, close_name, expected))
+ self.fatalAt("mismatched closing tag: expected '{s}>' but found '{s}>'", err_writer, .{ expected, close_name });
} else {
// Opening tag of a nested element
has_nested = true;
self.advance(); // '<'
- _ = self.readName(err_writer);
+ const nested_name = self.readName(err_writer);
const self_closing = self.skipAttrsClose(err_writer);
- if (!self_closing) depth += 1;
+ if (!self_closing) {
+ depth += 1;
+ try tag_stack.append(allocator, nested_name);
+ }
}
}
self.fatalAt("unexpected end of input: unclosed element '{s}'", err_writer, .{elem_name});
@@ -1155,6 +1165,29 @@ test "XmlParser.navigateToRoot: handles text nodes between siblings" {
try std.testing.expectEqualStrings(" ", p.data[p.pos..]);
}
+test "XmlParser.readContent: properly matched nested tags are accepted" {
+ const allocator = std.testing.allocator;
+ var err_buf: [256]u8 = undefined;
+ var err_writer: std.Io.Writer = .fixed(&err_buf);
+
+ // Happy path for the nested-tag name check: every non-self-closing nested open tag is closed by a same-named tag, so the row must parse.
+ const input = "text
";
+ var p = XmlParser.init(input);
+ p.skipPrologue(&err_writer);
+ const root = p.readRootOpen(&err_writer);
+
+ const cols = try p.nextRow(allocator, root, null, &err_writer);
+ try std.testing.expect(cols != null);
+ defer {
+ for (cols.?) |col| if (col.value) |v| allocator.free(v);
+ allocator.free(cols.?);
+ }
+ try std.testing.expectEqual(@as(usize, 1), cols.?.len);
+ try std.testing.expectEqualStrings("col", cols.?[0].name);
+ // Content with nested elements comes back as an allocator-owned copy of the raw input slice, without entity decoding.
+ try std.testing.expectEqualStrings("text", cols.?[0].value.?);
+}
+
test "XmlParser.nextRow: row_tag_filter skips non-matching elements" {
const allocator = std.testing.allocator;
var err_buf: [256]u8 = undefined;