diff --git a/build.zig b/build.zig index 63badc2..87d7bb2 100644 --- a/build.zig +++ b/build.zig @@ -1362,6 +1362,15 @@ pub fn build(b: *std.Build) void { test_disk_output.step.dependOn(b.getInstallStep()); test_step.dependOn(&test_disk_output.step); + // Integration test 132: mismatched nested closing tag in XML column content → non-zero exit + const test_xml_mismatched_tags = b.addSystemCommand(&.{ + "bash", "-c", + \\printf 'text' \ + \\ | ./zig-out/bin/sql-pipe -I xml 'SELECT * FROM t' 2>/dev/null; test $? -ne 0 + }); + test_xml_mismatched_tags.step.dependOn(b.getInstallStep()); + test_step.dependOn(&test_xml_mismatched_tags.step); + // Unit tests for the RFC 4180 CSV parser (src/csv.zig) const unit_tests = b.addTest(.{ .root_module = b.createModule(.{ diff --git a/src/xml.zig b/src/xml.zig index 525b5f6..619628a 100644 --- a/src/xml.zig +++ b/src/xml.zig @@ -393,6 +393,10 @@ pub const XmlParser = struct { const start = self.pos; var depth: usize = 0; var has_nested = false; + // Stack of open nested element names (slices into self.data — no allocation per entry). + // Invariant: tag_stack.items.len == depth at all times. + var tag_stack: std.ArrayList([]const u8) = .empty; + defer tag_stack.deinit(allocator); // Loop invariant: depth = number of unclosed nested elements // Bounding function: self.data.len - self.pos (finite input) @@ -428,20 +432,26 @@ pub const XmlParser = struct { if (!has_nested) return decodeEntities(allocator, raw); return allocator.dupe(u8, raw); } - // Closing tag of a nested element + // Closing tag of a nested element — verify name matches the open tag on the stack depth -= 1; self.advance(); self.advance(); // "') self.advance(); + const expected = tag_stack.pop().?; // safe: every closing tag at depth>0 was preceded by an opening push + if (!std.mem.eql(u8, close_name, expected)) + self.fatalAt("mismatched closing tag: expected '' but found ''", err_writer, .{ expected, close_name }); } else { // Opening tag of a nested element has_nested = true; self.advance(); // '<' - _ = self.readName(err_writer); + const nested_name = self.readName(err_writer); const self_closing = self.skipAttrsClose(err_writer); - if (!self_closing) depth += 1; + if (!self_closing) { + depth += 1; + try tag_stack.append(allocator, nested_name); + } } } self.fatalAt("unexpected end of input: unclosed element '{s}'", err_writer, .{elem_name}); @@ -1155,6 +1165,29 @@ test "XmlParser.navigateToRoot: handles text nodes between siblings" { try std.testing.expectEqualStrings("", p.data[p.pos..]); } +test "XmlParser.readContent: properly matched nested tags are accepted" { + const allocator = std.testing.allocator; + var err_buf: [256]u8 = undefined; + var err_writer: std.Io.Writer = .fixed(&err_buf); + + // Deeply nested content with correctly matched tags — stack must track them all + const input = "text"; + var p = XmlParser.init(input); + p.skipPrologue(&err_writer); + const root = p.readRootOpen(&err_writer); + + const cols = try p.nextRow(allocator, root, null, &err_writer); + try std.testing.expect(cols != null); + defer { + for (cols.?) |col| if (col.value) |v| allocator.free(v); + allocator.free(cols.?); + } + try std.testing.expectEqual(@as(usize, 1), cols.?.len); + try std.testing.expectEqualStrings("col", cols.?[0].name); + // Mixed/nested content is returned as raw XML substring + try std.testing.expectEqualStrings("text", cols.?[0].value.?); +} + test "XmlParser.nextRow: row_tag_filter skips non-matching elements" { const allocator = std.testing.allocator; var err_buf: [256]u8 = undefined;