|
# Kinds of node that can appear in an XML tree.  Plain text has no node type of
# its own: it is stored as a `String` among its parent's children.
@enum NodeType begin
    DOCUMENT_NODE       # prolog & root ELEMENT_NODE
    DTD_NODE            # <!DOCTYPE ...>
    DECLARATION_NODE    # <?xml attributes... ?>
    COMMENT_NODE        # <!-- ... -->
    CDATA_NODE          # <![CDATA[...]]>
    ELEMENT_NODE        # <NAME attributes... >
end
| 9 | + |
| 10 | +#-----------------------------------------------------------------------------# Node |
"""
    Node(type, tag, attributes, children)

One node of an XML tree.

# Fields
- `type::NodeType`: which kind of node this is.
- `tag`: the tag name as a `String`, or `nothing`.
- `attributes`: a `Dict{Symbol,String}`, or `nothing`.
- `children`: `nothing`, a single `String`, or a vector whose elements are
  `String`s and/or `Node`s.

The inner constructors dispatch on `children`: any `Vector` is converted to a
`Vector{Union{String,Node}}` and any `AbstractString` to a `String`; `nothing`
is stored as is.
"""
struct Node
    type::NodeType
    tag::Union{Nothing,String}
    attributes::Union{Nothing,Dict{Symbol,String}}
    children::Union{Nothing,Vector{Union{String,Node}},String}

    # No children at all.
    function Node(type, tag, attr, children::Nothing)
        return new(type, tag, attr, children)
    end

    # A list of children: normalise the element type so every node stores the
    # same concrete vector type.
    function Node(type, tag, attributes, children::Vector)
        kids = Vector{Union{String,Node}}(children)
        return new(type, tag, attributes, kids)
    end

    # A single piece of text as the node's content.
    function Node(type, tag, attributes, children::AbstractString)
        text = String(children)
        return new(type, tag, attributes, text)
    end
end
# Two nodes are equal when their type, tag, attributes and children are all
# equal (children are compared recursively through this same method).
function Base.:(==)(a::Node, b::Node)
    return a.type == b.type &&
           a.tag == b.tag &&
           a.attributes == b.attributes &&
           a.children == b.children
end

# `hash` must agree with `==`: without this method the default hash of `Node`
# depends on the identity of its mutable fields (the attribute dict and the
# child vector), so two `==` nodes would land in different `Dict`/`Set`
# buckets.  The hash is folded over exactly the fields `==` compares.
function Base.hash(o::Node, h::UInt)
    h = hash(:Node, h)
    h = hash(o.type, h)
    h = hash(o.tag, h)
    h = hash(o.attributes, h)
    return hash(o.children, h)
end
| 30 | + |
# Display a `Node` by rendering it with `showxml`.
function Base.show(io::IO, o::Node)
    return showxml(io, o)
end
| 32 | + |
# Write the `showxml` rendering of `o` to `io` and return the number of bytes
# written, as `write` methods are expected to do (forwarding straight to
# `showxml` returned `nothing`).  The text is rendered into a plain buffer
# first, so no terminal styling reaches `io` regardless of its color setting.
function Base.write(io::IO, o::Node)
    return write(io, sprint(showxml, o))
end
| 34 | + |
| 35 | + |
# Build a DOCUMENT_NODE (no tag, no attributes) whose children are the given
# arguments, in order.
function document(children...)
    return Node(DOCUMENT_NODE, nothing, nothing, collect(children))
end
# Build a DTD_NODE (no tag, no attributes) that stores `content` as its text.
function dtd(content::AbstractString)
    return Node(DTD_NODE, nothing, nothing, content)
end
# Build a DECLARATION_NODE (`<?xml ... ?>`) whose attributes are the keyword
# arguments, each value converted with `string`.
#
# The tag is set to "xml": `showxml` prints the node's tag right after `<?`,
# so a `nothing` tag rendered as `<?nothing ...?>`.
function declaration(; attrs...)
    attributes = OrderedDict(k => string(v) for (k, v) in pairs(attrs))
    return Node(DECLARATION_NODE, "xml", attributes, nothing)
end
# Build a COMMENT_NODE (no tag, no attributes) that stores `content` as its text.
function comment(content::AbstractString)
    return Node(COMMENT_NODE, nothing, nothing, content)
end
# Build a CDATA_NODE (no tag, no attributes) that stores `content` as its text.
function cdata(content::AbstractString)
    return Node(CDATA_NODE, nothing, nothing, content)
end
| 41 | + |
# Build an ELEMENT_NODE named `tag`.  Positional arguments after `tag` become
# the element's children (in order); keyword arguments become its attributes,
# each value converted with `string`.
function m(tag::String, children...; attrs...)
    attributes = OrderedDict(name => string(value) for (name, value) in pairs(attrs))
    return Node(ELEMENT_NODE, tag, attributes, collect(children))
end
| 43 | +# m(tag::String, child::String; attrs...) = Node(ELEMENT_NODE, tag, OrderedDict(k=>string(v) for (k,v) in pairs(attrs)), child) |
# Property access on the function `m` itself: `m.name` returns a function that
# forwards its arguments to `m` with "name" as the tag, so `m.div(...)` is
# shorthand for `m("div", ...)`.
function Base.getproperty(::typeof(m), sym::Symbol)
    f(args...; kw...) = m(string(sym), args...; kw...)
    return f
end
| 45 | + |
# Validate that the fields of `o` are consistent with its node type, raising an
# error (via `error`) that describes the first violation found.  Returns
# `nothing` when every rule for the node's type holds.  ELEMENT_NODEs are not
# constrained.
#
# Rules, by type:
# - DOCUMENT_NODE: no tag, no attributes, children is a vector containing only `Node`s.
# - DTD_NODE: no tag; content is a `String` (that is how `dtd` builds it and
#   how `showxml` prints it).
# - DECLARATION_NODE: no children.
# - COMMENT_NODE / CDATA_NODE: no tag, no attributes, content is a `String`.
function check(o::Node)
    # The node's own type must be inspected; a bare `type` is not defined here.
    type = o.type
    if type == DOCUMENT_NODE
        isnothing(o.tag) || error("A DOCUMENT_NODE should not have a tag.")
        isnothing(o.attributes) || error("A DOCUMENT_NODE should not have attributes.")
        # The stored vector always has element type Union{String,Node}, so the
        # elements themselves are inspected rather than the container type.
        (o.children isa Vector && all(child -> child isa Node, o.children)) ||
            error("DOCUMENT_NODE children should all be Nodes.")
    elseif type == DTD_NODE
        isnothing(o.tag) || error("A DTD_NODE should not have a tag.")
        o.children isa String || error("A DTD_NODE's child should be a String.")
    elseif type == DECLARATION_NODE
        isnothing(o.children) || error("A DECLARATION_NODE should not have children.")
    elseif type == COMMENT_NODE
        isnothing(o.tag) || error("A COMMENT_NODE should not have a tag.")
        isnothing(o.attributes) || error("A COMMENT_NODE should not have attributes.")
        o.children isa String || error("A COMMENT_NODE's child should be a String.")
    elseif type == CDATA_NODE
        isnothing(o.tag) || error("A CDATA_NODE should not have a tag.")
        isnothing(o.attributes) || error("A CDATA_NODE should not have attributes.")
        o.children isa String || error("A CDATA_NODE's child should be a String.")
    end
    return nothing
end
| 66 | + |
# Render the attributes of `o` as ` key="value"` pairs (each preceded by one
# space), ready to be placed right after a tag name.  Values are quoted with
# `repr`.  Returns "" when the node has no attributes, including when
# `o.attributes` is `nothing` (which the `Node` constructors allow).
function attr_string(o::Node)
    attrs = o.attributes
    isnothing(attrs) && return ""
    return join(" $k=$(repr(v))" for (k, v) in attrs)
end
| 68 | + |
| 69 | +#-----------------------------------------------------------------------------# show |
# Print `o` as XML text to `io`.
#
# `depth` is the nesting level: the node is prefixed with `INDENT` repeated
# `depth` times and printed through `printstyled` with color `depth + 1`.
#
# - DOCUMENT_NODE: each child is printed at depth 0, separated by newlines.
# - DTD_NODE: printed as `<!DOCTYPE ...>`.  The keyword is upper case because
#   XML is case-sensitive and only recognises `<!DOCTYPE`.
# - DECLARATION_NODE / COMMENT_NODE / CDATA_NODE: printed on a single line.
# - ELEMENT_NODE: self-closing when `children` is `nothing`; kept on one line
#   when the only child is a `String`; otherwise each child goes on its own
#   line one level deeper, followed by the closing tag.
#
# `String` children are printed by a `showxml` method for strings that is not
# part of this block.
function showxml(io::IO, o::Node; depth=0)
    p(args...) = printstyled(io, args...; color=depth + 1)
    if o.type == DOCUMENT_NODE
        for (i, child) in enumerate(o.children)
            showxml(io, child; depth=0)
            # Newline between children, but not after the last one.
            i != length(o.children) && println(io)
        end
    elseif o.type == DTD_NODE
        p(INDENT^depth, "<!DOCTYPE ", o.children, '>')
    elseif o.type == DECLARATION_NODE
        p(INDENT^depth, "<?", o.tag, attr_string(o), "?>")
    elseif o.type == COMMENT_NODE
        p(INDENT^depth, "<!-- ", o.children, " -->")
    elseif o.type == CDATA_NODE
        p(INDENT^depth, "<![CDATA[", o.children, "]]>")
    elseif o.type == ELEMENT_NODE
        p(INDENT^depth, '<', o.tag, attr_string(o))
        if isnothing(o.children)
            p(" />")
        elseif length(o.children) == 1 && o.children[1] isa String
            # A lone text child stays inline: <tag>text</tag>
            p('>')
            showxml(io, o.children[1])
            p("</", o.tag, '>')
        else
            p('>', '\n')
            for child in o.children
                showxml(io, child, depth=depth + 1)
                println(io)
            end
            p(INDENT^depth, "</", o.tag, '>')
        end
    end
end
| 105 | + |
| 106 | + |
# `node[i]` is the i-th child of `node`.
function Base.getindex(o::Node, i::Integer)
    return o.children[i]
end
# `node[i] = val` replaces the i-th child of `node`; forwards to `setindex!`
# on the children container and returns whatever that call returns.
function Base.setindex!(o::Node, val, i::Integer)
    kids = o.children
    return setindex!(kids, val, i)
end
# Makes `node[end]` work: the last index of a node is the last index of its
# children.
function Base.lastindex(o::Node)
    return lastindex(o.children)
end
| 110 | + |
| 111 | +#-----------------------------------------------------------------------------# From XMLTokenIterator |
# Build a document tree from a token iterator: create an empty DOCUMENT_NODE
# and let `add_children!` fill it from `itr`, passing "END_OF_FILE" as the
# terminator string.
function Node(itr::XMLTokenIterator)
    root = Node(DOCUMENT_NODE, nothing, nothing, Union{String,Node}[])
    add_children!(root, itr, "END_OF_FILE")
    return root
end
| 117 | + |
| 118 | + |
# Open `file` for reading, parse its contents into a document `Node`, and
# return that node.  `open` closes the file when the block finishes.
function readnode(file::String)
    return open(file, "r") do io
        Node(XMLTokenIterator(io))
    end
end
| 120 | + |
# Consume tokens from `o` and append the corresponding children to `e`.
#
# Reading stops as soon as a token's text equals `until` (for an element this
# is its closing tag, e.g. "</name>") or the iterator is exhausted.  Tokens of
# a kind not handled below (which includes the closing tag itself) add nothing.
# An opening-tag token creates a child element and recurses to read that
# element's own children up to its closing tag.
#
# Returns `nothing`; `e.children` is mutated in place.
function add_children!(e::Node, o::XMLTokenIterator, until::String)
    s = ""
    c = e.children
    while s != until
        next = iterate(o, -1)  # if state == 0, io will get reset to original position
        isnothing(next) && break
        T, s = next[1]
        if T == COMMENTTOKEN
            push!(c, comment(strip(replace(s, "<!--" => "", "-->" => ""))))
        elseif T == CDATATOKEN
            push!(c, cdata(replace(s, "<![CDATA[" => "", "]]>" => "")))
        elseif T == ELEMENTSELFCLOSEDTOKEN
            push!(c, Node(ELEMENT_NODE, get_tag(s), get_attributes(s), nothing))
        elseif T == ELEMENTTOKEN
            child = Node(ELEMENT_NODE, get_tag(s), get_attributes(s), Union{String,Node}[])
            add_children!(child, o, "</$(child.tag)>")
            push!(c, child)
        elseif T == TEXTTOKEN
            push!(c, unescape(s))
        elseif T == DTDTOKEN
            # Strip only the leading `<!DOCTYPE ` keyword (any letter case) and
            # the single closing `>`.  Deleting every '>' in the token would
            # corrupt a DTD whose body itself contains markup, such as an
            # internal subset `[<!ELEMENT ...>]`.
            body = replace(s, r"^\s*<!DOCTYPE\s+"i => "")
            body = replace(body, r"\s*>\s*$" => "")
            push!(c, dtd(body))
        elseif T == DECLARATIONTOKEN
            push!(c, Node(DECLARATION_NODE, get_tag(s), get_attributes(s), nothing))
        end
    end
    return nothing
end
0 commit comments