diff --git a/Manifest.toml b/Manifest.toml new file mode 100644 index 0000000..ac3cdc4 --- /dev/null +++ b/Manifest.toml @@ -0,0 +1,108 @@ +# This file is machine-generated - editing it directly is not advised + +julia_version = "1.9.1" +manifest_format = "2.0" +project_hash = "60b2b292332624ed32f4adf3489530dc60cb9a29" + +[[deps.Artifacts]] +uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" + +[[deps.Base64]] +uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" + +[[deps.CompilerSupportLibraries_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae" +version = "1.0.2+0" + +[[deps.DataAPI]] +git-tree-sha1 = "8da84edb865b0b5b0100c0666a9bc9a0b71c553c" +uuid = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a" +version = "1.15.0" + +[[deps.DataValueInterfaces]] +git-tree-sha1 = "bfc1187b79289637fa0ef6d4436ebdfe6905cbd6" +uuid = "e2d170a0-9d28-54be-80f0-106bbe20a464" +version = "1.0.0" + +[[deps.Dates]] +deps = ["Printf"] +uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" + +[[deps.InteractiveUtils]] +deps = ["Markdown"] +uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" + +[[deps.IteratorInterfaceExtensions]] +git-tree-sha1 = "a3f24677c21f5bbe9d2a714f95dcd58337fb2856" +uuid = "82899510-4779-5014-852e-03e436cf321d" +version = "1.0.0" + +[[deps.Libdl]] +uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" + +[[deps.LinearAlgebra]] +deps = ["Libdl", "OpenBLAS_jll", "libblastrampoline_jll"] +uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" + +[[deps.Logging]] +uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" + +[[deps.Markdown]] +deps = ["Base64"] +uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" + +[[deps.OpenBLAS_jll]] +deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl"] +uuid = "4536629a-c528-5b80-bd46-f80d51c5b363" +version = "0.3.21+4" + +[[deps.OrderedCollections]] +git-tree-sha1 = "dfdf5519f235516220579f949664f1bf44e741c5" +uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" +version = "1.6.3" + +[[deps.Printf]] +deps = ["Unicode"] +uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" 
+ +[[deps.Random]] +deps = ["SHA", "Serialization"] +uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" + +[[deps.SHA]] +uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" +version = "0.7.0" + +[[deps.SentinelArrays]] +deps = ["Dates", "Random"] +git-tree-sha1 = "0e7508ff27ba32f26cd459474ca2ede1bc10991f" +uuid = "91c51154-3ec4-41a3-a24f-3f23e20d615c" +version = "1.4.1" + +[[deps.Serialization]] +uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" + +[[deps.TableTraits]] +deps = ["IteratorInterfaceExtensions"] +git-tree-sha1 = "c06b2f539df1c6efa794486abfb6ed2022561a39" +uuid = "3783bdb8-4a98-5b6b-af9a-565f29a5fe9c" +version = "1.0.1" + +[[deps.Tables]] +deps = ["DataAPI", "DataValueInterfaces", "IteratorInterfaceExtensions", "LinearAlgebra", "OrderedCollections", "TableTraits"] +git-tree-sha1 = "cb76cf677714c095e535e3501ac7954732aeea2d" +uuid = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" +version = "1.11.1" + +[[deps.Test]] +deps = ["InteractiveUtils", "Logging", "Random", "Serialization"] +uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" + +[[deps.Unicode]] +uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" + +[[deps.libblastrampoline_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "8e850b90-86db-534c-a0d3-1478176c7d93" +version = "5.8.0+0" diff --git a/README.md b/README.md index 3f68bc8..33940ca 100644 --- a/README.md +++ b/README.md @@ -24,6 +24,15 @@ table_subset = ctable |> TableOperations.select(:C, :A) |> Tables.columntable ``` This "selects" the `C` and `A` columns from the original table, and re-orders them with `C` first. The column names can be provided as `String`s, `Symbol`s, or `Integer`s. 
+### `TableOperations.reject`
+The `TableOperations.reject` function *drops* the specified subset of columns from a Tables.jl source, like:
+```julia
+ctable = (A=[1, missing, 3], B=[1.0, 2.0, 3.0], C=["hey", "there", "sailor"])
+
+table_subset = ctable |> TableOperations.reject(:B) |> Tables.columntable
+```
+This "selects" all columns except for `B` from the original table in the order in which they originally appear. The column names can be provided as `String`s, `Symbol`s, or `Integer`s.
+
 ### `TableOperations.transform`
 The `TableOperations.transform` function allows specifying a "transform" function per column that will be applied per element. This is handy
 when a simple transformation is needed for a specific column (or columns). Note that this doesn't allow the creation of new columns,
diff --git a/src/TableOperations.jl b/src/TableOperations.jl
index 502fbbc..1a2a12e 100644
--- a/src/TableOperations.jl
+++ b/src/TableOperations.jl
@@ -181,6 +181,43 @@ end
     return SelectRow{typeof(row), names}(row), st
 end
 
+"""
+    TableOperations.reject(source, columns...) => TableOperations.Select
+    source |> TableOperations.reject(columns...) => TableOperations.Select
+
+Create a lazy wrapper that satisfies the Tables.jl interface and drops the columns given
+by `columns`, which can be `String`s, `Symbol`s, or `Integer`s. The columns that remain
+keep the order in which they appear in `source`.
+"""
+function reject end
+
+# Curried forms: remember which columns to drop and wait for the source to be piped in.
+function reject(names::Symbol...)
+    return source -> reject(source, names...)
+end
+function reject(names::String...)
+    return source -> reject(source, Base.map(Symbol, names)...)
+end
+function reject(inds::Integer...)
+    return source -> reject(source, Base.map(Int, inds)...)
+end
+
+# Direct forms: select every column of `x` that was not asked to be dropped.
+# NOTE(review): names are read through `Tables.columns(x)`; confirm this does not
+# materialize row-oriented sources before relying on laziness.
+function reject(x::T, names::Symbol...) where {T}
+    present = Tables.columnnames(Tables.columns(x))
+    return select(x, setdiff(present, names)...)
+end
+function reject(x::T, names::String...) where {T}
+    present = Tables.columnnames(Tables.columns(x))
+    return select(x, setdiff(present, Base.map(Symbol, names))...)
+end
+function reject(x::T, inds::Integer...) where {T}
+    positions = 1:length(Tables.columnnames(Tables.columns(x)))
+    return select(x, setdiff(positions, Base.map(Int, inds))...)
+end
+
 # filter
 struct Filter{F, T}
     f::F
diff --git a/test/runtests.jl b/test/runtests.jl
index 952ced4..32e64a5 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -271,6 +271,134 @@ table = ctable |> TableOperations.select(3, 1) |> Tables.rowtable
 
 end
 
+@testset "TableOperations.reject" begin
+# Tests below closely mirror those from "TableOperations.select", where appropriate
+
+# 20
+# Dropping two columns of the wide table must leave the other 99998 in the schema.
+x = ReallyWideTable()
+sel = TableOperations.reject(x, :x1, :x2)
+sch = Tables.schema(sel)
+@test :x1 ∉ sch.names && :x2 ∉ sch.names
+@test sch.types == ntuple(i -> Float64, 99998)
+
+# 117
+# Column-access source with every column rejected by index.
+sel = TableOperations.reject(ctable, 1, 2, 3)
+@test Tables.istable(typeof(sel))
+@test Tables.schema(sel) == Tables.Schema((), ())
+@test Tables.columnaccess(typeof(sel))
+@test Tables.columns(sel) === sel
+@test propertynames(sel) == ()
+# These assertions expect getcolumn to still reach the wrapped table's columns.
+@test isequal(Tables.getcolumn(sel, 1), [1, missing, 3])
+@test isequal(Tables.getcolumn(sel, :A), [1, missing, 3])
+@test Tables.columntable(sel) == NamedTuple()
+@test Tables.rowtable(sel) == NamedTuple{(), Tuple{}}[]
+
+sel = ctable |> TableOperations.reject(:A)
+@test Tables.istable(typeof(sel))
+@test Tables.schema(sel) == Tables.Schema((:B, :C), (Float64, String))
+@test Tables.columnaccess(typeof(sel))
+@test Tables.columns(sel) === sel
+@test propertynames(sel) == (:B, :C)
+
+sel = ctable |> TableOperations.reject(1)
+@test Tables.istable(typeof(sel))
+@test Tables.schema(sel) == Tables.Schema((:B, :C), (Float64, String))
+@test Tables.columnaccess(typeof(sel))
+@test Tables.columns(sel) === sel
+@test propertynames(sel) == (:B, :C)
+
+# Row-access source with every column rejected by index.
+sel = TableOperations.reject(rtable, 1, 2, 3)
+@test Tables.rowaccess(typeof(sel))
+@test Tables.rows(sel) === sel
+@test Tables.schema(sel) == Tables.Schema((), ())
+@test Base.IteratorSize(typeof(sel)) == Base.HasShape{1}()
+@test length(sel) == 3
+@test Base.IteratorEltype(typeof(sel)) == Base.HasEltype()
+@test eltype(sel) == TableOperations.SelectRow{NamedTuple{(:A, :B, :C),Tuple{Union{Missing, Int},Float64,String}},()}
+@test Tables.columntable(sel) == NamedTuple()
+@test Tables.rowtable(sel) == [NamedTuple(), NamedTuple(), NamedTuple()]
+srow = first(sel)
+@test propertynames(srow) == ()
+
+sel = rtable |> TableOperations.reject(:B, :C)
+@test Tables.rowaccess(typeof(sel))
+@test Tables.rows(sel) === sel
+@test Tables.schema(sel) == Tables.Schema((:A,), (Union{Int, Missing},))
+@test Base.IteratorSize(typeof(sel)) == Base.HasShape{1}()
+@test length(sel) == 3
+@test Base.IteratorEltype(typeof(sel)) == Base.HasEltype()
+@test eltype(sel) == TableOperations.SelectRow{NamedTuple{(:A, :B, :C),Tuple{Union{Missing, Int},Float64,String}},(:A,)}
+@test isequal(Tables.columntable(sel), (A = [1, missing, 3],))
+@test isequal(Tables.rowtable(sel), [(A=1,), (A=missing,), (A=3,)])
+srow = first(sel)
+@test propertynames(srow) == (:A,)
+
+# Mirrors the select regression test where the first column's values were returned under
+# the requested name. We don't use rtable here because mixed types produce TypeErrors which
+# hide the underlying problem. :C is not a column of rtable2, so this also checks that
+# rejecting an absent name leaves the result unchanged.
+rtable2 = [(A = 1.0, B = 2.0), (A = 2.0, B = 4.0), (A = 3.0, B = 6.0)]
+sel = rtable2 |> TableOperations.reject(:A, :C)
+@test Tables.rowaccess(typeof(sel))
+@test Tables.rows(sel) === sel
+@test Tables.schema(sel) == Tables.Schema((:B,), (Float64,))
+@test Base.IteratorSize(typeof(sel)) == Base.HasShape{1}()
+@test length(sel) == 3
+@test Base.IteratorEltype(typeof(sel)) == Base.HasEltype()
+@test eltype(sel) == TableOperations.SelectRow{NamedTuple{(:A, :B,),Tuple{Float64,Float64}},(:B,)}
+@test isequal(Tables.columntable(sel), (B = [2.0, 4.0, 6.0],))
+@test isequal(Tables.rowtable(sel), [(B=2.0,), (B=4.0,), (B=6.0,)])
+srow = first(sel)
+@test propertynames(srow) == (:B,)
+@test srow.B == 2.0 # What we expect
+
+sel = rtable |> TableOperations.reject(2, 3)
+@test Tables.rowaccess(typeof(sel))
+@test Tables.rows(sel) === sel
+@test Tables.schema(sel) == Tables.Schema((:A,), (Union{Int, Missing},))
+@test Base.IteratorSize(typeof(sel)) == Base.HasShape{1}()
+@test length(sel) == 3
+@test Base.IteratorEltype(typeof(sel)) == Base.HasEltype()
+@test eltype(sel) == TableOperations.SelectRow{NamedTuple{(:A, :B, :C),Tuple{Union{Missing, Int},Float64,String}},(1,)}
+@test isequal(Tables.columntable(sel), (A = [1, missing, 3],))
+@test isequal(Tables.rowtable(sel), [(A=1,), (A=missing,), (A=3,)])
+srow = first(sel)
+@test propertynames(srow) == (:A,)
+@test Tables.getcolumn(srow, 1) == 1
+@test Tables.getcolumn(srow, :A) == 1
+
+# column sink
+table = ctable |> TableOperations.reject(:B, :C) |> Tables.columntable
+@test length(table) == 1
+@test isequal(table.A, [1, missing, 3])
+
+table = ctable |> TableOperations.reject(2, 3) |> Tables.columntable
+@test length(table) == 1
+@test isequal(table.A, [1, missing, 3])
+
+table = ctable |> TableOperations.reject("B", "C") |> Tables.columntable
+@test length(table) == 1
+@test isequal(table.A, [1, missing, 3])
+
+# row sink
+table = ctable |> TableOperations.reject(:B, :C) |> Tables.rowtable
+@test length(table[1]) == 1
+@test isequal(map(x->x.A, table), [1, missing, 3])
+
+table = ctable |> TableOperations.reject(2, 3) |> Tables.rowtable
+@test length(table[1]) == 1
+@test isequal(map(x->x.A, table), [1, missing, 3])
+
+table = ctable |> TableOperations.reject("B", "C") |> Tables.rowtable
+@test length(table[1]) == 1
+@test isequal(map(x->x.A, table), [1, missing, 3])
+
+end
+
 @testset "TableOperations.filter" begin
 
 f = TableOperations.filter(x->x.B == 2.0, ctable)