Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
108 changes: 108 additions & 0 deletions Manifest.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
# This file is machine-generated - editing it directly is not advised

julia_version = "1.9.1"
manifest_format = "2.0"
project_hash = "60b2b292332624ed32f4adf3489530dc60cb9a29"

[[deps.Artifacts]]
uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"

[[deps.Base64]]
uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"

[[deps.CompilerSupportLibraries_jll]]
deps = ["Artifacts", "Libdl"]
uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae"
version = "1.0.2+0"

[[deps.DataAPI]]
git-tree-sha1 = "8da84edb865b0b5b0100c0666a9bc9a0b71c553c"
uuid = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a"
version = "1.15.0"

[[deps.DataValueInterfaces]]
git-tree-sha1 = "bfc1187b79289637fa0ef6d4436ebdfe6905cbd6"
uuid = "e2d170a0-9d28-54be-80f0-106bbe20a464"
version = "1.0.0"

[[deps.Dates]]
deps = ["Printf"]
uuid = "ade2ca70-3891-5945-98fb-dc099432e06a"

[[deps.InteractiveUtils]]
deps = ["Markdown"]
uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240"

[[deps.IteratorInterfaceExtensions]]
git-tree-sha1 = "a3f24677c21f5bbe9d2a714f95dcd58337fb2856"
uuid = "82899510-4779-5014-852e-03e436cf321d"
version = "1.0.0"

[[deps.Libdl]]
uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"

[[deps.LinearAlgebra]]
deps = ["Libdl", "OpenBLAS_jll", "libblastrampoline_jll"]
uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"

[[deps.Logging]]
uuid = "56ddb016-857b-54e1-b83d-db4d58db5568"

[[deps.Markdown]]
deps = ["Base64"]
uuid = "d6f4376e-aef5-505a-96c1-9c027394607a"

[[deps.OpenBLAS_jll]]
deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl"]
uuid = "4536629a-c528-5b80-bd46-f80d51c5b363"
version = "0.3.21+4"

[[deps.OrderedCollections]]
git-tree-sha1 = "dfdf5519f235516220579f949664f1bf44e741c5"
uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
version = "1.6.3"

[[deps.Printf]]
deps = ["Unicode"]
uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7"

[[deps.Random]]
deps = ["SHA", "Serialization"]
uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"

[[deps.SHA]]
uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce"
version = "0.7.0"

[[deps.SentinelArrays]]
deps = ["Dates", "Random"]
git-tree-sha1 = "0e7508ff27ba32f26cd459474ca2ede1bc10991f"
uuid = "91c51154-3ec4-41a3-a24f-3f23e20d615c"
version = "1.4.1"

[[deps.Serialization]]
uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b"

[[deps.TableTraits]]
deps = ["IteratorInterfaceExtensions"]
git-tree-sha1 = "c06b2f539df1c6efa794486abfb6ed2022561a39"
uuid = "3783bdb8-4a98-5b6b-af9a-565f29a5fe9c"
version = "1.0.1"

[[deps.Tables]]
deps = ["DataAPI", "DataValueInterfaces", "IteratorInterfaceExtensions", "LinearAlgebra", "OrderedCollections", "TableTraits"]
git-tree-sha1 = "cb76cf677714c095e535e3501ac7954732aeea2d"
uuid = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
version = "1.11.1"

[[deps.Test]]
deps = ["InteractiveUtils", "Logging", "Random", "Serialization"]
uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[[deps.Unicode]]
uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"

[[deps.libblastrampoline_jll]]
deps = ["Artifacts", "Libdl"]
uuid = "8e850b90-86db-534c-a0d3-1478176c7d93"
version = "5.8.0+0"
9 changes: 9 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,15 @@ table_subset = ctable |> TableOperations.select(:C, :A) |> Tables.columntable
```
This "selects" the `C` and `A` columns from the original table, and re-orders them with `C` first. The column names can be provided as `String`s, `Symbol`s, or `Integer`s.

### `TableOperations.reject`
The `TableOperations.reject` function *drops* the specified subset of columns from a Tables.jl source, like:
```julia
ctable = (A=[1, missing, 3], B=[1.0, 2.0, 3.0], C=["hey", "there", "sailor"])

table_subset = ctable |> TableOperations.reject(:B) |> Tables.columntable
```
This keeps every column except `B` from the original table, in the order in which the columns originally appear. The columns to drop can be provided as `String`s, `Symbol`s, or `Integer`s (column positions).

### `TableOperations.transform`
The `TableOperations.transform` function allows specifying a "transform" function per column that will be applied per element. This is handy
when a simple transformation is needed for a specific column (or columns). Note that this doesn't allow the creation of new columns,
Expand Down
16 changes: 16 additions & 0 deletions src/TableOperations.jl
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,22 @@ end
return SelectRow{typeof(row), names}(row), st
end

"""
    TableOperations.reject(source, columns...) => TableOperations.Select
    source |> TableOperations.reject(columns...) => TableOperations.Select

Create a lazy wrapper that satisfies the Tables.jl interface and keeps every column of
`source` except those given by `columns`, which can be `String`s, `Symbol`s, or
`Integer`s (column positions). The remaining columns keep their original order; names or
positions that do not occur in `source` are ignored.
"""
function reject end

# Curried forms: capture the columns to drop and wait for the source, which is what makes
# `source |> TableOperations.reject(...)` work.
reject(names::Symbol...) = x->reject(x, names...)
reject(names::String...) = x->reject(x, Base.map(Symbol, names)...)
reject(inds::Integer...) = x->reject(x, Base.map(Int, inds)...)

# Column names of `x`. For sources that are not column-access, the names are read from the
# schema of `Tables.rows(x)` when it is known, so that the rows are not materialized into
# columns just to learn the names. Otherwise fall back to asking the columns directly.
function _reject_columnnames(x)
    if !Tables.columnaccess(typeof(x))
        sch = Tables.schema(Tables.rows(x))
        sch === nothing || return sch.names
    end
    return Tables.columnnames(Tables.columns(x))
end

# Direct forms: compute the complement of the rejected columns and delegate to `select`.
# `setdiff` preserves the source order and silently skips entries that are not present.
reject(x::T, names::Symbol...) where {T} =
    select(x, setdiff(_reject_columnnames(x), names)...)
reject(x::T, names::String...) where {T} =
    select(x, setdiff(_reject_columnnames(x), Base.map(Symbol, names))...)
reject(x::T, inds::Integer...) where {T} =
    select(x, setdiff(1:length(_reject_columnnames(x)), Base.map(Int, inds))...)

# filter
struct Filter{F, T}
f::F
Expand Down
125 changes: 125 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,131 @@ table = ctable |> TableOperations.select(3, 1) |> Tables.rowtable

end

@testset "TableOperations.reject" begin
    # Tests below closely mirror those from "TableOperations.select", where appropriate.
    # `ctable`, `rtable` and `ReallyWideTable` are fixtures defined earlier in this file.

    @testset "very wide table" begin
        # Mirrors the `select` test labelled "20".
        x = ReallyWideTable()
        sel = TableOperations.reject(x, :x1, :x2)
        sch = Tables.schema(sel)
        @test :x1 ∉ sch.names && :x2 ∉ sch.names
        @test sch.types == ntuple(i -> Float64, 99998)
    end

    @testset "column source: every column rejected" begin
        # Mirrors the `select` test labelled "117".
        sel = TableOperations.reject(ctable, 1, 2, 3)
        @test Tables.istable(typeof(sel))
        @test Tables.schema(sel) == Tables.Schema((), ())
        @test Tables.columnaccess(typeof(sel))
        @test Tables.columns(sel) === sel
        @test propertynames(sel) == ()
        # NOTE(review): these two assertions expect `getcolumn` to still reach column `A`
        # even though it was rejected - confirm this pass-through is intended.
        @test isequal(Tables.getcolumn(sel, 1), [1, missing, 3])
        @test isequal(Tables.getcolumn(sel, :A), [1, missing, 3])
        @test Tables.columntable(sel) == NamedTuple()
        @test Tables.rowtable(sel) == NamedTuple{(), Tuple{}}[]
    end

    @testset "column source: reject by name" begin
        sel = ctable |> TableOperations.reject(:A)
        @test Tables.istable(typeof(sel))
        @test Tables.schema(sel) == Tables.Schema((:B, :C), (Float64, String))
        @test Tables.columnaccess(typeof(sel))
        @test Tables.columns(sel) === sel
        @test propertynames(sel) == (:B, :C)
    end

    @testset "column source: reject by position" begin
        sel = ctable |> TableOperations.reject(1)
        @test Tables.istable(typeof(sel))
        @test Tables.schema(sel) == Tables.Schema((:B, :C), (Float64, String))
        @test Tables.columnaccess(typeof(sel))
        @test Tables.columns(sel) === sel
        @test propertynames(sel) == (:B, :C)
    end

    @testset "row source: every column rejected" begin
        sel = TableOperations.reject(rtable, 1, 2, 3)
        @test Tables.rowaccess(typeof(sel))
        @test Tables.rows(sel) === sel
        @test Tables.schema(sel) == Tables.Schema((), ())
        @test Base.IteratorSize(typeof(sel)) == Base.HasShape{1}()
        @test length(sel) == 3
        @test Base.IteratorEltype(typeof(sel)) == Base.HasEltype()
        @test eltype(sel) == TableOperations.SelectRow{NamedTuple{(:A, :B, :C),Tuple{Union{Missing, Int},Float64,String}},()}
        @test Tables.columntable(sel) == NamedTuple()
        @test Tables.rowtable(sel) == [NamedTuple(), NamedTuple(), NamedTuple()]
        srow = first(sel)
        @test propertynames(srow) == ()
    end

    @testset "row source: reject by name" begin
        sel = rtable |> TableOperations.reject(:B, :C)
        @test Tables.rowaccess(typeof(sel))
        @test Tables.rows(sel) === sel
        @test Tables.schema(sel) == Tables.Schema((:A,), (Union{Int, Missing},))
        @test Base.IteratorSize(typeof(sel)) == Base.HasShape{1}()
        @test length(sel) == 3
        @test Base.IteratorEltype(typeof(sel)) == Base.HasEltype()
        @test eltype(sel) == TableOperations.SelectRow{NamedTuple{(:A, :B, :C),Tuple{Union{Missing, Int},Float64,String}},(:A,)}
        @test isequal(Tables.columntable(sel), (A = [1, missing, 3],))
        @test isequal(Tables.rowtable(sel), [(A=1,), (A=missing,), (A=3,)])
        srow = first(sel)
        @test propertynames(srow) == (:A,)
    end

    @testset "row source: surviving column keeps its own values" begin
        # Guards against returning the first column's values under the surviving column's
        # name. `rtable` is not used here because mixed types produce TypeErrors which
        # hide the underlying problem.
        rtable2 = [(A = 1.0, B = 2.0), (A = 2.0, B = 4.0), (A = 3.0, B = 6.0)]
        # `:C` is not a column of `rtable2`; rejecting it must be a no-op.
        sel = rtable2 |> TableOperations.reject(:A, :C)
        @test Tables.rowaccess(typeof(sel))
        @test Tables.rows(sel) === sel
        @test Tables.schema(sel) == Tables.Schema((:B,), (Float64,))
        @test Base.IteratorSize(typeof(sel)) == Base.HasShape{1}()
        @test length(sel) == 3
        @test Base.IteratorEltype(typeof(sel)) == Base.HasEltype()
        @test eltype(sel) == TableOperations.SelectRow{NamedTuple{(:A, :B,),Tuple{Float64,Float64}},(:B,)}
        @test isequal(Tables.columntable(sel), (B = [2.0, 4.0, 6.0],))
        @test isequal(Tables.rowtable(sel), [(B=2.0,), (B=4.0,), (B=6.0,)])
        srow = first(sel)
        @test propertynames(srow) == (:B,)
        @test srow.B == 2.0 # What we expect
    end

    @testset "row source: reject by position" begin
        sel = rtable |> TableOperations.reject(2, 3)
        @test Tables.rowaccess(typeof(sel))
        @test Tables.rows(sel) === sel
        @test Tables.schema(sel) == Tables.Schema((:A,), (Union{Int, Missing},))
        @test Base.IteratorSize(typeof(sel)) == Base.HasShape{1}()
        @test length(sel) == 3
        @test Base.IteratorEltype(typeof(sel)) == Base.HasEltype()
        @test eltype(sel) == TableOperations.SelectRow{NamedTuple{(:A, :B, :C),Tuple{Union{Missing, Int},Float64,String}},(1,)}
        @test isequal(Tables.columntable(sel), (A = [1, missing, 3],))
        @test isequal(Tables.rowtable(sel), [(A=1,), (A=missing,), (A=3,)])
        srow = first(sel)
        @test propertynames(srow) == (:A,)
        @test Tables.getcolumn(srow, 1) == 1
        @test Tables.getcolumn(srow, :A) == 1
    end

    @testset "column sink" begin
        table = ctable |> TableOperations.reject(:B, :C) |> Tables.columntable
        @test length(table) == 1
        @test isequal(table.A, [1, missing, 3])

        table = ctable |> TableOperations.reject(2, 3) |> Tables.columntable
        @test length(table) == 1
        @test isequal(table.A, [1, missing, 3])

        table = ctable |> TableOperations.reject("B", "C") |> Tables.columntable
        @test length(table) == 1
        @test isequal(table.A, [1, missing, 3])
    end

    @testset "row sink" begin
        table = ctable |> TableOperations.reject(:B, :C) |> Tables.rowtable
        @test length(table[1]) == 1
        @test isequal(map(x->x.A, table), [1, missing, 3])

        table = ctable |> TableOperations.reject(2, 3) |> Tables.rowtable
        @test length(table[1]) == 1
        @test isequal(map(x->x.A, table), [1, missing, 3])

        table = ctable |> TableOperations.reject("B", "C") |> Tables.rowtable
        @test length(table[1]) == 1
        @test isequal(map(x->x.A, table), [1, missing, 3])
    end

end

@testset "TableOperations.filter" begin

f = TableOperations.filter(x->x.B == 2.0, ctable)
Expand Down