Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
30a6661
Add StdNames
May 5, 2022
d322959
Revised StdNames transform
May 6, 2022
488eeac
Revised StdNames transform and tests
May 7, 2022
84d065a
Update src/transforms/stdnames.jl
ceferisbarov May 7, 2022
b9f6150
Revised StdNames transform and tests
May 7, 2022
b401dab
Added tests for StdNames
May 7, 2022
9abca77
Update test/transforms.jl
ceferisbarov May 9, 2022
4b13047
Revised StdNames transform and tests
May 9, 2022
ace0c43
Update src/transforms/stdnames.jl
ceferisbarov May 9, 2022
ff0c538
Revised StdNames
May 9, 2022
a81bcd5
Update Project.toml
juliohm May 5, 2022
9bedf6d
Update Project.toml
juliohm May 5, 2022
aff91c1
Revised Rename transform to accept Dictionary of strings
May 10, 2022
155bf4f
Replaced rewrote _symboldict function in one line
May 10, 2022
d1564ab
Rewrote _symboldict function and added tests
May 10, 2022
6890774
Revised StdNames transform
May 10, 2022
21fef61
Merge branch 'JuliaML:master' into stdnames
ceferisbarov May 10, 2022
96f4c5b
Update src/transforms/stdnames.jl
ceferisbarov May 10, 2022
8e8abde
Update src/transforms/stdnames.jl
ceferisbarov May 10, 2022
582e1cb
Update src/transforms/stdnames.jl
ceferisbarov May 10, 2022
f812f71
Update test/transforms.jl
ceferisbarov May 10, 2022
983a49b
Revised StdNames
May 10, 2022
8e0f593
Added StdNames to docs
May 10, 2022
b060bce
Revised StdNames docstring and added tests
May 11, 2022
7e105b2
Updated _camel function
May 11, 2022
de48b72
Update src/transforms/stdnames.jl
ceferisbarov May 11, 2022
76b500d
Added preprocessing step to StdNames
May 12, 2022
19c2e88
Update src/transforms/stdnames.jl
ceferisbarov May 16, 2022
3251c75
Update src/transforms/stdnames.jl
ceferisbarov May 16, 2022
1a5dcc0
Update src/transforms/stdnames.jl
ceferisbarov May 16, 2022
f42062c
Refactored StdNames and its tests
May 16, 2022
e5b085a
Update src/transforms/stdnames.jl
juliohm May 17, 2022
50f3bf4
Update src/transforms/stdnames.jl
juliohm May 17, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/src/transforms/builtin.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,12 @@ DropMissing
Rename
```

## StdNames

```@docs
StdNames
```

## Replace

```@docs
Expand Down
1 change: 1 addition & 0 deletions src/TableTransforms.jl
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ export
Filter,
DropMissing,
Rename,
StdNames,
Replace,
Coalesce,
Coerce,
Expand Down
1 change: 1 addition & 0 deletions src/transforms.jl
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,7 @@ end
include("transforms/select.jl")
include("transforms/filter.jl")
include("transforms/rename.jl")
include("transforms/stdnames.jl")
include("transforms/replace.jl")
include("transforms/coalesce.jl")
include("transforms/coerce.jl")
Expand Down
77 changes: 77 additions & 0 deletions src/transforms/stdnames.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# ------------------------------------------------------------------
# Licensed under the MIT License. See LICENSE in the project root.
# ------------------------------------------------------------------

"""
    StdNames(spec)

Transform that rewrites the column names of a table so that
they follow the naming convention selected by `spec`.
Calling `StdNames()` without arguments selects the `:upper` spec.

# Specs

* `:upper` - Uppercase, e.g. COLUMNNAME
* `:camel` - Camelcase, e.g. ColumnName
* `:snake` - Snakecase, e.g. column_name
"""
struct StdNames <: Stateless
  spec::Symbol
end

# zero-argument constructor: falls back to the `:upper` spec
function StdNames()
  StdNames(:upper)
end

# StdNames can be undone: `apply` returns the Rename transform and
# its cache, and `revert` uses them to restore the original names
function isrevertible(::Type{StdNames})
  true
end

# Standardize the column names of `table` according to `transform.spec`.
#
# Returns the renamed table together with the cache `(rtrans, rcache)`,
# i.e. the underlying Rename transform and the cache produced by applying
# it; `revert` consumes this pair to restore the original names.
#
# Throws an `ArgumentError` when the spec is not one of
# `:upper`, `:camel` or `:snake`.
function apply(transform::StdNames, table)
  # retrieve spec
  spec = transform.spec

  # retrieve column names
  cols = Tables.columns(table)
  oldnames = string.(Tables.columnnames(cols))

  # clean column names
  cleaned = _clean.(oldnames)

  # apply spec; unknown specs are rejected explicitly, otherwise
  # `names` would stay undefined and fail below with an UndefVarError
  names = if spec == :camel
    _camel.(cleaned)
  elseif spec == :snake
    _snake.(cleaned)
  elseif spec == :upper
    _upper.(cleaned)
  else
    throw(ArgumentError("invalid StdNames spec :$spec, use :upper, :camel or :snake"))
  end

  # make names unique
  newnames = _unique(names)

  # rename transform
  rtrans = Rename(Dict(oldnames .=> newnames))
  newtable, rcache = apply(rtrans, table)

  newtable, (rtrans, rcache)
end

# Undo a StdNames transform by delegating to the cached Rename
# transform (first cache entry) and its own cache (second entry).
function revert(::StdNames, newtable, cache)
  renamer = cache[1]
  renamercache = cache[2]
  revert(renamer, newtable, renamercache)
end

# characters treated as word separators inside column names
const delim = Char[' ', '\t', '-', '_']

# keep only the letters, digits and delimiter characters of `name`
function _clean(name)
  keep(c) = isletter(c) || isdigit(c) || c in delim
  filter(keep, name)
end

# Make a collection of names unique while preserving order: whenever a
# name was already produced, underscores are appended to it until it no
# longer clashes with any of the previously accepted names.
function _unique(names)
  accepted = String[]
  for candidate in names
    fresh = candidate
    while in(fresh, accepted)
      fresh = string(fresh, "_")
    end
    push!(accepted, fresh)
  end
  accepted
end

# CamelCase: split on delimiters, capitalize the first letter
# of every piece and concatenate the pieces without separator
function _camel(name)
  pieces = split(name, delim)
  join(map(uppercasefirst, pieces))
end

# snake_case: split on delimiters, lowercase every word and join with '_'.
# Empty pieces are discarded so that leading, trailing and repeated
# delimiters (e.g. "apple  banana" or "apple - banana") never produce
# empty words, which would otherwise show up as doubled underscores.
_snake(name) = join(lowercase.(split(name, delim; keepempty=false)), '_')

# UPPERCASE: uppercase the whole name, then drop every delimiter character
function _upper(name)
  shouted = uppercase(name)
  replace(shouted, delim => "")
end
71 changes: 71 additions & 0 deletions test/transforms.jl
Original file line number Diff line number Diff line change
Expand Up @@ -694,6 +694,77 @@
@test n1 == n2
end

@testset "StdNames" begin
  # every delimiter variant must collapse to the same standardized name
  variants = ["apple banana", "apple\tbanana", "apple_banana", "apple-banana", "apple_Banana"]
  for variant in variants
    @test TableTransforms._camel(variant) == "AppleBanana"
    @test TableTransforms._snake(variant) == "apple_banana"
    @test TableTransforms._upper(variant) == "APPLEBANANA"
  end

  # single-letter names with an optional leading or trailing delimiter
  for short in ["a", "A", "_a", "_A", "a ", "A "]
    @test TableTransforms._camel(short) == "A"
    @test TableTransforms._snake(short) == "a"
    @test TableTransforms._upper(short) == "A"
  end

  # special characters
  cleaning = ["a&B" => "aB", "apple#" => "apple", "apple-tree" => "apple-tree"]
  for (raw, expected) in cleaning
    @test TableTransforms._clean(raw) == expected
  end

  # invariance test
  for camel in ["AppleTree", "BananaFruit", "PearSeed"]
    @test TableTransforms._camel(camel) == camel
  end

  for snake in ["apple_tree", "banana_fruit", "pear_seed"]
    @test TableTransforms._snake(snake) == snake
  end

  for upper in ["APPLETREE", "BANANAFRUIT", "PEARSEED"]
    @test TableTransforms._upper(upper) == upper
  end

  # uniqueness test
  colnames = (Symbol("AppleTree"), Symbol("apple tree"), Symbol("apple_tree"))
  colvalues = ([1,2,3], [4,5,6], [7,8,9])
  tbl = Table(; zip(colnames, colvalues)...)
  rows = Tables.rowtable(tbl)
  stdupper = StdNames(:upper)
  renamed, cache = apply(stdupper, rows)
  renamedcols = Tables.columns(renamed)
  @test Tables.columnnames(renamedcols) == (:APPLETREE, :APPLETREE_, :APPLETREE__)

  # row table test
  colnames = (:a, Symbol("apple tree"), Symbol("banana tree"))
  colvalues = ([1,2,3], [4,5,6], [7,8,9])
  tbl = Table(; zip(colnames, colvalues)...)
  rows = Tables.rowtable(tbl)
  stddefault = StdNames()
  renamed, cache = apply(stddefault, rows)
  @test Tables.isrowtable(renamed)
  @test isrevertible(stddefault)
  restored = revert(stddefault, renamed, cache)
  @test rows == restored

  # reapply test
  stddefault = StdNames()
  firstpass, firstcache = apply(stddefault, rows)
  secondpass = reapply(stddefault, firstpass, firstcache)
  @test firstpass == secondpass
end

@testset "Replace" begin
a = [3, 2, 1, 4, 5, 3]
b = [2, 4, 4, 5, 8, 5]
Expand Down