BazerData.jl

Data manipulation utilities for Julia
Log | Files | Refs | README | LICENSE

commit 64fb0a0d9ea414724e74cbc6cd5a46ea25251250
parent 18a4e925ce2cdf47027206aa5821f339ab1aa4f0
Author: Erik Loualiche <eloualiche@users.noreply.github.com>
Date:   Wed, 21 May 2025 13:38:39 -0500

Merge pull request #1 from eloualiche/feature/paneldata

Feature/paneldata
Diffstat:
M Project.toml | 2 +-
M src/PanelData.jl | 36 +++++++++++++++++-------------------
M test/UnitTests/panel_fill.jl | 43 ++++++++++++++++++++++++++-----------------
M test/runtests.jl | 5 +++--
4 files changed, 47 insertions(+), 39 deletions(-)

diff --git a/Project.toml b/Project.toml @@ -1,7 +1,7 @@ name = "BazerData" uuid = "9777a11d-2328-4b97-9b51-b265bb408da6" authors = ["Erik Loualiche"] -version = "0.7.2" +version = "0.7.3" [deps] ColorSchemes = "35d6a980-a343-548e-a6ea-1d62b119f2f4" diff --git a/src/PanelData.jl b/src/PanelData.jl @@ -1,6 +1,6 @@ # ------------------------------------------------------------------------------------------ """ - panel_fill( + panel_fill!( df::DataFrame, id_var::Symbol, time_var::Symbol, @@ -25,7 +25,6 @@ email me for other interpolations (anything from Interpolations.jl is possible) - `uniquecheck::Bool = true`: check if panel is clean - `flag::Bool = false`: flag the interpolated values -- `merge::Bool = false`: merge the new values with the input dataset # Returns - `AbstractDataFrame`: @@ -33,17 +32,15 @@ # Examples - See tests """ -function panel_fill( +function panel_fill!( df::DataFrame, id_var::Symbol, time_var::Symbol, value_var::Union{Symbol, Vector{Symbol}}; gap::Union{Int, DatePeriod} = 1, method::Symbol = :backwards, uniquecheck::Bool = true, flag::Bool = false, - merge::Bool = false ) - # prepare the data sort!(df, [id_var, time_var]) if isa(value_var, Symbol) @@ -136,24 +133,24 @@ function panel_fill( transform!(df_fill, time_var_r => time_var) end - if merge - if flag - df[!, :flag] .= :original - end - return sort(vcat(df, df_fill, cols=:union), [id_var, time_var]) - else - return df_fill + if flag + df[!, :flag] .= :original end + append!(df, df_fill, cols=:union) + sort!(df, [id_var, time_var]) + + return df + end """ - panel_fill!(...) + panel_fill(...) 
- Same as panel_fill but with modification in place + Same as panel_fill but without modification in place in place """ -function panel_fill!( +function panel_fill( df::DataFrame, id_var::Symbol, time_var::Symbol, value_var::Union{Symbol, Vector{Symbol}}; gap::Union{Int, DatePeriod} = 1, @@ -162,12 +159,13 @@ function panel_fill!( flag::Bool = false ) - df_fill = panel_fill(df, id_var, time_var, value_var, + df_res = copy(df) + + panel_fill!(df_res, id_var, time_var, value_var, gap = gap, method = method, uniquecheck = uniquecheck, flag = flag) - append!(df, df_fill, cols=:union) - sort!(df, [id_var, time_var]) + + return df_res - return nothing end diff --git a/test/UnitTests/panel_fill.jl b/test/UnitTests/panel_fill.jl @@ -27,11 +27,12 @@ @testset "DF1" begin df1_test = panel_fill(df1, :id, :t, :a, gap=1, method=:backwards, uniquecheck=true, flag=true) - @test isequal(select(df1_test, :a), - DataFrame(a = [0.0, 1.0, 1.0])) + @test isequal( + select(subset(df1_test, :flag => ByRow(==(:backwards))), :a), + DataFrame(a = [1.0, 1.0, 0.0])) # TODO clean up this t est df1_test = panel_fill(df1, :id, :t, :a, - gap=1, method=:backwards, uniquecheck=true, flag=true, merge=true) + gap=1, method=:backwards, uniquecheck=true, flag=true) @test isequal(nrow(df1_test), 8) end @@ -39,11 +40,12 @@ @testset "DF2" begin df2_test = panel_fill(df2, :id, :t, [:v1, :v2, :v3], gap=1, method=:backwards, uniquecheck=true, flag=true) - @test isequal(select(df2_test, r"v"), - DataFrame(v1 = [0.0, 1.0, 1.0], v2 = [4.0, 1.0, 1.], v3 = [15.0, 1.0, 1.0])) + @test isequal( + select(subset(df2_test, :flag => ByRow(==(:backwards))), r"v"), + DataFrame(v1 = [1.0, 1.0, 0.0], v2 = [1.0, 1.0, 4.0], v3 = [1.0, 1.0, 15.0])) df2_test = panel_fill(df2, :id, :t, :v1, - gap=1, method=:backwards, uniquecheck=true, flag=true, merge=true) + gap=1, method=:backwards, uniquecheck=true, flag=true) @test isequal((nrow(df2_test), nrow(filter(:v2 => !ismissing, df2_test))), (10, 7)) end @@ -54,9 +56,11 @@ # test 
with dates backwards df3_test = panel_fill(df3, :id, :t, [:v1, :v2, :v3], gap=Month(1), method=:backwards, uniquecheck=true, flag=true) - @test isequal(select(df3_test, :v1, :v2, :v3), - DataFrame(v1 = [4.0, 11.0, 0.0, 1.0, 1.0], v2 = [2.0, 3.0, 4.0, 1.0, 1.0], - v3 = [22.5, 17.2, 15.0, 1.0, 1.0])) + @test isequal( + select(subset(df3_test, :flag => ByRow(==(:backwards))), r"v"), + DataFrame(v1 = [1.0, 1.0, 0.0, 4.0, 11.0], + v2 = [1.0, 1.0, 4.0, 2.0, 3.0], + v3 = [1.0, 1.0, 15.0, 22.5, 17.2])) # test in place with dates forwards and only fill some variables and not others df3_test = copy(df3) @@ -68,16 +72,21 @@ # linear interpolation df3_test = panel_fill(df3, :id, :t, [:v1, :v2, :v3], - gap=Month(1), method=:linear, uniquecheck=true, flag=true, merge=false) - @test isapprox(select(df3_test, r"v"), - DataFrame(v1 = [7.5 , 12.0, 0.0, 1.0, 1.0], v2 = [2.5, 3.5, 4.5, 1.333, 1.666], - v3 = [19.85, 9.1, 13.625, 2.3333, 3.666]), - atol = 0.01) + gap=Month(1), method=:linear, uniquecheck=true, flag=true) + @test isapprox( + select(subset(df3_test, :flag => ByRow(==(:linear)), skipmissing=true), r"v") , + DataFrame( + v1 = [1.0, 1.0, 0.0, 7.5 , 12.0], + v2 = [1.333, 1.666, 4.5, 2.5, 3.5], + v3 = [2.3333, 3.666, 13.625, 19.85, 9.1]), + atol = 0.01) # nearest df3_test = panel_fill(df3, :id, :t, :v1, - gap=Month(1), method=:nearest, uniquecheck=true, flag=true, merge=false) - @test isequal(select(df3_test, :v1), DataFrame(v1 = [11.0, 13.0, 0.0, 1.0, 1.0])) + gap=Month(1), method=:nearest, uniquecheck=true, flag=true) + @test isequal( + select(subset(df3_test, :flag => ByRow(==(:nearest)), skipmissing=true), :v1), + DataFrame(v1 = [1.0, 1.0, 0.0, 11.0, 13.0])) # TODO clean up these tests @@ -86,7 +95,7 @@ # panel_fill(df3, :id, :t, [:v1, :v2, :v3], # gap=Month(2), method=:backwards, uniquecheck=true, flag=true, merge=true) df3_test = panel_fill(df3, :id, :t, [:v1, :v2, :v3], - gap=Day(10), method=:forwards, uniquecheck=true, flag=true, merge=true) + gap=Day(10), 
method=:forwards, uniquecheck=true, flag=true) @test isequal(nrow(df3_test) , 39) end diff --git a/test/runtests.jl b/test/runtests.jl @@ -10,8 +10,9 @@ import StatsBase: quantile, Weights, sample using StreamToString const testsuite = [ - "tabulate", "xtile", "winsorize", "panel_fill", - "timeshift" + "tabulate", + "xtile", "winsorize", + "panel_fill", "timeshift" ] ENV["DATADEPS_ALWAYS_ACCEPT"] = true # for data loading of PalmerPenguins