BazerData.jl

Data manipulation utilities for Julia
Log | Files | Refs | README | LICENSE

panel_fill.jl (5689B)


      1 @testset "panel_fill" begin
      2 
      3 # include("./src/PanelData.jl")
      4 
      5     df1 = DataFrame(        # missing t=2 for id=1
      6         id = [1,1,2,2,2],
      7         t  = [1,4,1,2,4],
      8         a  = [1,1,1,0,0])
      9 
     10     df2 = DataFrame(        # missing t=2 for id=1
     11         id = ["a","a","b","b","c","c","c"],
     12         t  = [1,4,8,9,1,2,4],
     13         v1  = [1,1,1,6,6,0,0],
     14         v2  = [1,2,3,6,6,4,5],
     15         v3  = [1,5,4,6,6,15,12.25])
     16 
     17     df3 = DataFrame(        # missing t=2 for id=1
     18         id = ["a","a", "b","b", "c","c","c", "d","d","d","d"],
     19         t  = [Date(1990, 1, 1), Date(1990, 4, 1), Date(1990, 8, 1), Date(1990, 9, 1),
     20               Date(1990, 1, 1), Date(1990, 2, 1), Date(1990, 4, 1),
     21               Date(1999, 11, 10), Date(1999, 12, 21), Date(2000, 2, 5), Date(2000, 4, 1)],
     22         v1 = [1,1, 1,6, 6,0,0, 1,4,11,13],
     23         v2 = [1,2,3,6,6,4,5, 1,2,3,4],
     24         v3 = [1,5,4,6,6,15,12.25, 21,22.5,17.2,1])
     25 
     26     # --- test for df1
     27     @testset "DF1" begin
     28         df1_test = panel_fill(df1, :id, :t, :a,
     29             gap=1, method=:backwards, uniquecheck=true, flag=true)
     30         @test isequal(
     31             select(subset(df1_test, :flag => ByRow(==(:backwards))), :a),
     32             DataFrame(a = [1.0, 1.0, 0.0]))
     33         # TODO clean up this t est
     34         df1_test = panel_fill(df1, :id, :t, :a,
     35             gap=1, method=:backwards, uniquecheck=true, flag=true)
     36         @test isequal(nrow(df1_test), 8)
     37     end
     38 
     39     # --- test  for df2 multiple variables
     40     @testset "DF2" begin
     41         df2_test = panel_fill(df2, :id, :t, [:v1, :v2, :v3],
     42             gap=1, method=:backwards, uniquecheck=true, flag=true)
     43         @test isequal(
     44             select(subset(df2_test, :flag => ByRow(==(:backwards))), r"v"),
     45             DataFrame(v1 = [1.0, 1.0, 0.0], v2 = [1.0, 1.0, 4.0], v3 = [1.0, 1.0, 15.0]))
     46 
     47         df2_test = panel_fill(df2, :id, :t, :v1,
     48             gap=1, method=:backwards, uniquecheck=true, flag=true)
     49         @test isequal((nrow(df2_test), nrow(filter(:v2 => !ismissing, df2_test))),
     50                     (10, 7))
     51     end
     52 
     53 
     54     # --- test for df3 multiple variables and dates
     55     @testset "DF3" begin
     56         # test with dates backwards
     57         df3_test = panel_fill(df3, :id, :t, [:v1, :v2, :v3],
     58             gap=Month(1), method=:backwards, uniquecheck=true, flag=true)
     59         @test isequal(
     60             select(subset(df3_test, :flag => ByRow(==(:backwards))), r"v"),
     61             DataFrame(v1 = [1.0, 1.0, 0.0, 4.0, 11.0],
     62                       v2 = [1.0, 1.0, 4.0, 2.0, 3.0],
     63                       v3 = [1.0, 1.0, 15.0, 22.5, 17.2]))
     64 
     65         # test in place with dates forwards and only fill some variables and not others
     66         df3_test = copy(df3)
     67         panel_fill!(df3_test, :id, :t, [:v2],
     68             gap=Month(1), method=:forwards, uniquecheck=true, flag=true)
     69         @test isequal(
     70             select(subset(df3_test, :flag => ByRow(==(:forwards)), skipmissing=true), :v1, :v2),
     71             DataFrame(v1 = repeat([missing], inner=5), v2 = [2.0, 2.0, 5.0, 3.0, 4.0]))
     72 
     73         # linear interpolation
     74         df3_test = panel_fill(df3, :id, :t, [:v1, :v2, :v3],
     75             gap=Month(1), method=:linear, uniquecheck=true, flag=true)
     76         @test isapprox(
     77             select(subset(df3_test, :flag => ByRow(==(:linear)), skipmissing=true), r"v") ,
     78             DataFrame(
     79                 v1 = [1.0, 1.0, 0.0, 7.5 , 12.0],
     80                 v2 = [1.333, 1.666, 4.5, 2.5, 3.5],
     81                 v3 = [2.3333, 3.666, 13.625, 19.85, 9.1]),
     82             atol = 0.01)
     83 
     84         # nearest
     85         df3_test = panel_fill(df3, :id, :t, :v1,
     86             gap=Month(1), method=:nearest, uniquecheck=true, flag=true)
     87         @test isequal(
     88             select(subset(df3_test, :flag => ByRow(==(:nearest)), skipmissing=true), :v1),
     89             DataFrame(v1 = [1.0, 1.0, 0.0, 11.0, 13.0]))
     90 
     91         # -- different time periods
     92         df3_test = panel_fill(df3, :id, :t, [:v1, :v2, :v3],
     93             gap=Day(10), method=:forwards, uniquecheck=true, flag=true)
     94         @test isequal(nrow(df3_test) , 39)
     95 
     96     end
     97 
     98 end
     99 
    100 
    101 @testset "panel_fill - flag=false" begin
    102     df = DataFrame(id = [1, 1, 2, 2], t = [1, 3, 1, 4], v = [10, 20, 30, 40])
    103     result = panel_fill(df, :id, :t, :v, gap=1, method=:backwards, flag=false)
    104     @test !(:flag in names(result))
    105     @test nrow(result) > nrow(df)  # should have filled rows
    106 end
    107 
    108 
    109 @testset "panel_fill - invalid method" begin
    110     df = DataFrame(id = [1, 1], t = [1, 3], v = [10, 20])
    111     @test_throws Exception panel_fill(df, :id, :t, :v, gap=1, method=:invalid_method)
    112 end
    113 
    114 
    115 @testset "panel_fill - type mismatch" begin
    116     # DatePeriod gap with integer time variable
    117     df = DataFrame(id = [1, 1], t = [1, 3], v = [10, 20])
    118     @test_throws Exception panel_fill(df, :id, :t, :v, gap=Month(1))
    119 end
    120 
    121 
    122 @testset "panel_fill - non-unique warning" begin
    123     df = DataFrame(id = [1, 1, 1], t = [1, 2, 3], v = [10, 20, 30])
    124     # non-unique: add a duplicate
    125     df_dup = vcat(df, DataFrame(id = [1], t = [2], v = [99]))
    126     # should warn about non-unique observations
    127     @test_logs (:warn, r"non unique"i) begin
    128         try
    129             panel_fill(df_dup, :id, :t, :v,
    130                 gap=1, method=:backwards, uniquecheck=true, flag=true)
    131         catch
    132             # the function may error after warning due to duplicate handling;
    133             # we just verify the warning is emitted
    134         end
    135     end
    136 end
    137 
    138 
    139 @testset "panel_fill - no gaps to fill" begin
    140     # consecutive time values, nothing to interpolate
    141     df = DataFrame(id = [1, 1, 1], t = [1, 2, 3], v = [10, 20, 30])
    142     result = panel_fill(df, :id, :t, :v, gap=1, method=:backwards, flag=true)
    143     @test nrow(result) == 3  # no new rows added
    144     @test all(result.flag .== :original)
    145 end