PanelData.jl (5059B)
# ------------------------------------------------------------------------------------------
"""
    panel_fill!(
        df::DataFrame,
        id_var::Symbol,
        time_var::Symbol,
        value_var::Union{Symbol, Vector{Symbol}};
        gap::Union{Int, DatePeriod} = 1,
        method::Symbol = :backwards,
        uniquecheck::Bool = true,
        flag::Bool = false
    )

Fill the time gaps of a panel in place. For every pair of consecutive observations of an
individual, rows are created on the grid `old_t + gap, old_t + 2gap, ...` strictly between
the two observations, and the columns in `value_var` are interpolated between the two
observed values.

`df` is mutated: it is sorted by `[id_var, time_var]` and the new rows are appended. Columns
that are not in `value_var` are filled with `missing` in the new rows.

# Arguments
- `df::DataFrame`: a panel dataset
- `id_var::Symbol`: the individual index dimension of the panel
- `time_var::Symbol`: the time index dimension of the panel (must be integer or a date)
- `value_var::Union{Symbol, Vector{Symbol}}`: the set of columns we would like to fill

# Keywords
- `gap::Union{Int, DatePeriod} = 1` : the interval size for which we want to fill data.
  When `gap` is a `DatePeriod`, the time variable is floored to `gap` first; if this changes
  any value a warning is emitted and the floored values are kept in an extra column named
  `<time_var>_rounded`.
- `method::Symbol = :backwards`: the interpolation method to fill the data
  options are: `:backwards` (default, `Constant(Previous)`), `:forwards` (`Constant(Next)`),
  `:linear` (`Linear()`), `:nearest` (`Constant()`)
- `uniquecheck::Bool = true`: warn if some `(id_var, time_var)` pairs are duplicated
- `flag::Bool = false`: add a `:flag` column equal to `:original` for the input rows and to
  `method` for the interpolated rows

# Returns
- `DataFrame`: the input DataFrame with interpolated rows appended

# Throws
- `ErrorException` if `gap` is a `DatePeriod` while the time variable is not a date/time
  type, or if `method` is not one of the supported options. Both checks run before `df` is
  modified.

# Examples
- See tests
"""
function panel_fill!(
    df::DataFrame,
    id_var::Symbol, time_var::Symbol, value_var::Union{Symbol, Vector{Symbol}};
    gap::Union{Int, DatePeriod} = 1,
    method::Symbol = :backwards,
    uniquecheck::Bool = true,
    flag::Bool = false,
    )

    # Validate everything before touching `df`, so that a failed call leaves it unchanged.
    if gap isa DatePeriod && !(eltype(df[!, time_var]) <: Dates.AbstractTime)
        error(
            """
            Type of gap $(typeof(gap)) and type of time variable $(eltype(df[!, time_var])) do not match
            """
        )
    end

    if method == :backwards
        interpolate_method = BSpline(Constant(Previous))
    elseif method == :forwards
        interpolate_method = BSpline(Constant(Next))
    elseif method == :nearest
        interpolate_method = BSpline(Constant())
    elseif method == :linear
        interpolate_method = BSpline(Linear())
    else
        error(
            """
            Method $method not available.
            Please choose from :backwards (default), :forwards, :nearest, :linear
            """
        )
    end

    # Normalise to a vector under a new name (rebinding `value_var` would change its type).
    value_vars = value_var isa Symbol ? [value_var] : value_var

    # prepare the data
    sort!(df, [id_var, time_var])
    if uniquecheck && any(nonunique(df, [id_var, time_var]))  # check for unicity
        @warn "Some non unique observations in dataset"
    end

    # Helper column holding the time variable aligned on the `gap` grid.
    time_var_r = join([string(time_var), "rounded"], "_")
    if gap isa DatePeriod
        df[!, time_var_r] .= floor.(df[!, time_var], gap)
    else
        df[!, time_var_r] .= df[!, time_var]
    end
    # Rows are only ever permuted as a whole below, so this comparison stays valid.
    was_rounded = !isequal(df[!, time_var_r], df[!, time_var])
    if was_rounded
        @warn "Using rounded time variables for consistency with gap: $gap"
    end

    gdf = groupby(df, [id_var])
    fill_chunks = DataFrame[]

    for key in keys(gdf)
        subdf = gdf[key]
        nrow(subdf) > 1 || continue     # a single observation has no gap to fill

        sort!(subdf, time_var_r)
        prev_row = subdf[1, :]
        for next_row in Iterators.drop(eachrow(subdf), 1)

            t_prev = prev_row[time_var_r]
            t_next = next_row[time_var_r]

            # Duplicated (or rounded-to-equal) times would yield a zero step, which
            # `range` rejects; there is nothing to fill between them anyway.
            if isequal(t_prev, t_next)
                prev_row = next_row
                continue
            end

            # Grid from one observation to the next; drop both end points.
            # NOTE(review): dropping the last grid point assumes the grid lands exactly on
            # `t_next`, i.e. that `gap` divides the distance between the two observations.
            # Otherwise the last interior point is not filled -- confirm this is intended.
            grid = collect(range(t_prev, t_next, step = sign(t_next - t_prev) * gap))
            t_fill = grid[2:end-1]
            n_fill = length(t_fill)

            if n_fill > 0
                # The two observations sit at positions 1 and 2 of the interpolation
                # object; the filled rows are evenly spaced strictly in between.
                scale_xs = range(1, 2, length = n_fill + 2)[2:end-1]

                chunk = DataFrame(
                    Symbol(time_var_r) => t_fill,
                    id_var => fill(key[1], n_fill),
                )
                for v in value_vars
                    itp = interpolate([prev_row[v], next_row[v]], interpolate_method)
                    chunk[!, v] = itp.(scale_xs)
                end
                push!(fill_chunks, chunk)
            end

            prev_row = next_row
        end
    end

    # clean up the output
    has_fill = !isempty(fill_chunks)
    if has_fill
        df_fill = reduce(vcat, fill_chunks)
        if flag
            df_fill[!, :flag] .= method
        end
        if was_rounded
            # keep the rounded column and use it as the time of the filled rows
            transform!(df_fill, time_var_r => time_var)
        else
            rename!(df_fill, time_var_r => time_var)
        end
    end

    if !was_rounded
        # the helper column duplicates `time_var`: drop it
        select!(df, Not(time_var_r))
    end
    if flag
        df[!, :flag] .= :original
    end

    if has_fill
        append!(df, df_fill, cols = :union)
    end
    sort!(df, [id_var, time_var])

    return df

end


"""
    panel_fill(
        df::DataFrame,
        id_var::Symbol,
        time_var::Symbol,
        value_var::Union{Symbol, Vector{Symbol}};
        gap::Union{Int, DatePeriod} = 1,
        method::Symbol = :backwards,
        uniquecheck::Bool = true,
        flag::Bool = false
    )

Same as `panel_fill!` but without modifying `df` in place: the filling is applied to a copy
of `df` (columns are copied, their elements are shared), which is returned.
"""
function panel_fill(
    df::DataFrame,
    id_var::Symbol, time_var::Symbol, value_var::Union{Symbol, Vector{Symbol}};
    gap::Union{Int, DatePeriod} = 1,
    method::Symbol = :backwards,
    uniquecheck::Bool = true,
    flag::Bool = false
    )

    # `panel_fill!` only reorders and extends columns, so copying the columns is enough.
    df_res = copy(df)

    panel_fill!(df_res, id_var, time_var, value_var,
        gap = gap, method = method, uniquecheck = uniquecheck, flag = flag)

    return df_res

end