BazerData.jl

Data manipulation utilities for Julia
Log | Files | Refs | README | LICENSE

TimeShift.jl (9687B)


      1 # --------------------------------------------------------------------------------------------------
      2 # most of this code was copied from @FuZhiyu PanelShift.jl package
      3 # --------------------------------------------------------------------------------------------------
      4 
      5 
      6 # --------------------------------------------------------------------------------------------------
      7 # Shared validation for tlag/tlead
      8 function _validate_tshift_args(x, t_vec; n=nothing, checksorted=true, verbose=false)
      9     if isnothing(n)
     10         n = oneunit(t_vec[1] - t_vec[1])
     11         verbose && ((t_vec[1] isa Date) ? (@info "Default date gap inferred ... $n") :
     12             (@info "Default gap inferred ... $n"))
     13     elseif eltype(t_vec) == Date
     14         verbose && @info "No checks on increment argument n for type Date ... "
     15     else
     16         !(n isa typeof(t_vec[1]-t_vec[1])) &&
     17             error("Time gap type does not match time variable: typeof(n)=$(typeof(n)) != eltype(vec)=$(eltype(t_vec))")
     18     end
     19 
     20     checksorted && !issorted(t_vec; lt = (<=)) && error("time vector not sorted (order is strict)!")
     21     !(n > zero(n)) && error("shift value has to be positive!")
     22 
     23     N = length(t_vec)
     24     (length(x) != N) && error("value and time vector have different lengths!")
     25 
     26     return n, N
     27 end
     28 
     29 
     30 # Linear scan on native types (integers — already fast)
     31 function _scan_lag!(x_shift, x, t_vec, n, N)
     32     j = 0
     33     @inbounds for i in 1:N
     34         target = t_vec[i] - n
     35         while j < N && t_vec[j + 1] <= target
     36             j += 1
     37         end
     38         if j > 0 && t_vec[j] == target
     39             x_shift[i] = x[j]
     40         end
     41     end
     42     return x_shift
     43 end
     44 
     45 function _scan_lead!(x_shift, x, t_vec, n, N)
     46     j = 0
     47     @inbounds for i in 1:N
     48         target = t_vec[i] + n
     49         if target > t_vec[N]
     50             break
     51         end
     52         while j < N && t_vec[j + 1] < target
     53             j += 1
     54         end
     55         if j + 1 <= N && t_vec[j + 1] == target
     56             x_shift[i] = x[j + 1]
     57         end
     58     end
     59     return x_shift
     60 end
     61 
     62 
     63 # Pre-computed Int64 scan for Date types.
     64 # Date arithmetic (especially Month/Year) is expensive; converting to Int64
     65 # first keeps the hot scan loop in pure integer comparisons.
     66 function _scan_lag_int64!(x_shift, x, t_vec, n, N)
     67     int_times   = Vector{Int64}(undef, N)
     68     int_targets = Vector{Int64}(undef, N)
     69     @inbounds for i in 1:N
     70         int_times[i]   = Dates.value(t_vec[i])
     71         int_targets[i] = Dates.value(t_vec[i] - n)
     72     end
     73     j = 0
     74     @inbounds for i in 1:N
     75         target = int_targets[i]
     76         while j < N && int_times[j + 1] <= target
     77             j += 1
     78         end
     79         if j > 0 && int_times[j] == target
     80             x_shift[i] = x[j]
     81         end
     82     end
     83     return x_shift
     84 end
     85 
     86 function _scan_lead_int64!(x_shift, x, t_vec, n, N)
     87     int_times   = Vector{Int64}(undef, N)
     88     int_targets = Vector{Int64}(undef, N)
     89     @inbounds for i in 1:N
     90         int_times[i]   = Dates.value(t_vec[i])
     91         int_targets[i] = Dates.value(t_vec[i] + n)
     92     end
     93     j = 0
     94     @inbounds for i in 1:N
     95         target = int_targets[i]
     96         if target > int_times[N]
     97             break
     98         end
     99         while j < N && int_times[j + 1] < target
    100             j += 1
    101         end
    102         if j + 1 <= N && int_times[j + 1] == target
    103             x_shift[i] = x[j + 1]
    104         end
    105     end
    106     return x_shift
    107 end
    108 # --------------------------------------------------------------------------------------------------
    109 
    110 
    111 # --------------------------------------------------------------------------------------------------
    112 """
    113     tlag(x, t_vec; n = nothing, checksorted = true, verbose = false)
    114 
    115 Create a lagged version of array `x` based on time vector `t_vec`, where each element is shifted
    116 backward in time by a specified amount `n`.
    117 
    118 # Arguments
    119 - `x`: Array of values to be lagged
    120 - `t_vec`: Vector of time points corresponding to each element in `x`
    121 
    122 # Keyword Arguments
    123 - `n`: Time gap for lagging. If `nothing` (default), uses the minimal unit difference between time points.
    124 - `checksorted`: If `true` (default), verifies that `t_vec` is sorted in ascending order
    125 - `verbose`: If `true`, prints informational messages about the process
    126 
    127 # Returns
    128 - An array of the same length as `x` where each element is the value of `x` from `n` time units ago,
    129   or `missing` if no corresponding past value exists
    130 
    131 # Notes
    132 - Time vectors must be strictly sorted (ascending order)
    133 - The time gap `n` must be positive
    134 - For `Date` types, no type checking is performed on `n`
    135 - Elements at the beginning will be `missing` if they don't have values from `n` time units ago
    136 - See PanelShift.jl for original implementation
    137 
    138 # Errors
    139 - If `t_vec` is not sorted and `checksorted=true`
    140 - If `n` is not positive
    141 - If `x` and `t_vec` have different lengths
    142 - If `n` has a type that doesn't match the difference type of `t_vec`
    143 
    144 # Examples
    145 ```jldoctest
    146 julia> tlag([1, 2, 3], [1, 2, 3], n = 1)
    147 3-element Vector{Union{Missing, Int64}}:
    148   missing
    149  1
    150  2
    151 ```
    152 
    153 """
    154 function tlag(x, t_vec;
    155     n = nothing,
    156     checksorted = true,
    157     verbose = false,
    158     )
    159 
    160     isempty(t_vec) && return Array{Union{Missing, eltype(x)}}(missing, 0)
    161 
    162     n, N = _validate_tshift_args(x, t_vec; n=n, checksorted=checksorted, verbose=verbose)
    163 
    164     x_shift = Array{Union{Missing, eltype(x)}}(missing, N)
    165 
    166     # Month/Year arithmetic is expensive; pre-compute Int64 targets for those.
    167     # Day and integer arithmetic is cheap; scan directly.
    168     if n isa Dates.OtherPeriod
    169         _scan_lag_int64!(x_shift, x, t_vec, n, N)
    170     else
    171         _scan_lag!(x_shift, x, t_vec, n, N)
    172     end
    173 
    174     return x_shift
    175 end
    176 # --------------------------------------------------------------------------------------------------
    177 
    178 
    179 # --------------------------------------------------------------------------------------------------
    180 """
    181     tlead(x, t_vec; n = nothing, checksorted = true, verbose = false)
    182 
    183 Create a leading version of array `x` based on time vector `t_vec`, where each element is shifted
    184 forward in time by a specified amount `n`.
    185 
    186 # Arguments
    187 - `x`: Array of values to be led
    188 - `t_vec`: Vector of time points corresponding to each element in `x`
    189 
    190 # Keyword Arguments
    191 - `n`: Time gap for leading. If `nothing` (default), uses the minimal unit difference between time points.
    192 - `checksorted`: If `true` (default), verifies that `t_vec` is sorted in ascending order
    193 - `verbose`: If `true`, prints informational messages about the process
    194 
    195 # Returns
    196 - An array of the same length as `x` where each element is the value of `x` from `n` time units in the future,
    197   or `missing` if no corresponding future value exists
    198 
    199 # Notes
    200 - Time vectors must be strictly sorted (ascending order)
    201 - The time gap `n` must be positive
    202 - For `Date` types, no type checking is performed on `n`
    203 - Elements at the end will be `missing` if they don't have values from `n` time units in the future
    204 - See PanelShift.jl for original implementation
    205 
    206 # Errors
    207 - If `t_vec` is not sorted and `checksorted=true`
    208 - If `n` is not positive
    209 - If `x` and `t_vec` have different lengths
    210 - If `n` has a type that doesn't match the difference type of `t_vec`
    211 
    212 # Examples
    213 ```jldoctest
    214 julia> tlead([1, 2, 3], [8, 9, 10], n = 1)
    215 3-element Vector{Union{Missing, Int64}}:
    216  2
    217  3
    218   missing
    219 ```
    220 
    221 """
    222 function tlead(x, t_vec;
    223     n = nothing,
    224     checksorted = true,
    225     verbose = false,
    226     )
    227 
    228     isempty(t_vec) && return Array{Union{Missing, eltype(x)}}(missing, 0)
    229 
    230     n, N = _validate_tshift_args(x, t_vec; n=n, checksorted=checksorted, verbose=verbose)
    231 
    232     x_shift = Array{Union{Missing, eltype(x)}}(missing, N)
    233 
    234     if n isa Dates.OtherPeriod
    235         _scan_lead_int64!(x_shift, x, t_vec, n, N)
    236     else
    237         _scan_lead!(x_shift, x, t_vec, n, N)
    238     end
    239 
    240     return x_shift
    241 end
    242 # --------------------------------------------------------------------------------------------------
    243 
    244 
    245 # --------------------------------------------------------------------------------------------------
    246 """
    247     tshift(x, t_vec; n = nothing, kwargs...)
    248 
    249 Create a shifted version of array `x` based on time vector `t_vec`, where each element is shifted
    250 by a specified amount `n`. Acts as a unified interface to `tlag` and `tlead`.
    251 
    252 # Arguments
    253 - `x`: Array of values to be shifted
    254 - `t_vec`: Vector of time points corresponding to each element in `x`
    255 
    256 # Keyword Arguments
    257 - `n`: Time gap for shifting. If positive, performs a lag operation (backward in time);
    258        if negative, performs a lead operation (forward in time).
    259        If `nothing` (default), defaults to a lag operation with minimal unit difference.
    260 - `kwargs...`: Additional keyword arguments passed to either `tlag` or `tlead`
    261 
    262 # Returns
    263 - An array of the same length as `x` where each element is the value of `x` shifted by `n` time units,
    264   or `missing` if no corresponding value exists at that time point
    265 
    266 # Notes
    267 - Positive `n` values call `tlag` (backward shift in time)
    268 - Negative `n` values call `tlead` (forward shift in time)
    269 - If `n` is not specified, issues a warning and defaults to a lag operation
    270 
    271 # Examples
    272 ```jldoctest
    273 julia> tshift([1, 2, 3], [-3, -2, -1], n = 1)
    274 3-element Vector{Union{Missing, Int64}}:
    275   missing
    276  1
    277  2
    278 
    279 julia> tshift([1, 2, 3], [-3, -2, -1], n = -1)
    280 3-element Vector{Union{Missing, Int64}}:
    281  2
    282  3
    283   missing
    284 
    285 ```
    286 
    287 See also: [`tlag`](@ref), [`tlead`](@ref)
    288 """
    289 function tshift(x, t_vec; n=nothing, kwargs...)
    290 
    291     if isnothing(n)
    292         @warn "shift not specified ... defaulting to lag"
    293         n = oneunit(t_vec[1] - t_vec[1])
    294     end
    295 
    296     if n > zero(n)
    297         return tlag(x, t_vec, n=n; kwargs...)
    298     else
    299         return tlead(x, t_vec, n=-n; kwargs...)
    300     end
    301 end
    302 # --------------------------------------------------------------------------------------------------