BazerData.jl

Data manipulation utilities for Julia
Log | Files | Refs | README | LICENSE

commit 9c04b8462a29fbeeebdfd37c4454af5d8b096b8e
parent 84aceb67238c0e2626817d160d3d41a8e25c09cd
Author: Erik Loualiche <eloualic@umn.edu>
Date:   Tue, 20 May 2025 18:16:44 -0500

docstrings

Diffstat:
Mdocs/src/man/winsorize_guide.md | 9+++++++--
Msrc/TimeShift.jl | 113++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 119 insertions(+), 3 deletions(-)

diff --git a/docs/src/man/winsorize_guide.md b/docs/src/man/winsorize_guide.md @@ -122,7 +122,10 @@ Winsorize multiple variables var_to_winsorize = ["bill_length_mm", "bill_depth_mm", "flipper_length_mm"] transform!(df, var_to_winsorize .=> (x -> winsorize(x, probs=(0.1, 0.9)) ) .=> var_to_winsorize .* "_w") -show(IOContext(stdout, :limit => true, :displaysize => (20, 100)), df, allcols=true, allrows=false) +show(IOContext(stdout, :limit => true, :displaysize => (20, 100)), + select(df, :species, :island, :bill_length_mm, :bill_length_mm_w, + :bill_depth_mm, :bill_depth_mm_w, :flipper_length_mm, :flipper_length_mm_w), + allcols=true, allrows=false) nothing; # hide ``` @@ -131,7 +134,9 @@ Winsorize on one side only # left-winsorizing only, at 1th percentile; # cap noi gstats winsor wage, cuts(1 100); gstats winsor wage, cuts(1 100) s(_w2) transform!(df, :body_mass_g => (x -> winsorize(x, probs=(0.1, 1)) ) => :body_mass_g_w ) -show(IOContext(stdout, :limit => true, :displaysize => (20, 100)), df, allcols=true, allrows=false) +show(IOContext(stdout, :limit => true, :displaysize => (20, 100)), + select(df, :species, :island, :body_mass_g, :body_mass_g_w), + allcols=true, allrows=false) nothing; # hide ``` diff --git a/src/TimeShift.jl b/src/TimeShift.jl @@ -4,6 +4,45 @@ # -------------------------------------------------------------------------------------------------- +""" + tlag(x, t_vec; n = nothing, checksorted = true, verbose = false) + +Create a lagged version of array `x` based on time vector `t_vec`, where each element is shifted +backward in time by a specified amount `n`. + +# Arguments +- `x`: Array of values to be lagged +- `t_vec`: Vector of time points corresponding to each element in `x` + +# Keyword Arguments +- `n`: Time gap for lagging. If `nothing` (default), uses the minimal unit difference between time points. 
+- `checksorted`: If `true` (default), verifies that `t_vec` is sorted in ascending order +- `verbose`: If `true`, prints informational messages about the process + +# Returns +- An array of the same length as `x` where each element is the value of `x` from `n` time units ago, + or `missing` if no corresponding past value exists + +# Notes +- Time vectors must be strictly sorted (ascending order) +- The time gap `n` must be positive +- Uses linear scan to match time points +- For `Date` types, no type checking is performed on `n` +- Elements at the beginning will be `missing` if they don't have values from `n` time units ago +- See PanelShift.jl for original implementation + +# Errors +- If `t_vec` is not sorted and `checksorted=true` +- If `n` is not positive +- If `x` and `t_vec` have different lengths +- If `n` has a type that doesn't match the difference type of `t_vec` + +# Examples +```julia +x = [1, 2, 3, 4, 5] +t = [Date(2023,1,1), Date(2023,1,2), Date(2023,1,3), Date(2023,1,4), Date(2023,1,5)] +tlag(x, t, n = Day(1)) # Returns: [missing, 1, 2, 3, 4] +``` +""" function tlag(x, t_vec; n = nothing, checksorted = true, @@ -62,7 +101,45 @@ end # -------------------------------------------------------------------------------------------------- -# most of this code was inspired by @FuZhiyu PanelShift.jl package +""" + tlead(x, t_vec; n = nothing, checksorted = true, verbose = false) + +Create a leading version of array `x` based on time vector `t_vec`, where each element is shifted +forward in time by a specified amount `n`. + +# Arguments +- `x`: Array of values to be led +- `t_vec`: Vector of time points corresponding to each element in `x` + +# Keyword Arguments +- `n`: Time gap for leading. If `nothing` (default), uses the minimal unit difference between time points. 
+- `checksorted`: If `true` (default), verifies that `t_vec` is sorted in ascending order +- `verbose`: If `true`, prints informational messages about the process + +# Returns +- An array of the same length as `x` where each element is the value of `x` from `n` time units in the future, + or `missing` if no corresponding future value exists + +# Notes +- Time vectors must be strictly sorted (ascending order) +- The time gap `n` must be positive +- Uses linear scan to match time points +- For `Date` types, no type checking is performed on `n` +- Elements at the end will be `missing` if they don't have values from `n` time units in the future +- See PanelShift.jl for original implementation + +# Errors +- If `t_vec` is not sorted and `checksorted=true` +- If `n` is not positive +- If `x` and `t_vec` have different lengths +- If `n` has a type that doesn't match the difference type of `t_vec` + +# Examples +```julia +x = [1, 2, 3, 4, 5] +t = [Date(2023,1,1), Date(2023,1,2), Date(2023,1,3), Date(2023,1,4), Date(2023,1,5)] +tlead(x, t, n = Day(1)) # Returns: [2, 3, 4, 5, missing] +``` +""" function tlead(x, t_vec; n = nothing, checksorted = true, @@ -119,6 +196,40 @@ end # -------------------------------------------------------------------------------------------------- +""" + tshift(x, t_vec; n = nothing, kwargs...) + +Create a shifted version of array `x` based on time vector `t_vec`, where each element is shifted +by a specified amount `n`. Acts as a unified interface to `tlag` and `tlead`. + +# Arguments +- `x`: Array of values to be shifted +- `t_vec`: Vector of time points corresponding to each element in `x` + +# Keyword Arguments +- `n`: Time gap for shifting. If positive, performs a lag operation (backward in time); + if negative, performs a lead operation (forward in time). + If `nothing` (default), defaults to a lag operation with minimal unit difference. 
+- `kwargs...`: Additional keyword arguments passed to either `tlag` or `tlead` + +# Returns +- An array of the same length as `x` where each element is the value of `x` shifted by `n` time units, + or `missing` if no corresponding value exists at that time point + +# Notes +- Positive `n` values call `tlag` (backward shift in time) +- Negative `n` values call `tlead` (forward shift in time) +- If `n` is not specified, issues a warning and defaults to a lag operation + +# Examples +```julia +x = [1, 2, 3, 4, 5] +t = [Date(2023,1,1), Date(2023,1,2), Date(2023,1,3), Date(2023,1,4), Date(2023,1,5)] +tshift(x, t, n = Day(1)) # Lag: [missing, 1, 2, 3, 4] +tshift(x, t, n = -Day(1)) # Lead: [2, 3, 4, 5, missing] +``` + +See also: tlag, tlead +""" function tshift(x, t_vec; n=nothing, kwargs...) if isnothing(n)