commit 9c04b8462a29fbeeebdfd37c4454af5d8b096b8e
parent 84aceb67238c0e2626817d160d3d41a8e25c09cd
Author: Erik Loualiche <eloualic@umn.edu>
Date: Tue, 20 May 2025 18:16:44 -0500
docstrings
Diffstat:
2 files changed, 119 insertions(+), 3 deletions(-)
diff --git a/docs/src/man/winsorize_guide.md b/docs/src/man/winsorize_guide.md
@@ -122,7 +122,10 @@ Winsorize multiple variables
var_to_winsorize = ["bill_length_mm", "bill_depth_mm", "flipper_length_mm"]
transform!(df,
var_to_winsorize .=> (x -> winsorize(x, probs=(0.1, 0.9)) ) .=> var_to_winsorize .* "_w")
-show(IOContext(stdout, :limit => true, :displaysize => (20, 100)), df, allcols=true, allrows=false)
+show(IOContext(stdout, :limit => true, :displaysize => (20, 100)),
+ select(df, :species, :island, :bill_length_mm, :bill_length_mm_w,
+ :bill_depth_mm, :bill_depth_mm_w, :flipper_length_mm, :flipper_length_mm_w),
+ allcols=true, allrows=false)
nothing; # hide
```
@@ -131,7 +134,9 @@ Winsorize on one side only
# left-winsorizing only, at the 10th percentile;
# cap noi gstats winsor wage, cuts(1 100); gstats winsor wage, cuts(1 100) s(_w2)
transform!(df, :body_mass_g => (x -> winsorize(x, probs=(0.1, 1)) ) => :body_mass_g_w )
-show(IOContext(stdout, :limit => true, :displaysize => (20, 100)), df, allcols=true, allrows=false)
+show(IOContext(stdout, :limit => true, :displaysize => (20, 100)),
+ select(df, :species, :island, :body_mass_g, :body_mass_g_w),
+ allcols=true, allrows=false)
nothing; # hide
```
diff --git a/src/TimeShift.jl b/src/TimeShift.jl
@@ -4,6 +4,45 @@
# --------------------------------------------------------------------------------------------------
+"""
+ tlag(x, t_vec; n = nothing, checksorted = true, verbose = false)
+
+Create a lagged version of array `x` based on time vector `t_vec`, where each element is shifted
+backward in time by a specified amount `n`.
+
+# Arguments
+- `x`: Array of values to be lagged
+- `t_vec`: Vector of time points corresponding to each element in `x`
+
+# Keyword Arguments
+- `n`: Time gap for lagging. If `nothing` (default), uses the minimal unit difference between time points.
+- `checksorted`: If `true` (default), verifies that `t_vec` is sorted in ascending order
+- `verbose`: If `true`, prints informational messages about the process
+
+# Returns
+- An array of the same length as `x` where each element is the value of `x` from `n` time units ago,
+ or `missing` if no corresponding past value exists
+
+# Notes
+- Time vectors must be strictly sorted (ascending order)
+- The time gap `n` must be positive
+- Uses linear scan to match time points
+- For `Date` types, no type checking is performed on `n`
+- Elements at the beginning will be `missing` if they don't have values from `n` time units ago
+- See PanelShift.jl for original implementation
+
+# Errors
+- If `t_vec` is not sorted and `checksorted=true`
+- If `n` is not positive
+- If `x` and `t_vec` have different lengths
+- If `n` has a type that doesn't match the difference type of `t_vec`
+
+# Examples
+```julia
+x = [1, 2, 3, 4, 5]
+t = [Date(2023,1,1), Date(2023,1,2), Date(2023,1,3), Date(2023,1,4), Date(2023,1,5)]
+tlag(x, t, n = Day(1)) # Returns: [missing, 1, 2, 3, 4]
+```
+"""
function tlag(x, t_vec;
n = nothing,
checksorted = true,
@@ -62,7 +101,45 @@ end
# --------------------------------------------------------------------------------------------------
-# most of this code was inspired by @FuZhiyu PanelShift.jl package
+"""
+ tlead(x, t_vec; n = nothing, checksorted = true, verbose = false)
+
+Create a leading version of array `x` based on time vector `t_vec`, where each element is shifted
+forward in time by a specified amount `n`.
+
+# Arguments
+- `x`: Array of values to be led
+- `t_vec`: Vector of time points corresponding to each element in `x`
+
+# Keyword Arguments
+- `n`: Time gap for leading. If `nothing` (default), uses the minimal unit difference between time points.
+- `checksorted`: If `true` (default), verifies that `t_vec` is sorted in ascending order
+- `verbose`: If `true`, prints informational messages about the process
+
+# Returns
+- An array of the same length as `x` where each element is the value of `x` from `n` time units in the future,
+ or `missing` if no corresponding future value exists
+
+# Notes
+- Time vectors must be strictly sorted (ascending order)
+- The time gap `n` must be positive
+- Uses linear scan to match time points
+- For `Date` types, no type checking is performed on `n`
+- Elements at the end will be `missing` if they don't have values from `n` time units in the future
+- See PanelShift.jl for original implementation
+
+# Errors
+- If `t_vec` is not sorted and `checksorted=true`
+- If `n` is not positive
+- If `x` and `t_vec` have different lengths
+- If `n` has a type that doesn't match the difference type of `t_vec`
+
+# Examples
+```julia
+x = [1, 2, 3, 4, 5]
+t = [Date(2023,1,1), Date(2023,1,2), Date(2023,1,3), Date(2023,1,4), Date(2023,1,5)]
+tlead(x, t, n = Day(1)) # Returns: [2, 3, 4, 5, missing]
+```
+"""
function tlead(x, t_vec;
n = nothing,
checksorted = true,
@@ -119,6 +196,40 @@ end
# --------------------------------------------------------------------------------------------------
+"""
+ tshift(x, t_vec; n = nothing, kwargs...)
+
+Create a shifted version of array `x` based on time vector `t_vec`, where each element is shifted
+by a specified amount `n`. Acts as a unified interface to `tlag` and `tlead`.
+
+# Arguments
+- `x`: Array of values to be shifted
+- `t_vec`: Vector of time points corresponding to each element in `x`
+
+# Keyword Arguments
+- `n`: Time gap for shifting. If positive, performs a lag operation (backward in time);
+ if negative, performs a lead operation (forward in time).
+ If `nothing` (default), defaults to a lag operation with minimal unit difference.
+- `kwargs...`: Additional keyword arguments passed to either `tlag` or `tlead`
+
+# Returns
+- An array of the same length as `x` where each element is the value of `x` shifted by `n` time units,
+ or `missing` if no corresponding value exists at that time point
+
+# Notes
+- Positive `n` values call `tlag` (backward shift in time)
+- Negative `n` values call `tlead` (forward shift in time)
+- If `n` is not specified, issues a warning and defaults to a lag operation
+
+# Examples
+```julia
+x = [1, 2, 3, 4, 5]
+t = [Date(2023,1,1), Date(2023,1,2), Date(2023,1,3), Date(2023,1,4), Date(2023,1,5)]
+tshift(x, t, n = Day(1)) # Lag: [missing, 1, 2, 3, 4]
+tshift(x, t, n = -Day(1)) # Lead: [2, 3, 4, 5, missing]
+```
+
+See also: `tlag`, `tlead`
+"""
function tshift(x, t_vec; n=nothing, kwargs...)
if isnothing(n)