#!/usr/bin/env julia
# bench_timeshift.jl
#
# Benchmark: tlag/tlead performance comparison
#
# Compares three approaches:
#   1. Old linear scan (Date arithmetic interleaved with comparisons)
#   2. New: pre-compute Int64 targets, scan in pure Int64
#   3. Dict-based O(1) lookup
#
# Run with: julia --project test/bench_timeshift.jl

using BazerData
using Dates
using Random
using Statistics

# ANSI escape sequences used to colour the speedup column.
const ANSI_GREEN = "\033[32m"
const ANSI_RED = "\033[31m"
const ANSI_RESET = "\033[0m"

# The reference implementations index `x` under `@inbounds`, so a length
# mismatch must be rejected up front rather than read out of bounds.
function check_same_length(x, t_vec)
    length(x) == length(t_vec) || throw(DimensionMismatch(
        "x has length $(length(x)) but t_vec has length $(length(t_vec))"))
    return nothing
end

# Green when the candidate is at least as fast as the baseline, red otherwise.
speedup_color(speedup) = speedup >= 1.0 ? ANSI_GREEN : ANSI_RED


# --- Old linear scan (Date objects in hot loop) ---

"""
    tlag_oldscan(x, t_vec, n)

Baseline lag: for each `i`, return `x[j]` where `t_vec[j] == t_vec[i] - n`, or
`missing` when no such `j` exists.

A single scan pointer moves forward only, so `t_vec` must be sorted ascending and
`t_vec[i] - n` must be non-decreasing in `i`; neither is checked. Assumes 1-based
indexing.

# Throws
- `DimensionMismatch` if `x` and `t_vec` differ in length.
"""
function tlag_oldscan(x, t_vec, n)
    check_same_length(x, t_vec)
    N = length(t_vec)
    x_shift = Vector{Union{Missing, eltype(x)}}(missing, N)
    j = 0  # last index known to satisfy t_vec[j] <= lagt (0 = none yet)
    @inbounds for i in 1:N
        lagt = t_vec[i] - n
        # Advance to the last element that is <= the lagged time.
        while j < N && t_vec[j + 1] <= lagt
            j += 1
        end
        if j > 0 && t_vec[j] == lagt
            x_shift[i] = x[j]
        end
    end
    return x_shift
end

"""
    tlead_oldscan(x, t_vec, n)

Baseline lead: for each `i`, return `x[j]` where `t_vec[j] == t_vec[i] + n`, or
`missing` when no such `j` exists.

Same forward-only scan as [`tlag_oldscan`](@ref): `t_vec` must be sorted ascending
(not checked). Assumes 1-based indexing.

# Throws
- `DimensionMismatch` if `x` and `t_vec` differ in length.
"""
function tlead_oldscan(x, t_vec, n)
    check_same_length(x, t_vec)
    N = length(t_vec)
    x_shift = Vector{Union{Missing, eltype(x)}}(missing, N)
    j = 0  # every index <= j is known to satisfy t_vec[j] < leadt
    @inbounds for i in 1:N
        leadt = t_vec[i] + n
        # A target beyond the last timestamp cannot match; the remaining
        # entries keep their `missing` default.
        leadt > t_vec[N] && break
        # Advance until the next element is the first one >= the lead time.
        while j < N && t_vec[j + 1] < leadt
            j += 1
        end
        if j + 1 <= N && t_vec[j + 1] == leadt
            x_shift[i] = x[j + 1]
        end
    end
    return x_shift
end


# --- Dict-based lookup ---

"""
    tlag_dict(x, t_vec, n)

Lag via a hash table mapping each timestamp to its index. For each `i`, return
`x[j]` where `t_vec[j] == t_vec[i] - n`, or `missing` when the lagged timestamp is
absent. If a timestamp occurs more than once, the last occurrence is used.

# Throws
- `DimensionMismatch` if `x` and `t_vec` differ in length.
"""
function tlag_dict(x, t_vec, n)
    check_same_length(x, t_vec)
    N = length(t_vec)
    x_shift = Vector{Union{Missing, eltype(x)}}(missing, N)
    lookup = Dict{eltype(t_vec), Int}()
    sizehint!(lookup, N)
    @inbounds for i in 1:N
        lookup[t_vec[i]] = i
    end
    @inbounds for i in 1:N
        idx = get(lookup, t_vec[i] - n, 0)  # 0 is the "not found" sentinel
        if idx > 0
            x_shift[i] = x[idx]
        end
    end
    return x_shift
end


# --- Benchmark harness ---

"""
    bench(f; warmup=3, trials=15)

Call the zero-argument function `f` `warmup` times untimed, force a garbage
collection, then time `trials` further calls.

Returns a named tuple `(median, min)` of the wall-clock times in milliseconds.

# Throws
- `ArgumentError` if `trials < 1`.
"""
function bench(f; warmup=3, trials=15)
    trials >= 1 || throw(ArgumentError("trials must be at least 1, got $trials"))
    for _ in 1:warmup
        f()
    end
    GC.gc()
    times = Float64[]
    for _ in 1:trials
        t0 = time_ns()
        f()
        push!(times, (time_ns() - t0) / 1e6)  # ns -> ms
    end
    return (median=median(times), min=minimum(times))
end

"""
    report(label, old, new; dict=nothing)

Print one result line comparing the median times of `old` and `new` (and of `dict`
when given). Each argument is a result of [`bench`](@ref). Speedups are relative to
`old` and are shown green when >= 1.0, red otherwise.
"""
function report(label, old, new; dict=nothing)
    speedup = old.median / new.median
    line = " $(rpad(label, 28)) old=$(rpad(round(old.median, digits=2), 8))ms " *
           "new=$(rpad(round(new.median, digits=2), 8))ms " *
           "$(speedup_color(speedup))$(round(speedup, digits=2))x$(ANSI_RESET)"
    if dict !== nothing
        ds = old.median / dict.median
        line *= " dict=$(rpad(round(dict.median, digits=2), 8))ms " *
                "$(speedup_color(ds))$(round(ds, digits=2))x$(ANSI_RESET)"
    end
    println(line)
    return nothing
end


# --- Generate test data ---

"""
    make_daily_dates(n; gap_prob=0.1, seed=42)

Return `n` strictly increasing `Date`s starting at 2000-01-01. Consecutive dates
are one day apart, except that with probability `gap_prob` the step is a random
2 to 5 days. Reseeds the global RNG with `seed`.
"""
function make_daily_dates(n; gap_prob=0.1, seed=42)
    Random.seed!(seed)
    dates = Vector{Date}(undef, n)
    d = Date(2000, 1, 1)
    for i in 1:n
        dates[i] = d
        d += Day(rand() < gap_prob ? rand(2:5) : 1)
    end
    return dates
end

"""
    make_integers(n; gap_prob=0.1, seed=42)

Return `n` strictly increasing `Int` timestamps starting at 1. Consecutive values
differ by 1, except that with probability `gap_prob` the step is a random 2 to 5.
Reseeds the global RNG with `seed`.
"""
function make_integers(n; gap_prob=0.1, seed=42)
    Random.seed!(seed)
    ts = Vector{Int}(undef, n)
    t = 1
    for i in 1:n
        ts[i] = t
        t += rand() < gap_prob ? rand(2:5) : 1
    end
    return ts
end


# --- Run benchmarks ---

"""
    run_benchmarks()

Run the tlag/tlead comparisons for each problem size and print the results.
"""
function run_benchmarks()
    println("\n" * "="^80)
    println(" TimeShift Benchmark")
    println(" old = linear scan on Date objects")
    println(" new = pre-compute Int64 targets, scan in Int64")
    println(" dict = Dict{T,Int} lookup")
    println("="^80)

    for N in [100_000, 1_000_000]
        println("\n--- N = $(div(N, 1000))K elements ---")

        dates = make_daily_dates(N)
        ints = make_integers(N)
        # Drawn from the global RNG, which make_integers has just reseeded.
        x_f = rand(N)
        x_i = rand(1:1000, N)

        println("\n tlag:")
        for (lbl, t, x, n) in [
            ("Int, n=1", ints, x_i, 1),
            ("Int, n=365", ints, x_i, 365),
            ("Date, n=Day(1)", dates, x_f, Day(1)),
            ("Date, n=Month(1)", dates, x_f, Month(1)),
            ("Date, n=Year(1)", dates, x_f, Year(1)),
        ]
            t_old = bench(() -> tlag_oldscan(x, t, n))
            t_new = bench(() -> tlag(x, t; n=n, checksorted=false))
            t_dict = bench(() -> tlag_dict(x, t, n))
            report(lbl, t_old, t_new; dict=t_dict)
        end

        println("\n tlead:")
        for (lbl, t, x, n) in [
            ("Int, n=1", ints, x_i, 1),
            ("Date, n=Day(1)", dates, x_f, Day(1)),
            ("Date, n=Month(1)", dates, x_f, Month(1)),
            ("Date, n=Year(1)", dates, x_f, Year(1)),
        ]
            t_old = bench(() -> tlead_oldscan(x, t, n))
            t_new = bench(() -> tlead(x, t; n=n, checksorted=false))
            report(lbl, t_old, t_new)
        end
    end

    println("\n" * "="^80)
    return nothing
end

run_benchmarks()