RiskPremium

Measuring the market risk premium
Log | Files | Refs

DataImport.jl (4602B)


      1 # src/DataImport.jl
      2 module DataImport
      3 
      4 using CSV, DataFrames, Dates, Statistics
      5 include("FredUtils.jl")
      6 using .FredUtils
      7 
      8 export compute_dp, compute_tbill, compute_excess_returns, build_predictors
      9 
     10 """
     11     compute_dp(msi_path) -> DataFrame
     12 
     13 Compute 12-month rolling dividend-price ratio from CRSP MSI data.
     14 Replicates R logic: accumulate monthly dividends with reinvestment over 12 months.
     15 
     16 Returns DataFrame with columns: dateym, dp.
     17 """
     18 function compute_dp(msi_path::String)
     19     msi = CSV.read(msi_path, DataFrame)
     20     dropmissing!(msi, [:vwretd, :vwretx])
     21 
     22     n = nrow(msi)
     23     dpvw = 100.0 .* (msi.vwretd .- msi.vwretx) ./ (1.0 .+ msi.vwretx)
     24     retd_retx = (1.0 .+ msi.vwretd) ./ (1.0 .+ msi.vwretx)
     25 
     26     dp = zeros(n)
     27     for i in 11:-1:0
     28         for t in (i+1):n
     29             dp[t] = dp[t] * retd_retx[t-i] + dpvw[t-i]
     30         end
     31     end
     32     dp ./= 100.0
     33 
     34     dates = Date.(msi.date)
     35     dateym = year.(dates) .* 100 .+ month.(dates)
     36 
     37     return DataFrame(dateym = dateym, dp = dp)
     38 end
     39 
     40 """
     41     compute_tbill(; observation_end) -> DataFrame
     42 
     43 Download 3-month T-bill from FRED (TB3MS). Returns DataFrame with dateym, rf.
     44 """
     45 function compute_tbill(; observation_end::Date = Date(2026,12,31))
     46     df = fred_observations("TB3MS"; end_date=observation_end)
     47     dropmissing!(df)
     48     dateym = year.(df.date) .* 100 .+ month.(df.date)
     49     rf = df.value ./ 100.0
     50     return DataFrame(dateym = dateym, rf = rf)
     51 end
     52 
     53 """
     54     compute_excess_returns(msi_path, tbill_df) -> DataFrame
     55 
     56 Compute 3-year geometric average future excess returns.
     57 Replicates R logic from import_predictors.R lines 100-114.
     58 
     59 The R code does:
     60 1. lead1_retm = shift(retm, 1, type="lead")  -- next month's return
     61 2. retm_y = exp(roll_sum(log(1+lead1_retm), n=12, align="left")) - 1  -- 12-month forward return
     62 3. rf_y = geometric average of quarterly T-bill rates (current + 3,6,9 month leads)
     63 4. rmrf_y3 = 3-year geometric average excess return
     64 """
     65 function compute_excess_returns(msi_path::String, tbill_df::DataFrame)
     66     msi = CSV.read(msi_path, DataFrame)
     67     dropmissing!(msi, [:vwretd])
     68     dates = Date.(msi.date)
     69     dateym = year.(dates) .* 100 .+ month.(dates)
     70 
     71     df = DataFrame(dateym = dateym, retm = msi.vwretd)
     72     df = innerjoin(df, tbill_df, on=:dateym)
     73 
     74     n = nrow(df)
     75     retm = df.retm
     76     rf = df.rf
     77 
     78     # 12-month forward market return: lead by 1, then rolling 12-month sum of log returns
     79     retm_y = Vector{Union{Float64,Missing}}(missing, n)
     80     for t in 1:(n-12)
     81         retm_y[t] = exp(sum(log.(1.0 .+ retm[(t+1):(t+12)]))) - 1.0
     82     end
     83 
     84     # Annual risk-free: geometric average of quarterly T-bill
     85     rf_y = Vector{Union{Float64,Missing}}(missing, n)
     86     for t in 1:(n-9)
     87         rf_y[t] = (1+rf[t])^0.25 * (1+rf[t+3])^0.25 * (1+rf[t+6])^0.25 * (1+rf[t+9])^0.25 - 1.0
     88     end
     89 
     90     # 3-year geometric average excess return
     91     rmrf_y3 = Vector{Union{Float64,Missing}}(missing, n)
     92     for t in 1:(n-24)
     93         if !ismissing(retm_y[t]) && !ismissing(retm_y[t+12]) && !ismissing(retm_y[t+24]) &&
     94            !ismissing(rf_y[t])   && !ismissing(rf_y[t+12])   && !ismissing(rf_y[t+24])
     95             rmrf_y3[t] = (
     96                 ((1+retm_y[t]) * (1+retm_y[t+12]) * (1+retm_y[t+24]))^(1/3) -
     97                 (((1+rf_y[t])  * (1+rf_y[t+12])   * (1+rf_y[t+24]))^(1/3) - 1) - 1
     98             )
     99         end
    100     end
    101 
    102     return DataFrame(dateym = df.dateym, rmrf_y3 = rmrf_y3)
    103 end
    104 
    105 """
    106     build_predictors(msi_path, cay_path) -> DataFrame
    107 
    108 Full pipeline: compute D/P, download T-bill, compute excess returns,
    109 load CAY, merge all. Returns DataFrame with: dateym, dp, rf, rmrf_y3, cay.
    110 """
    111 function build_predictors(msi_path::String, cay_path::String)
    112     dp_df    = compute_dp(msi_path)
    113     tbill_df = compute_tbill()
    114     rmrf_df  = compute_excess_returns(msi_path, tbill_df)
    115 
    116     # Load CAY (either Lettau's or our computed version)
    117     cay_raw = CSV.read(cay_path, DataFrame; header=1)
    118     rename!(cay_raw, Symbol.(["date", "c", "w", "y", "cay"]))
    119     cay_raw.date = Date.(cay_raw.date)
    120     cay_df = DataFrame(
    121         dateym = year.(cay_raw.date) .* 100 .+ month.(cay_raw.date),
    122         cay = cay_raw.cay,
    123     )
    124 
    125     # Merge
    126     predict = innerjoin(dp_df, tbill_df, on=:dateym)
    127     predict = innerjoin(predict, rmrf_df, on=:dateym)
    128     predict = innerjoin(predict, cay_df, on=:dateym)
    129     dropmissing!(predict)
    130 
    131     return predict
    132 end
    133 
    134 # When run as script
    135 if abspath(PROGRAM_FILE) == @__FILE__
    136     mkpath("tmp")
    137     predict = build_predictors("output/msi.csv", "input/cay_computed.csv")
    138     CSV.write("tmp/predict.csv", predict)
    139     println("Wrote tmp/predict.csv: $(nrow(predict)) rows")
    140 end
    141 
    142 end # module