import_predictors.log.R (9167B)
1 2 R version 4.1.3 (2022-03-10) -- "One Push-Up" 3 Copyright (C) 2022 The R Foundation for Statistical Computing 4 Platform: x86_64-apple-darwin17.0 (64-bit) 5 6 R is free software and comes with ABSOLUTELY NO WARRANTY. 7 You are welcome to redistribute it under certain conditions. 8 Type 'license()' or 'licence()' for distribution details. 9 10 Natural language support but running in an English locale 11 12 R is a collaborative project with many contributors. 13 Type 'contributors()' for more information and 14 'citation()' on how to cite R or R packages in publications. 15 16 Type 'demo()' for some demos, 'help()' for on-line help, or 17 'help.start()' for an HTML browser interface to help. 18 Type 'q()' to quit R. 19 20 > #!/usr/bin/env Rscript 21 > # 22 > # import_predictors.R 23 > # 24 > # This code creates or imports the predictors of aggregate equity returns 25 > # There are three parts for T-bill, D/P ratio and cay 26 > # 27 > # 1. T-bill comes from the H15 release of the FRB 28 > # we downloaded it directly from FRED at https://fred.stlouisfed.org/series/TB3MS 29 > # 30 > # 2. cay comes from Martin Lettau's website at http://faculty.haas.berkeley.edu/lettau/data_cay.html 31 > # 32 > # 3. D-P ratio is estimated from the MSI CRSP Files 33 > # We use a method of continuously reinvested dividends 34 > # See attached LaTeX file for explanations of the procedure 35 > # Data is from CRSP and available at /wrds/crsp/sasdata/a_stock/msi.sas7bdat 36 > # 37 > # 4. Estimate future excess returns: we use a horizon of three years in the paper 38 > # 39 > # (c) Valentin Haddad, Erik Loualiche & Matthew Plosser 40 > # 41 > # Last updated on June 4th 2019 42 > # 43 > ################################################################################## 44 > 45 > 46 > ################################################################################## 47 > message("Log file for code executed at\n") 48 Log file for code executed at 49 50 > message(format(Sys.time(), "%a %b %d %X %Y")) 51 Mon Jun 06 12:38:01 2022 52 > ################################################################################## 53 > 54 > 55 > ################################################################################## 56 > # APPEND REQUIRED PACKAGES 57 > 58 > # See this https://stackoverflow.com/questions/4090169/elegant-way-to-check-for-missing-packages-and-install-them 59 > using<-function(...) { 60 + libs<-unlist(list(...)) 61 + req<-unlist(lapply(libs,require,character.only=TRUE)) 62 + need<-libs[req==FALSE] 63 + if(length(need)>0){ 64 + install.packages(need) 65 + lapply(need,require,character.only=TRUE) 66 + } 67 + } 68 > 69 > package_to_load <- c("crayon", "devtools", "alfred", "haven", "dplyr", 70 + "stringr", "lubridate", "RcppRoll", "statar", "data.table") 71 > using(package_to_load) 72 Loading required package: crayon 73 Loading required package: devtools 74 Loading required package: usethis 75 Loading required package: alfred 76 Loading required package: haven 77 Loading required package: dplyr 78 79 Attaching package: ‘dplyr’ 80 81 The following objects are masked from ‘package:stats’: 82 83 filter, lag 84 85 The following objects are masked from ‘package:base’: 86 87 intersect, setdiff, setequal, union 88 89 Loading required package: stringr 90 Loading required package: lubridate 91 92 Attaching package: ‘lubridate’ 93 94 The following objects are masked from ‘package:base’: 95 96 date, intersect, setdiff, union 97 98 Loading required package: RcppRoll 99 Loading required package: statar 100 Loading required package: data.table 101 102 Attaching package: ‘data.table’ 103 104 The following objects are masked from ‘package:lubridate’: 105 106 hour, isoweek, mday, minute, month, quarter, second, wday, week, 107 yday, year 108 109 The following objects are masked from ‘package:dplyr’: 110 111 between, first, last 112 113 > 114 > check_file = file.exists("log/R-session-info.log.R") 115 > sink("log/R-session-info.log.R", append=check_file) 116 > cat(bold("\n\n# -----\n# Session info for import_predictors\n\n")) 117 > session_info() 118 > sink() 119 > ################################################################################## 120 > 121 > 122 > ################################################################################## 123 > # 1. TREASURIES 124 > dt_tbill <- get_fred_series("TB3MS", "rf", observation_start = "1950-01-01", observation_end = "2020-12-31") %>% data.table 125 > dt_tbill <- dt_tbill[, .(dateym=year(date)*100+month(date), rf=rf/100)] 126 > dt_tbill[] 127 dateym rf 128 1: 195001 0.0107 129 2: 195002 0.0112 130 3: 195003 0.0112 131 4: 195004 0.0115 132 5: 195005 0.0116 133 --- 134 848: 202008 0.0010 135 849: 202009 0.0011 136 850: 202010 0.0010 137 851: 202011 0.0009 138 852: 202012 0.0009 139 > # fwrite(dt_tbill, "./input/tbill.csv") 140 > ################################################################################## 141 > 142 > 143 > ################################################################################## 144 > # 2. CAY 145 > dt_cay <- fread("./input/cay_current.csv", skip=0, header=T) 146 > setnames(dt_cay, c("date", "c", "w", "y", "cay")) 147 > dt_cay <- dt_cay[, .(date_y=year(date), month = month(date), cay) ] 148 > dt_cay <- dt_cay[, .(dateym=date_y*100+month, cay) ] 149 > dt_cay[] 150 dateym cay 151 1: 195203 0.01510493 152 2: 195206 0.02483727 153 3: 195209 0.01484008 154 4: 195212 0.02216598 155 5: 195303 0.02152118 156 --- 157 267: 201809 -0.02934508 158 268: 201812 -0.02020734 159 269: 201903 -0.04435449 160 270: 201906 -0.03764155 161 271: 201909 -0.03665922 162 > ################################################################################## 163 > 164 > 165 > ################################################################################## 166 > # 3. Dividend-Price RATIO 167 > dt_msi <- read_sas("./input/msi.sas7bdat") %>% data.table 168 > dt_msi <- dt_msi[, .(date=DATE, vwretd, vwretx) ] 169 > fwrite(dt_msi, "./output/msi.csv") # SAVED HERE IF YOU NEED IT 170 > 171 > # ESTIMATE THE DP RATIO 172 > dt_dp <- dt_msi[, .(date, vwretx, vwretd, vwrx=1+vwretx, vwrd=1+vwretd) ] 173 > dt_dp[, `:=`(vwrx=1+vwretx, vwrd=1+vwretd) ] 174 > dt_dp[, `:=`(dpvw = 100 * (vwretd-vwretx) / (1+vwretx) ) ] 175 > dt_dp[, `:=`(retd_retx = (1+vwretd) / (1+vwretx) ) ] 176 > dt_dp[, `:=`(datem = as.monthly(date)) ] 177 > dt_dp[, dp := 0 ] 178 > for (i in seq(11,0)){ 179 + dt_dp[, dp := (dp*tlag(retd_retx, i, time=datem) + tlag(dpvw, i, time=datem)) ] 180 + } 181 > dt_dp <- dt_dp[, .(dateym=year(datem)*100+month(datem), dp=dp/100)] 182 > dt_dp[] 183 dateym dp 184 1: 192512 NA 185 2: 192601 NA 186 3: 192602 NA 187 4: 192603 NA 188 5: 192604 NA 189 --- 190 1152: 202111 0.01475104 191 1153: 202112 0.01473475 192 1154: 202201 0.01454479 193 1155: 202202 0.01458693 194 1156: 202203 0.01466498 195 > ################################################################################## 196 > 197 > 198 > ################################################################################## 199 > # ESTIMATE FUTURE EXCESS RETURNS 200 > dt_rmrf <- fread("./output/msi.csv") %>% data.table 201 > dt_rmrf <- dt_rmrf[, .(dateym=year(date)*100+month(date), retm=vwretd) ] 202 > dt_rmrf <- merge(dt_rmrf, dt_tbill, by = "dateym") 203 > 204 > dt_rmrf[, lead1_retm := shift(retm, 1, type="lead") ] 205 > dt_rmrf[, retm_y := exp( roll_sum(log(1+lead1_retm), n=12, align="left", fill=NA) ) - 1 ] 206 > dt_rmrf[, rf_y := (1+rf)^(1/4) * (1+shift(rf, 3, type="lead"))^(1/4) * 207 + (1+shift(rf, 6, type="lead"))^(1/4) * (1+shift(rf, 9, type="lead"))^(1/4) - 1 ] 208 > dt_rmrf[, rmrf_y3 := 1 * ( 209 + ( (1+retm_y) * (1 + shift(retm_y, 12, type="lead")) * (1 + shift(retm_y, 24, type="lead")) )^(1/3) - 210 + ( ((1+rf_y) * (1 + shift(rf_y, 12, type="lead")) * (1 + shift(rf_y, 24, type="lead")) )^(1/3) - 1) -1) ] 211 > 212 > dt_rmrf <- dt_rmrf[, .(dateym, rmrf_y3) ] 213 > dt_rmrf[ !is.na(rmrf_y3) ] 214 dateym rmrf_y3 215 1: 195001 0.19164596 216 2: 195002 0.18441326 217 3: 195003 0.17389623 218 4: 195004 0.14728401 219 5: 195005 0.13288460 220 --- 221 812: 201708 0.10990858 222 813: 201709 0.08875791 223 814: 201710 0.07444771 224 815: 201711 0.10732976 225 816: 201712 0.12023222 226 > ################################################################################## 227 > 228 > 229 > ################################################################################## 230 > # MERGE THE PREDICTORS 231 > dt_predict <- merge(dt_dp, dt_tbill, by = c("dateym")) 232 > dt_predict <- merge(dt_predict, dt_rmrf, by = c("dateym"), all.x = T) 233 > dt_predict <- merge(dt_predict, dt_cay, by = c("dateym"), all.x = T) 234 > dt_predict <- dt_predict[ !is.na(rmrf_y3) ] 235 > dt_predict <- dt_predict[ !is.na(cay) ] 236 > dt_predict[] 237 dateym dp rf rmrf_y3 cay 238 1: 195203 0.05817138 0.0159 0.18092953 0.01510493 239 2: 195206 0.05739649 0.0170 0.21642173 0.02483727 240 3: 195209 0.05709103 0.0171 0.23193277 0.01484008 241 4: 195212 0.05522191 0.0209 0.22202729 0.02216598 242 5: 195303 0.05455042 0.0201 0.26058722 0.02152118 243 --- 244 260: 201612 0.02284466 0.0051 0.11899389 -0.01902802 245 261: 201703 0.02185142 0.0074 0.01294375 -0.02168661 246 262: 201706 0.02147595 0.0098 0.07365837 -0.02432134 247 263: 201709 0.02132813 0.0103 0.08875791 -0.02799587 248 264: 201712 0.02075782 0.0132 0.12023222 -0.02490222 249 > 250 > fwrite(dt_predict, "./tmp/predict.csv") 251 > ################################################################################## 252 > 253 > 254 > 255 > 256 > 257 > 258 > 259 > 260 > 261 > 262 > 263 > 264 > 265 > 266 > 267 > proc.time() 268 user system elapsed 269 1.506 0.172 2.113