FinanceRoutines.jl

Financial data routines for Julia
Log | Files | Refs | README | LICENSE

ImportFamaFrench.jl (10831B)


      1 # --------------------------------------------------------------------------------------------------
      2 
      3 # ImportFamaFrench.jl
      4 
      5 # Collection of functions that import
      6 #  financial data from Ken French's website into julia
      7 # --------------------------------------------------------------------------------------------------
      8 
      9 
     10 
     11 # --------------------------------------------------------------------------------------------------
     12 # Shared helper: download a Ken French zip and extract the CSV entry
     13 # --------------------------------------------------------------------------------------------------
     14 function _download_ff_zip(url)
     15     http_response = Downloads.download(url)
     16     z = ZipFile.Reader(http_response)
     17     csv_file = filter(x -> match(r".*csv", lowercase(x.name)) !== nothing, z.files)[1]
     18     return (z, csv_file)
     19 end
     20 
     21 
     22 # --------------------------------------------------------------------------------------------------
     23 """
     24     import_FF3(;frequency::Symbol=:monthly) -> DataFrame
     25 
     26 Import Fama-French 3-factor model data directly from Ken French's data library.
     27 
     28 Downloads and parses the Fama-French research data factors (market risk premium,
     29 size factor, value factor, and risk-free rate) at the specified frequency.
     30 
     31 # Arguments
     32 - `frequency::Symbol=:monthly`: Data frequency to import. Options are:
     33   - `:monthly` - Monthly factor returns (default)
     34   - `:annual` - Annual factor returns
     35   - `:daily` - Daily factor returns
     36 
     37 # Returns
     38 - `DataFrame`: Fama-French 3-factor data with columns:
     39   - **Monthly/Annual**: `datem`/`datey`, `mktrf`, `smb`, `hml`, `rf`
     40   - **Daily**: `date`, `mktrf`, `smb`, `hml`, `rf`
     41 
     42 Where:
     43 - `mktrf`: Market return minus risk-free rate (market risk premium)
     44 - `smb`: Small minus big (size factor)
     45 - `hml`: High minus low (value factor)
     46 - `rf`: Risk-free rate
     47 
     48 # Examples
     49 ```julia
     50 # Import monthly data (default)
     51 monthly_ff = import_FF3()
     52 
     53 # Import annual data
     54 annual_ff = import_FF3(frequency=:annual)
     55 
     56 # Import daily data
     57 daily_ff = import_FF3(frequency=:daily)
     58 ```
     59 
     60 # Notes
     61 - Data is sourced directly from Kenneth French's data library at Dartmouth
     62 - Monthly and annual data excludes the daily/monthly breakdowns respectively
     63 - Date formats are automatically parsed to appropriate Julia date types
     64 - Missing values are filtered out from the datasets
     65 - Requires internet connection to download data
     66 
     67 # Data Source
     68 Kenneth R. French Data Library: https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/data_library.html
     69 """
     70 function import_FF3(;frequency::Symbol=:monthly)
     71     url_mth_yr = "https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Research_Data_Factors_CSV.zip"
     72     url_daily  = "https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Research_Data_Factors_daily_CSV.zip"
     73     col_types  = [String7, Float64, Float64, Float64, Float64]
     74 
     75     return _import_ff_factors(frequency, url_mth_yr, url_daily, col_types,
     76         col_names_monthly = [:datem, :mktrf, :smb, :hml, :rf],
     77         col_names_annual  = [:datey, :mktrf, :smb, :hml, :rf],
     78         col_names_daily   = [:date, :mktrf, :smb, :hml, :rf])
     79 end
     80 # --------------------------------------------------------------------------------------------------
     81 
     82 
     83 # --------------------------------------------------------------------------------------------------
     84 # Shared import logic for FF3/FF5/momentum — handles all three frequencies
     85 # --------------------------------------------------------------------------------------------------
     86 function _import_ff_factors(frequency::Symbol, url_mth_yr, url_daily, col_types;
     87     col_names_monthly, col_names_annual, col_names_daily)
     88 
     89     if frequency == :annual
     90 
     91         z, csv_file = _download_ff_zip(url_mth_yr)
     92         df = copy(_parse_ff_annual(csv_file, types=col_types, col_names=col_names_annual))
     93         close(z)
     94         return df
     95 
     96     elseif frequency == :monthly
     97 
     98         z, csv_file = _download_ff_zip(url_mth_yr)
     99         df = copy(_parse_ff_monthly(csv_file, types=col_types, col_names=col_names_monthly))
    100         close(z)
    101         transform!(df, col_names_monthly[1] => ByRow(x -> MonthlyDate(x, "yyyymm")) => col_names_monthly[1])
    102         return df
    103 
    104     elseif frequency == :daily
    105 
    106         z, csv_file = _download_ff_zip(url_daily)
    107         df = copy(CSV.File(csv_file, header=4, footerskip=1) |> DataFrame)
    108         close(z)
    109         rename!(df, col_names_daily)
    110         date_col = col_names_daily[1]
    111         val_col = col_names_daily[2]
    112         subset!(df, date_col => ByRow(!ismissing), val_col => ByRow(!ismissing))
    113         transform!(df, :date => ByRow(x -> Date(string(x), "yyyymmdd")) => :date)
    114         return df
    115 
    116     else
    117         error("Frequency $frequency not known. Options are :daily, :monthly, or :annual")
    118     end
    119 end
    120 # --------------------------------------------------------------------------------------------------
    121 
    122 
    123 # --------------------------------------------------------------------------------------------------
    124 function _parse_ff_annual(zip_file; types=nothing,
    125     col_names=[:datey, :mktrf, :smb, :hml, :rf])
    126 
    127     lines = String[]
    128     found_annual = false
    129 
    130     # Read all lines from the zip file entry
    131     file_lines = split(String(read(zip_file)), '\n')
    132 
    133     for line in file_lines
    134         if occursin(r"Annual Factors", line)
    135             found_annual = true
    136             continue
    137         end
    138 
    139         if found_annual
    140             # Data lines start with a 4-digit year
    141             if occursin(r"^\s*\d{4}", line)
    142                 clean_line = replace(line, r"[\r]" => "")
    143                 push!(lines, clean_line)
    144             elseif !isempty(lines) && occursin(r"^\s*$", line)
    145                 # Empty line after we've started collecting data = end of section
    146                 break
    147             end
    148             # Otherwise skip (headers, sub-headers, blank lines before data)
    149         end
    150     end
    151 
    152     if !found_annual
    153         error("Annual Factors section not found in file")
    154     end
    155 
    156     lines_buffer = IOBuffer(join(lines, "\n"))
    157     return CSV.File(lines_buffer, header=false, delim=",", ntasks=1, types=types) |> DataFrame |>
    158            df -> rename!(df, col_names)
    159 end
    160 # --------------------------------------------------------------------------------------------------
    161 
    162 
    163 # --------------------------------------------------------------------------------------------------
    164 function _parse_ff_monthly(zip_file; types=nothing,
    165     col_names=[:datem, :mktrf, :smb, :hml, :rf])
    166 
    167     # Read all lines from the zip file entry
    168     file_lines = split(String(read(zip_file)), '\n')
    169 
    170     # Find the first data line (starts with digits, like "192607")
    171     skipto = 1
    172     for (i, line) in enumerate(file_lines)
    173         if occursin(r"^\s*\d{6}", line)
    174             skipto = i
    175             break
    176         end
    177     end
    178 
    179     # Collect data lines until we hit "Annual Factors"
    180     data_lines = String[]
    181 
    182     for i in skipto:length(file_lines)
    183         line = file_lines[i]
    184 
    185         # Stop when we hit Annual Factors section
    186         if occursin(r"Annual Factors", line)
    187             break
    188         end
    189 
    190         # Skip empty lines
    191         if occursin(r"^\s*$", line)
    192             continue
    193         end
    194 
    195         # Add non-empty data lines
    196         push!(data_lines, line)
    197     end
    198 
    199     # Create IOBuffer with header + data
    200     buffer = IOBuffer(join(data_lines, "\n"))
    201 
    202     return CSV.File(buffer, header=false, delim=",", ntasks=1, types=types) |> DataFrame |>
    203            df -> rename!(df, col_names)
    204 
    205 end
    206 # --------------------------------------------------------------------------------------------------
    207 
    208 
    209 # --------------------------------------------------------------------------------------------------
    210 """
    211     import_FF5(;frequency::Symbol=:monthly) -> DataFrame
    212 
    213 Import Fama-French 5-factor model data directly from Ken French's data library.
    214 
    215 Downloads and parses the Fama-French 5-factor research data (market risk premium,
    216 size, value, profitability, and investment factors plus the risk-free rate).
    217 
    218 # Arguments
    219 - `frequency::Symbol=:monthly`: Data frequency. Options: `:monthly`, `:annual`, `:daily`
    220 
    221 # Returns
    222 - `DataFrame` with columns:
    223   - **Monthly**: `datem`, `mktrf`, `smb`, `hml`, `rmw`, `cma`, `rf`
    224   - **Annual**: `datey`, `mktrf`, `smb`, `hml`, `rmw`, `cma`, `rf`
    225   - **Daily**: `date`, `mktrf`, `smb`, `hml`, `rmw`, `cma`, `rf`
    226 
    227 Where:
    228 - `mktrf`: Market return minus risk-free rate
    229 - `smb`: Small minus big (size)
    230 - `hml`: High minus low (value)
    231 - `rmw`: Robust minus weak (profitability)
    232 - `cma`: Conservative minus aggressive (investment)
    233 - `rf`: Risk-free rate
    234 
    235 # Examples
    236 ```julia
    237 monthly_ff5 = import_FF5()
    238 annual_ff5 = import_FF5(frequency=:annual)
    239 daily_ff5 = import_FF5(frequency=:daily)
    240 ```
    241 
    242 # Data Source
    243 Kenneth R. French Data Library: https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/data_library.html
    244 """
    245 function import_FF5(;frequency::Symbol=:monthly)
    246     url_mth_yr = "https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Research_Data_5_Factors_2x3_CSV.zip"
    247     url_daily  = "https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Research_Data_5_Factors_2x3_daily_CSV.zip"
    248     col_types  = [String7, Float64, Float64, Float64, Float64, Float64, Float64]
    249 
    250     return _import_ff_factors(frequency, url_mth_yr, url_daily, col_types,
    251         col_names_monthly = [:datem, :mktrf, :smb, :hml, :rmw, :cma, :rf],
    252         col_names_annual  = [:datey, :mktrf, :smb, :hml, :rmw, :cma, :rf],
    253         col_names_daily   = [:date, :mktrf, :smb, :hml, :rmw, :cma, :rf])
    254 end
    255 # --------------------------------------------------------------------------------------------------
    256 
    257 
    258 # --------------------------------------------------------------------------------------------------
    259 """
    260     import_FF_momentum(;frequency::Symbol=:monthly) -> DataFrame
    261 
    262 Import Fama-French momentum factor from Ken French's data library.
    263 
    264 # Arguments
    265 - `frequency::Symbol=:monthly`: Data frequency. Options: `:monthly`, `:annual`, `:daily`
    266 
    267 # Returns
    268 - `DataFrame` with columns:
    269   - **Monthly**: `datem`, `mom`
    270   - **Annual**: `datey`, `mom`
    271   - **Daily**: `date`, `mom`
    272 
    273 # Examples
    274 ```julia
    275 monthly_mom = import_FF_momentum()
    276 daily_mom = import_FF_momentum(frequency=:daily)
    277 ```
    278 
    279 # Data Source
    280 Kenneth R. French Data Library: https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/data_library.html
    281 """
    282 function import_FF_momentum(;frequency::Symbol=:monthly)
    283     url_mth_yr = "https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Momentum_Factor_CSV.zip"
    284     url_daily  = "https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Momentum_Factor_daily_CSV.zip"
    285     col_types  = [String7, Float64]
    286 
    287     return _import_ff_factors(frequency, url_mth_yr, url_daily, col_types,
    288         col_names_monthly = [:datem, :mom],
    289         col_names_annual  = [:datey, :mom],
    290         col_names_daily   = [:date, :mom])
    291 end
    292 # --------------------------------------------------------------------------------------------------