commit 98848cbd9132330f6dd6abb80fcbffb6141ad731
parent 3032dc570dd21e38320ad173cfd9b1cbe8618e64
Author: Erik Loualiche <eloualic@umn.edu>
Date: Sun, 22 Mar 2026 10:30:57 -0500
Generalize FF parsing with shared helpers for FF5 reuse
- Extract _download_ff_zip and _import_ff_factors shared helpers
- Add col_names parameter to _parse_ff_annual and _parse_ff_monthly
- Fix daily filter to use subset! with generic column names
- import_FF3 now delegates to _import_ff_factors
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Diffstat:
1 file changed, 62 insertions(+), 55 deletions(-)
diff --git a/src/ImportFamaFrench.jl b/src/ImportFamaFrench.jl
@@ -9,10 +9,14 @@
# --------------------------------------------------------------------------------------------------
-# List of exported functions
-# export import_FF3 # read monthly FF3
+# Shared helper: download a Ken French zip and extract the CSV entry
# --------------------------------------------------------------------------------------------------
-
+"""
+    _download_ff_zip(url) -> (reader, csv_entry)
+
+Download the zip archive at `url` and locate the CSV entry inside it.
+
+Returns a tuple of the open `ZipFile.Reader` and the first entry whose lowercased
+name contains "csv". The reader is returned open so the entry can still be read;
+the caller is responsible for calling `close` on it afterwards.
+
+Raises an error (after closing the reader) when the archive has no CSV entry.
+"""
+function _download_ff_zip(url)
+    # `Downloads.download` without an output argument returns the path of the
+    # file it wrote, not an HTTP response object.
+    zip_path = Downloads.download(url)
+    z = ZipFile.Reader(zip_path)
+    idx = findfirst(f -> occursin("csv", lowercase(f.name)), z.files)
+    if idx === nothing
+        # Do not leak the open reader when there is nothing to hand back.
+        close(z)
+        error("No CSV entry found in zip archive downloaded from $url")
+    end
+    return (z, z.files[idx])
+end
# --------------------------------------------------------------------------------------------------
@@ -21,13 +25,13 @@
Import Fama-French 3-factor model data directly from Ken French's data library.
-Downloads and parses the Fama-French research data factors (market risk premium,
+Downloads and parses the Fama-French research data factors (market risk premium,
size factor, value factor, and risk-free rate) at the specified frequency.
# Arguments
- `frequency::Symbol=:monthly`: Data frequency to import. Options are:
- `:monthly` - Monthly factor returns (default)
- - `:annual` - Annual factor returns
+ - `:annual` - Annual factor returns
- `:daily` - Daily factor returns
# Returns
@@ -37,7 +41,7 @@ size factor, value factor, and risk-free rate) at the specified frequency.
Where:
- `mktrf`: Market return minus risk-free rate (market risk premium)
-- `smb`: Small minus big (size factor)
+- `smb`: Small minus big (size factor)
- `hml`: High minus low (value factor)
- `rf`: Risk-free rate
@@ -64,110 +68,113 @@ daily_ff = import_FF3(frequency=:daily)
Kenneth R. French Data Library: https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/data_library.html
"""
function import_FF3(;frequency::Symbol=:monthly)
+    # The four factor columns are shared by every frequency; only the name of the
+    # leading date column differs.
+    factor_cols = [:mktrf, :smb, :hml, :rf]
+
+    return _import_ff_factors(
+        frequency,
+        "https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Research_Data_Factors_CSV.zip",
+        "https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Research_Data_Factors_daily_CSV.zip",
+        [String7, Float64, Float64, Float64, Float64];
+        col_names_monthly = [:datem; factor_cols],
+        col_names_annual = [:datey; factor_cols],
+        col_names_daily = [:date; factor_cols],
+    )
+end
+# --------------------------------------------------------------------------------------------------
- ff_col_classes = [String7, Float64, Float64, Float64, Float64];
- url_FF_mth_yr = "https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Research_Data_Factors_CSV.zip"
- url_FF_daily = "https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Research_Data_Factors_daily_CSV.zip"
- # ----------------------------------------------------------------------------------------------
- if frequency==:annual
+# --------------------------------------------------------------------------------------------------
+# Shared import logic for FF3/FF5/momentum — handles all three frequencies
+# --------------------------------------------------------------------------------------------------
+"""
+    _import_ff_factors(frequency, url_mth_yr, url_daily, col_types;
+                       col_names_monthly, col_names_annual, col_names_daily)
+
+Download and parse a Fama-French factor file at the requested `frequency`.
+
+# Arguments
+- `frequency::Symbol`: `:annual`, `:monthly`, or `:daily`; any other value raises an error.
+- `url_mth_yr`: URL of the zip archive used for both `:monthly` and `:annual`.
+- `url_daily`: URL of the zip archive used for `:daily`.
+- `col_types`: column types forwarded to the monthly and annual parsers (not used for daily).
+
+# Keywords
+- `col_names_monthly`, `col_names_annual`, `col_names_daily`: column names applied to the
+  result. The first entry names the date column; for daily data the second entry names
+  the column that is checked for missing values together with the date.
+
+# Returns
+A `DataFrame`. The monthly date column is converted with `MonthlyDate(x, "yyyymm")` and
+the daily date column with `Date(string(x), "yyyymmdd")`; annual data is returned as parsed.
+"""
+function _import_ff_factors(frequency::Symbol, url_mth_yr, url_daily, col_types;
+ col_names_monthly, col_names_annual, col_names_daily)
+
+ if frequency == :annual
- http_response = Downloads.download(url_FF_mth_yr);
- z = ZipFile.Reader(http_response) ;
- a_file_in_zip = filter(x -> match(r".*csv", lowercase(x.name)) != nothing, z.files)[1]
- df_FF3 = copy(_parse_ff_annual(a_file_in_zip, types=ff_col_classes))
+ z, csv_file = _download_ff_zip(url_mth_yr)
+ df = copy(_parse_ff_annual(csv_file, types=col_types, col_names=col_names_annual))
close(z)
- return df_FF3
+ return df
- # ----------------------------------------------------------------------------------------------
- elseif frequency==:monthly
+ elseif frequency == :monthly
- http_response = Downloads.download(url_FF_mth_yr);
- z = ZipFile.Reader(http_response) ;
- a_file_in_zip = filter(x -> match(r".*csv", lowercase(x.name)) != nothing, z.files)[1]
- df_FF3 = copy(_parse_ff_monthly(a_file_in_zip, types=ff_col_classes))
+ z, csv_file = _download_ff_zip(url_mth_yr)
+ df = copy(_parse_ff_monthly(csv_file, types=col_types, col_names=col_names_monthly))
close(z)
-
- transform!(df_FF3, :datem => ByRow(x -> MonthlyDate(x, "yyyymm")) => :datem)
- return df_FF3
-
-
- # ----------------------------------------------------------------------------------------------
- elseif frequency==:daily
-
- http_response = Downloads.download(url_FF_daily);
- z = ZipFile.Reader(http_response) ;
- a_file_in_zip = filter(x -> match(r".*csv", lowercase(x.name)) != nothing, z.files)[1]
- df_FF3 = copy(CSV.File(a_file_in_zip, header=4, footerskip=1) |> DataFrame);
+ transform!(df, col_names_monthly[1] => ByRow(x -> MonthlyDate(x, "yyyymm")) => col_names_monthly[1])
+ return df
+
+ elseif frequency == :daily
+
+ z, csv_file = _download_ff_zip(url_daily)
+ df = copy(CSV.File(csv_file, header=4, footerskip=1) |> DataFrame)
close(z)
- rename!(df_FF3, [:date, :mktrf, :smb, :hml, :rf]);
- df_FF3 = @p df_FF3 |> filter(.!ismissing.(_.date) && .!ismissing.(_.mktrf))
- transform!(df_FF3, :date => ByRow(x -> Date(string(x), "yyyymmdd") ) => :date)
- return df_FF3
+ rename!(df, col_names_daily)
+ date_col = col_names_daily[1]
+ val_col = col_names_daily[2]
+ subset!(df, date_col => ByRow(!ismissing), val_col => ByRow(!ismissing))
+ # Use the caller-supplied date column name rather than a hard-coded :date, so that
+ # callers passing a different first column name in col_names_daily still work.
+ transform!(df, date_col => ByRow(x -> Date(string(x), "yyyymmdd")) => date_col)
+ return df
- # ----------------------------------------------------------------------------------------------
else
error("Frequency $frequency not known. Options are :daily, :monthly, or :annual")
end
-
end
# --------------------------------------------------------------------------------------------------
# --------------------------------------------------------------------------------------------------
-function _parse_ff_annual(zip_file; types=nothing)
+"""
+    _parse_ff_annual(zip_file; types=nothing, col_names=[:datey, :mktrf, :smb, :hml, :rf])
+
+Extract the annual section of a Fama-French CSV entry and return it as a `DataFrame`.
+
+The entry is read in full and split on newlines. Lines are ignored until one containing
+"Annual Factors" is seen; from there, lines beginning with four digits (after optional
+whitespace) are collected with carriage returns removed. The collected lines are parsed
+as headerless, comma-delimited CSV using `types`, and the columns are renamed to
+`col_names`.
+
+Raises an error when no "Annual Factors" line is found.
+"""
+function _parse_ff_annual(zip_file; types=nothing,
+ col_names=[:datey, :mktrf, :smb, :hml, :rf])
lines = String[]
found_annual = false
-
+
# Read all lines from the zip file entry
file_lines = split(String(read(zip_file)), '\n')
-
+
for line in file_lines
if occursin(r"Annual Factors", line)
found_annual = true
continue
end
-
+
if found_annual
# Skip the header line that comes after "Annual Factors"
if occursin(r"Mkt-RF|SMB|HML|RF", line)
continue
end
-
+
+ # Blank lines are skipped. A line with three or more consecutive letters in its
+ # first 10 characters that does not start with four digits ends the section.
if occursin(r"^\s*$", line) || occursin(r"[A-Za-z]{3,}", line[1:min(10, length(line))])
- if !occursin(r"^\s*$", line) && !occursin(r"^\s*\d{4}", line) # Added \s*
+ if !occursin(r"^\s*$", line) && !occursin(r"^\s*\d{4}", line)
break
end
continue
end
-
- if occursin(r"^\s*\d{4}", line)
- clean_line = replace(line, r"[\r]" => "")
+
+ # Keep data rows only; strip the carriage return left over from splitting on '\n'.
+ if occursin(r"^\s*\d{4}", line)
+ clean_line = replace(line, r"[\r]" => "")
push!(lines, clean_line)
end
end
end
-
+
if !found_annual
error("Annual Factors section not found in file")
end
-
+
+ # Re-join the collected rows so CSV.File can parse them from an in-memory buffer.
lines_buffer = IOBuffer(join(lines, "\n"))
return CSV.File(lines_buffer, header=false, delim=",", ntasks=1, types=types) |> DataFrame |>
- df -> rename!(df, [:datey, :mktrf, :smb, :hml, :rf])
+ df -> rename!(df, col_names)
end
# --------------------------------------------------------------------------------------------------
# --------------------------------------------------------------------------------------------------
-function _parse_ff_monthly(zip_file; types=nothing)
+function _parse_ff_monthly(zip_file; types=nothing,
+ col_names=[:datem, :mktrf, :smb, :hml, :rf])
# Read all lines from the zip file entry
file_lines = split(String(read(zip_file)), '\n')
# Find the first data line (starts with digits, like "192607")
- # instead of hardcoding a skip count that breaks if the header changes
skipto = 1
for (i, line) in enumerate(file_lines)
if occursin(r"^\s*\d{6}", line)
@@ -200,7 +207,7 @@ function _parse_ff_monthly(zip_file; types=nothing)
buffer = IOBuffer(join(data_lines, "\n"))
return CSV.File(buffer, header=false, delim=",", ntasks=1, types=types) |> DataFrame |>
- df -> rename!(df, [:datem, :mktrf, :smb, :hml, :rf])
+ df -> rename!(df, col_names)
end
# --------------------------------------------------------------------------------------------------