commit 4425c933e99620b143718a9ec319dbf5112099bf
parent 2740debc1cf3792f138522b54832abc825678d76
Author: Erik Loualiche <eloualiche@users.noreply.github.com>
Date: Sat, 24 May 2025 11:24:41 -0500
Merge pull request #5 from LouLouLibs/main
Main
Diffstat:
12 files changed, 2268 insertions(+), 271 deletions(-)
diff --git a/Project.toml b/Project.toml
@@ -1,7 +1,7 @@
name = "FinanceRoutines"
uuid = "2e4c0fa2-b49b-4c8f-9592-485f04b9fc03"
authors = ["Erik Loualiche <eloualic@umn.edu>"]
-version = "0.3.1"
+version = "0.4.1"
[deps]
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
diff --git a/README.md b/README.md
@@ -82,11 +82,21 @@ df_FF3_daily = import_FF3(:daily)
The function downloads yield curves from the [NY Fed GSW](https://www.federalreserve.gov/pubs/feds/2006/200628/200628abs.html) and estimate returns based on the curves
```julia
-df_GSW = import_GSW();
-estimate_yield_GSW!(df_GSW; maturity=1); # maturity is in years
-select(df_GSW, :date, :yield_1y)
+df_GSW = import_gsw_parameters(date_range=(Date("1960-01-01"), Dates.today()) )
+FinanceRoutines.add_yields!(df_GSW, [1, 10]) # maturities are in years
+# or compute the yields yourself using functions
+transform!(df_GSW,
+ AsTable(:) => ByRow(row ->
+ begin
+ gsw_params = GSWParameters(row)
+ ismissing(gsw_params) ? missing : gsw_yield(5, gsw_params)
+ end) => :yield_5y,
+ )
```
+See the [doc](https://eloualiche.github.io/FinanceRoutines.jl/) for more options.
+
+
### Common operations in asset pricing
Look in the documentation for a guide on how to estimate betas: over the whole sample and using rolling regressions.
diff --git a/docs/make.jl b/docs/make.jl
@@ -13,7 +13,8 @@ makedocs(
pages=[
"Home" => "index.md",
"Manual" => [
- "man/wrds_guide.md"
+ "man/wrds_guide.md",
+ "man/yield_curve_gsw.md"
],
"Demos" => [
"demo/beta.md",
diff --git a/docs/src/man/yield_curve_gsw.md b/docs/src/man/yield_curve_gsw.md
@@ -0,0 +1,235 @@
+# Import Yield Curve Data
+
+Some utilities for working with Gürkaynak-Sack-Wright (GSW) yield curve data from the Federal Reserve Board and for Nelson-Siegel-Svensson model calculations.
+Note that some of the code was first written by hand and then reimplemented using AI; while I have tested some functions, you may want to do your own sanity checks.
+
+## Overview
+
+This package provides tools to:
+- Import daily GSW yield curve parameters from the Federal Reserve
+- Calculate yields, prices, and returns using Nelson-Siegel-Svensson models
+- Handle both 3-factor (Nelson-Siegel) and 4-factor (Svensson) model periods
+- Work with time series of bond returns and risk premiums
+
+
+## Installation
+
+```julia
+using FinanceRoutines; # Pkg.add(url="https://github.com/eloualiche/FinanceRoutines.jl")
+```
+
+## Quick Start
+
+```julia
+# Import GSW parameters from the Fed
+df = import_gsw_parameters(date_range=(Date("1960-01-01"), Dates.today()) )
+
+# Add yield calculations for multiple maturities
+FinanceRoutines.add_yields!(df, [1, 2, 5, 10, 30])
+
+# Add bond prices
+FinanceRoutines.add_prices!(df, [1, 5, 10])
+
+# Calculate daily returns for 10-year bonds
+FinanceRoutines.add_returns!(df, 10.0, frequency=:daily, return_type=:log)
+# Calculate excess returns over 3-month rate
+FinanceRoutines.add_excess_returns!(df, 10.0, risk_free_maturity=0.25)
+```
+
+
+## Core Types
+
+### GSWParameters
+
+Structure to hold Nelson-Siegel-Svensson model parameters:
+
+```julia
+# 4-factor Svensson model
+params = GSWParameters(5.0, -2.0, 1.5, 0.8, 2.5, 0.5)
+
+# 3-factor Nelson-Siegel model (missing β₃, τ₂)
+params_3f = GSWParameters(5.0, -2.0, 1.5, missing, 2.5, missing)
+
+# From DataFrame row
+params = GSWParameters(df[1, :])
+```
+
+## Core Functions
+
+### Data Import
+
+```julia
+# Import all available data
+df = import_gsw_parameters()
+
+# Import specific date range
+df = import_gsw_parameters(date_range=(Date("2010-01-01"), Date("2020-12-31")))
+```
+
+### Yield Calculations
+
+```julia
+# Single yield calculation
+yield = gsw_yield(10.0, params) # 10-year yield
+yield = gsw_yield(10.0, 5.0, -2.0, 1.5, 0.8, 2.5, 0.5) # Using individual parameters
+
+# Yield curve
+maturities = [0.25, 0.5, 1, 2, 5, 10, 30]
+yields = gsw_yield_curve(maturities, params)
+```
+
+### Price Calculations
+
+```julia
+# Zero-coupon bond prices
+price = gsw_price(10.0, params) # 10-year zero price
+price = gsw_price(10.0, params, face_value=100.0) # Custom face value
+
+# Price curve
+prices = gsw_price_curve(maturities, params)
+```
+
+### Return Calculations
+
+```julia
+# Bond returns between two periods
+params_today = GSWParameters(5.0, -2.0, 1.5, 0.8, 2.5, 0.5)
+params_yesterday = GSWParameters(4.9, -1.9, 1.4, 0.9, 2.4, 0.6)
+
+# Daily log return
+ret = gsw_return(10.0, params_today, params_yesterday)
+
+# Monthly arithmetic return
+ret = gsw_return(10.0, params_today, params_yesterday,
+ frequency=:monthly, return_type=:arithmetic)
+
+# Excess return over risk-free rate
+excess_ret = gsw_excess_return(10.0, params_today, params_yesterday)
+```
+
+### Forward Rates
+
+```julia
+# 1-year forward rate starting in 2 years
+fwd_rate = gsw_forward_rate(2.0, 3.0, params)
+```
+
+## DataFrame Operations
+
+### Adding Calculations to DataFrames
+
+```julia
+# Add yields for multiple maturities
+FinanceRoutines.add_yields!(df, [1, 2, 5, 10, 30])
+
+# Add prices with custom face value
+FinanceRoutines.add_prices!(df, [1, 5, 10], face_value=100.0)
+
+# Add daily log returns
+FinanceRoutines.add_returns!(df, 10.0, frequency=:daily, return_type=:log)
+
+# Add monthly arithmetic returns
+FinanceRoutines.add_returns!(df, 5.0, frequency=:monthly, return_type=:arithmetic)
+
+# Add excess returns
+FinanceRoutines.add_excess_returns!(df, 10.0, risk_free_maturity=0.25)
+```
+
+### Column Names
+
+The package creates standardized column names:
+- Yields: `yield_1y`, `yield_10y`, `yield_0.5y`
+- Prices: `price_1y`, `price_10y`, `price_0.5y`
+- Returns: `ret_10y_daily`, `ret_5y_monthly`
+- Excess returns: `excess_ret_10y_daily`
+
+## Convenience Functions
+
+### Yield Curve Snapshots
+
+```julia
+# Create yield curve for a single date
+curve = FinanceRoutines.gsw_curve_snapshot(params)
+curve = FinanceRoutines.gsw_curve_snapshot(5.0, -2.0, 1.5, 0.8, 2.5, 0.5)
+
+# Custom maturities
+curve = FinanceRoutines.gsw_curve_snapshot(params, maturities=[1, 3, 5, 7, 10, 20, 30])
+```
+
+## Model Specifications
+
+The package automatically handles two model types:
+
+### 4-Factor Svensson Model
+- Uses all 6 parameters: β₀, β₁, β₂, β₃, τ₁, τ₂
+- More flexible yield curve shapes
+- Used in recent periods
+
+### 3-Factor Nelson-Siegel Model
+- Uses 4 parameters: β₀, β₁, β₂, τ₁ (β₃=0, τ₂=τ₁)
+- Simpler model specification
+- Used in earlier periods or when data is missing
+
+The package automatically detects which model to use based on available parameters.
+
+## Missing Data Handling
+
+- Automatically converts `-999` flag values to `missing`
+- Gracefully handles periods with missing τ₂/β₃ parameters
+- Propagates missing values through calculations appropriately
+
+## Example Analysis
+
+```julia
+using DataFrames, Statistics
+
+# Import data for 1970s and 1980s
+df = import_gsw_parameters(date_range=(Date("1970-01-01"), Date("1989-12-31")))
+
+# Add calculations
+FinanceRoutines.add_yields!(df, 1) # 1-year yields
+FinanceRoutines.add_prices!(df, 1) # 1-year prices
+FinanceRoutines.add_returns!(df, 2, frequency=:daily, return_type=:log) # 2-year daily returns
+
+# Analyze by decade
+transform!(df, :date => (x -> year.(x) .÷ 10 * 10) => :decade)
+
+# Summary statistics
+stats = combine(
+ groupby(df, :decade),
+ :yield_1y => (x -> mean(skipmissing(x))) => :mean_yield,
+ :yield_1y => (x -> std(skipmissing(x))) => :vol_yield,
+ :ret_2y_daily => (x -> mean(skipmissing(x))) => :mean_return,
+ :ret_2y_daily => (x -> std(skipmissing(x))) => :vol_return
+)
+```
+
+
+## API
+## Data Source
+
+GSW yield curve parameters are downloaded from the Federal Reserve Board website:
+- URL: https://www.federalreserve.gov/data/yield-curve-tables/feds200628.csv
+- Updated daily
+- Historical data available from 1961
+
+## References
+
+- Gürkaynak, R. S., B. Sack, and J. H. Wright (2007). "The U.S. Treasury yield curve: 1961 to the present." Journal of Monetary Economics 54(8), 2291-2304.
+- Nelson, C. R. and A. F. Siegel (1987). "Parsimonious modeling of yield curves." Journal of Business 60(4), 473-489.
+- Svensson, L. E. (1994). "Estimating and interpreting forward interest rates: Sweden 1992-1994." NBER Working Paper No. 4871.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/src/FinanceRoutines.jl b/src/FinanceRoutines.jl
@@ -1,11 +1,11 @@
module FinanceRoutines
-# ------------------------------------------------------------------------------------------
+# --------------------------------------------------------------------------------------------------
import Downloads
import ZipFile
import CSV
-import DataFrames: AbstractDataFrame, AsTable, DataFrame, ByRow, groupby, nrow, passmissing, Not,
+import DataFrames: AbstractDataFrame, AsTable, DataFrame, DataFrameRow, ByRow, groupby, nrow, passmissing, Not,
rename!, select, select!, subset!, transform!, leftjoin, disallowmissing!
import DataPipes: @p
import Dates: Dates, Date, Day, Month, year
@@ -25,10 +25,10 @@ import ShiftedArrays: lag
import Tables: columntable
import WeakRefStrings: String3, String7, String15
import ZipFile: ZipFile.Reader
-# ------------------------------------------------------------------------------------------
+# --------------------------------------------------------------------------------------------------
-# ------------------------------------------------------------------------------------------
+# --------------------------------------------------------------------------------------------------
# Import functions
include("Utilities.jl")
include("betas.jl")
@@ -37,16 +37,18 @@ include("ImportYields.jl")
include("ImportCRSP.jl")
include("ImportComp.jl")
include("Merge_CRSP_Comp.jl")
-# ------------------------------------------------------------------------------------------
+# --------------------------------------------------------------------------------------------------
-# ------------------------------------------------------------------------------------------
+# --------------------------------------------------------------------------------------------------
# List of exported functions
export greet_FinanceRoutines # for debugging
# Yields on Treasuries
-export import_GSW
-export estimate_yield_GSW!, estimate_price_GSW!, estimate_return_GSW!
+export import_gsw_parameters # basic data import function
+export GSWParameters # the GSW type of yield curve calculations
+export gsw_yield, gsw_price, gsw_forward_rate, gsw_yield_curve, gsw_price_curve,
+ gsw_return, gsw_excess_return
# Fama-French data
export import_FF3
@@ -65,9 +67,8 @@ export link_MSF
# More practical functions
export calculate_rolling_betas
+# --------------------------------------------------------------------------------------------------
-# ------------------------------------------------------------------------------------------
-
-
+# --------------------------------------------------------------------------------------------------
end
diff --git a/src/ImportCRSP.jl b/src/ImportCRSP.jl
@@ -15,40 +15,6 @@
# ------------------------------------------------------------------------------------------
-# ------------------------------------------------------------------------------------------
-# function list_crsp(;
-# wrds_conn, user, password)
-
-# list_libraries = """
-# WITH RECURSIVE "names"("name") AS (
-# SELECT n.nspname AS "name"
-# FROM pg_catalog.pg_namespace n
-# WHERE n.nspname !~ '^pg_'
-# AND n.nspname <> 'information_schema')
-# SELECT "name"
-# FROM "names"
-# WHERE pg_catalog.has_schema_privilege(
-# current_user, "name", 'USAGE') = TRUE;
-# """
-# res_list_libraries = execute(wrds_conn, list_libraries);
-# df_libraries = DataFrame(columntable(res_list_libraries))
-# @rsubset(df_libraries, occursin(r"crsp", :name) )
-
-# library = "crsp"
-# list_tables = """
-# SELECT table_name FROM INFORMATION_SCHEMA.views
-# WHERE table_schema IN ('$library');
-# """
-# res_list_tables = execute(wrds_conn, list_tables);
-# df_tables = DataFrame(columntable(res_list_tables))
-# @rsubset(df_tables, occursin(r"mse", :table_name) )
-
-# return run_sql_query(conn, query)
-
-
-# end
-# ------------------------------------------------------------------------------------------
-
# ------------------------------------------------------------------------------------------
"""
@@ -77,14 +43,14 @@ function import_MSF(wrds_conn::Connection;
# -- GETTING COLUMN NAMES
# download potential columns
- msenames_columns = get_postgres_columns("crsp", "msenames"; wrds_conn=wrds_conn,
+ msenames_columns = _get_postgres_columns("crsp", "msenames"; wrds_conn=wrds_conn,
prior_columns = vcat(["PERMNO", "NAMEDT", "NAMEENDT", "SHRCD", "EXCHCD", "HEXCD",
"NAICS", "HSICCD", "CUSIP"],
uppercase.(variables))
)
msenames_columns = join(uppercase.(msenames_columns), ", ")
- msf_columns = get_postgres_columns("crsp", "msf"; wrds_conn=wrds_conn,
+ msf_columns = _get_postgres_columns("crsp", "msf"; wrds_conn=wrds_conn,
prior_columns = vcat(["PERMNO","PERMCO","DATE","PRC","ALTPRC","RET","RETX","SHROUT","CFACPR","CFACSHR"],
uppercase.(variables))
)
@@ -339,16 +305,23 @@ function import_MSF_v2(wrds_conn::Connection;
# ----------------------------------------------------------------------------------------------
# the easy way
@log_msg "# -- GETTING MONTHLY STOCK FILE (CIZ) ... msf_v2"
- # msf_columns = get_postgres_columns("crsp", "msf_v2"; wrds_conn=wrds_conn) |> sort
+ msf_v2_columns = _get_postgres_columns("crsp", "msf_v2"; wrds_conn=wrds_conn) |> sort
+ col_select = ["permno", "hdrcusip", "mthcaldt", "mthprc", "mthret", "mthcap", "shrout",
+ "mthretx", "mthprevcap", "mthprevprc", "permco"]
+ col_query = @p vcat(col_select, variables) |>
+ uppercase.(__) |> filter(!isempty) |> filter(_ ∈ msf_v2_columns)
+ # note that selecting all variables to download here is a lot slower than with msf_v1 because of the many more variables ...
+
postgre_query_msf = """
- SELECT *
- FROM crsp.msf_v2
- WHERE MTHCALDT >= '$(string(date_range[1]))' AND MTHCALDT <= '$(string(date_range[2]))'
- AND SHARETYPE = 'NS' AND SECURITYTYPE = 'EQTY' AND SECURITYSUBTYPE = 'COM'
- AND USINCFLG = 'Y' AND ISSUERTYPE IN ('ACOR', 'CORP')
- AND PRIMARYEXCH IN ('N', 'A', 'Q') AND CONDITIONALTYPE = 'RW' AND TRADINGSTATUSFLG = 'A'
+ SELECT $(join(col_query, ", "))
+ FROM crsp.msf_v2
+ WHERE MTHCALDT >= '$(string(date_range[1]))' AND MTHCALDT <= '$(string(date_range[2]))'
+ AND SHARETYPE = 'NS' AND SECURITYTYPE = 'EQTY' AND SECURITYSUBTYPE = 'COM'
+ AND USINCFLG = 'Y' AND ISSUERTYPE IN ('ACOR', 'CORP')
+ AND PRIMARYEXCH IN ('N', 'A', 'Q') AND CONDITIONALTYPE = 'RW' AND TRADINGSTATUSFLG = 'A'
"""
df_msf_v2 = execute(wrds_conn, postgre_query_msf) |> DataFrame;
+
transform!(df_msf_v2, # clean up the dataframe
names(df_msf_v2, check_integer.(eachcol(df_msf_v2))) .=> (x->convert.(Union{Missing, Int}, x));
renamecols = false);
@@ -360,8 +333,8 @@ function import_MSF_v2(wrds_conn::Connection;
# the hard way
# ------
log_msg("# -- GETTING MONTHLY STOCK FILE (CIZ) ... stkmthsecuritydata")
- msf_columns = get_postgres_columns("crsp", "stkmthsecuritydata"; wrds_conn=wrds_conn) # download potential columns
- # msf_columns = get_postgres_columns("crsp", "msf_v2"; wrds_conn=wrds_conn) # this one is pre-merged!
+ msf_columns = _get_postgres_columns("crsp", "stkmthsecuritydata"; wrds_conn=wrds_conn) # download potential columns
+ # msf_columns = _get_postgres_columns("crsp", "msf_v2"; wrds_conn=wrds_conn) # this one is pre-merged!
msf_columns = join(uppercase.(msf_columns), ", ")
# legacy SIZ to CIZ conversion of shrcd flag (see doc)
@@ -389,7 +362,7 @@ function import_MSF_v2(wrds_conn::Connection;
# -- need to get shrout
- # stkshares = get_postgres_columns("crsp", "stkshares"; wrds_conn=wrds_conn)
+ # stkshares = _get_postgres_columns("crsp", "stkshares"; wrds_conn=wrds_conn)
postgre_query_stkshares = """
SELECT * FROM crsp.stkshares
WHERE SHRSTARTDT >= '$(string(date_range[1]))' AND SHRENDDT <= '$(string(date_range[2]))'
@@ -410,12 +383,12 @@ function import_MSF_v2(wrds_conn::Connection;
# ----------------------------------------------------------------------------------------------
# ------
@log_msg "# -- GETTING StkSecurityInfoHist (CIZ)"
- # stksecurityinfo = get_postgres_columns("crsp", "stksecurityinfohist"; wrds_conn=wrds_conn)
+ # stksecurityinfo = _get_postgres_columns("crsp", "stksecurityinfohist"; wrds_conn=wrds_conn)
stksecurityinfo_cols = vcat(
["PERMNO", "SecInfoStartDt", "SecInfoEndDt", "IssuerNm", "ShareClass",
"PrimaryExch", "TradingStatusFlg", "NAICS", "SICCD", "HDRCUSIP"],
uppercase.(variables)) |> filter(!isempty) |> unique
- stksecurityinfo = get_postgres_columns("crsp", "stksecurityinfohist"; wrds_conn=wrds_conn,
+ stksecurityinfo = _get_postgres_columns("crsp", "stksecurityinfohist"; wrds_conn=wrds_conn,
prior_columns = stksecurityinfo_cols) |> sort
stksecurityinfo_cols = join(uppercase.(stksecurityinfo_cols), ", ")
@@ -540,8 +513,8 @@ function import_DSF_v2(wrds_conn::Connection;
# could pick either way ...
- # dsf_columns = get_postgres_columns("crsp", "dsf_v2"; wrds_conn=wrds_conn) |> sort
- # stkmthsecuritydata_columns = get_postgres_columns("crsp", "stkdlysecuritydata"; wrds_conn=wrds_conn) |> sort
+ # dsf_columns = _get_postgres_columns("crsp", "dsf_v2"; wrds_conn=wrds_conn) |> sort
+ # stkmthsecuritydata_columns = _get_postgres_columns("crsp", "stkdlysecuritydata"; wrds_conn=wrds_conn) |> sort
# set up the query for msf
postgre_query_dsf = """
@@ -590,7 +563,7 @@ end
# WHERE table_schema = \$1
# """
# postgres_res = execute(wrds_conn, postgres_query, (table_schema,))
-function get_postgres_columns(table_schema, table_name; wrds_conn, prior_columns::Vector{String} = [""])
+function _get_postgres_columns(table_schema, table_name; wrds_conn, prior_columns::Vector{String} = [""])
# download potential columns
postgres_query= """
@@ -611,7 +584,7 @@ function get_postgres_columns(table_schema, table_name; wrds_conn, prior_columns
end
-function get_postgres_table(table_schema, table_name; wrds_conn, prior_columns::Vector{String} = [""])
+function _get_postgres_table(table_schema, table_name; wrds_conn, prior_columns::Vector{String} = [""])
if isempty(prior_columns) || prior_columns == [""]
columns = "*"
@@ -627,5 +600,39 @@ function get_postgres_table(table_schema, table_name; wrds_conn, prior_columns::
postgres_res = execute(wrds_conn, postgres_query)
return columntable(postgres_res)
end
+# --------------------------------------------------------------------------------------------------
+
+# --------------------------------------------------------------------------------------------------
+# function list_crsp(;
+# wrds_conn, user, password)
+# list_libraries = """
+# WITH RECURSIVE "names"("name") AS (
+# SELECT n.nspname AS "name"
+# FROM pg_catalog.pg_namespace n
+# WHERE n.nspname !~ '^pg_'
+# AND n.nspname <> 'information_schema')
+# SELECT "name"
+# FROM "names"
+# WHERE pg_catalog.has_schema_privilege(
+# current_user, "name", 'USAGE') = TRUE;
+# """
+# res_list_libraries = execute(wrds_conn, list_libraries);
+# df_libraries = DataFrame(columntable(res_list_libraries))
+# @rsubset(df_libraries, occursin(r"crsp", :name) )
+
+# library = "crsp"
+# list_tables = """
+# SELECT table_name FROM INFORMATION_SCHEMA.views
+# WHERE table_schema IN ('$library');
+# """
+# res_list_tables = execute(wrds_conn, list_tables);
+# df_tables = DataFrame(columntable(res_list_tables))
+# @rsubset(df_tables, occursin(r"mse", :table_name) )
+
+# return run_sql_query(conn, query)
+
+
+# end
+# --------------------------------------------------------------------------------------------------
diff --git a/src/ImportFamaFrench.jl b/src/ImportFamaFrench.jl
@@ -1,92 +1,197 @@
-# ------------------------------------------------------------------------------------------
+# --------------------------------------------------------------------------------------------------
# ImportFamaFrench.jl
# Collection of functions that import
-# financial data into julia
-# ------------------------------------------------------------------------------------------
+# financial data from Ken French's website into julia
+# --------------------------------------------------------------------------------------------------
-# ------------------------------------------------------------------------------------------
+# --------------------------------------------------------------------------------------------------
# List of exported functions
-# export greet_FinanceRoutines # for debugging
# export import_FF3 # read monthly FF3
-# ------------------------------------------------------------------------------------------
+# --------------------------------------------------------------------------------------------------
-# ------------------------------------------------------------------------------------------
-function greet_FinanceRoutines()
- return "Hello FinanceRoutines!"
-end
-# ------------------------------------------------------------------------------------------
-
-
-
-# ------------------------------------------------------------------------------------------
-function import_FF3()
+# --------------------------------------------------------------------------------------------------
+"""
+ import_FF3(;frequency::Symbol=:monthly) -> DataFrame
+
+Import Fama-French 3-factor model data directly from Ken French's data library.
+
+Downloads and parses the Fama-French research data factors (market risk premium,
+size factor, value factor, and risk-free rate) at the specified frequency.
+
+# Arguments
+- `frequency::Symbol=:monthly`: Data frequency to import. Options are:
+ - `:monthly` - Monthly factor returns (default)
+ - `:annual` - Annual factor returns
+ - `:daily` - Daily factor returns
+
+# Returns
+- `DataFrame`: Fama-French 3-factor data with columns:
+ - **Monthly/Annual**: `datem`/`datey`, `mktrf`, `smb`, `hml`, `rf`
+ - **Daily**: `date`, `mktrf`, `smb`, `hml`, `rf`
+
+Where:
+- `mktrf`: Market return minus risk-free rate (market risk premium)
+- `smb`: Small minus big (size factor)
+- `hml`: High minus low (value factor)
+- `rf`: Risk-free rate
+
+# Examples
+```julia
+# Import monthly data (default)
+monthly_ff = import_FF3()
+
+# Import annual data
+annual_ff = import_FF3(frequency=:annual)
+
+# Import daily data
+daily_ff = import_FF3(frequency=:daily)
+```
+
+# Notes
+- Data is sourced directly from Kenneth French's data library at Dartmouth
+- Monthly and annual data come from the same source file; each frequency keeps only its own section
+- Date formats are automatically parsed to appropriate Julia date types
+- Missing values are filtered out from the datasets
+- Requires internet connection to download data
+
+# Data Source
+Kenneth R. French Data Library: https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/data_library.html
+"""
+function import_FF3(;frequency::Symbol=:monthly)
- url_FF = "https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Research_Data_Factors_CSV.zip"
ff_col_classes = [String7, Float64, Float64, Float64, Float64];
- row_lim = div(MonthlyDate(Dates.today()) - MonthlyDate(1926, 7), Dates.Month(1))
-
- http_response = Downloads.download(url_FF);
- z = ZipFile.Reader(http_response) ;
- a_file_in_zip = filter(x -> match(r".*csv", lowercase(x.name)) != nothing, z.files)[1]
- df_FF3 = copy(
- CSV.File(a_file_in_zip,
- skipto=5, header=4, limit=row_lim, delim=",", ntasks=1,
- types=ff_col_classes) |>
- DataFrame);
- close(z)
-
- rename!(df_FF3, [:datem, :mktrf, :smb, :hml, :rf])
- df_FF3 = @p df_FF3 |> filter(.!ismissing.(_.datem) && isequal.( length.(strip(_.datem)), 6 ) )
- df_FF3 = @p df_FF3 |> filter(.!ismissing.(_.mktrf))
-
- transform!(df_FF3, :datem => ByRow(x -> MonthlyDate(x, "yyyymm")) => :datem)
- df_FF3 = @p df_FF3 |> filter(_.datem .>= MonthlyDate("1900-01", "yyyy-mm"))
-
- return df_FF3
-end
-
+ url_FF_mth_yr = "https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Research_Data_Factors_CSV.zip"
+ url_FF_daily = "https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Research_Data_Factors_daily_CSV.zip"
+ # ----------------------------------------------------------------------------------------------
+ if frequency==:annual
-"""
- import_FF3(frequency::Symbol)
+ http_response = Downloads.download(url_FF_mth_yr);
+ z = ZipFile.Reader(http_response) ;
+ a_file_in_zip = filter(x -> match(r".*csv", lowercase(x.name)) != nothing, z.files)[1]
+ df_FF3 = copy(_parse_ff_annual(a_file_in_zip, types=ff_col_classes))
+ close(z)
+ return df_FF3
-Download and import the Fama-French 3 Factors from Ken French website.
+ # ----------------------------------------------------------------------------------------------
+ elseif frequency==:monthly
-If `frequency` is unspecified, import the monthly research returns.
-If `frequency` is :daily, import the daily research returns.
+ http_response = Downloads.download(url_FF_mth_yr);
+ z = ZipFile.Reader(http_response) ;
+ a_file_in_zip = filter(x -> match(r".*csv", lowercase(x.name)) != nothing, z.files)[1]
+ df_FF3 = copy(_parse_ff_monthly(a_file_in_zip, types=ff_col_classes))
+ close(z)
-"""
-function import_FF3(frequency::Symbol)
+ transform!(df_FF3, :datem => ByRow(x -> MonthlyDate(x, "yyyymm")) => :datem)
+ return df_FF3
- if frequency==:monthly
- return import_FF3()
+ # ----------------------------------------------------------------------------------------------
elseif frequency==:daily
- url_FF = "https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Research_Data_Factors_daily_CSV.zip"
-
- http_response = Downloads.download(url_FF);
-
+
+ http_response = Downloads.download(url_FF_daily);
z = ZipFile.Reader(http_response) ;
a_file_in_zip = filter(x -> match(r".*csv", lowercase(x.name)) != nothing, z.files)[1]
df_FF3 = copy(CSV.File(a_file_in_zip, header=4, footerskip=1) |> DataFrame);
close(z)
-
rename!(df_FF3, [:date, :mktrf, :smb, :hml, :rf]);
df_FF3 = @p df_FF3 |> filter(.!ismissing.(_.date) && .!ismissing.(_.mktrf))
-
transform!(df_FF3, :date => ByRow(x -> Date(string(x), "yyyymmdd") ) => :date)
-
return df_FF3
+ # ----------------------------------------------------------------------------------------------
else
- @warn "Frequency $frequency not known. Try :daily or leave blank for :monthly"
+ error("Frequency $frequency not known. Options are :daily, :monthly, or :annual")
+ end
+
+end
+# --------------------------------------------------------------------------------------------------
+
+
+# --------------------------------------------------------------------------------------------------
+function _parse_ff_annual(zip_file; types=nothing)
+
+ lines = String[]
+ found_annual = false
+
+ # Read all lines from the zip file entry
+ file_lines = split(String(read(zip_file)), '\n')
+
+ for line in file_lines
+ if occursin(r"Annual Factors", line)
+ found_annual = true
+ continue
+ end
+
+ if found_annual
+ # Skip the header line that comes after "Annual Factors"
+ if occursin(r"Mkt-RF|SMB|HML|RF", line)
+ continue
+ end
+
+ if occursin(r"^\s*$", line) || occursin(r"[A-Za-z]{3,}", line[1:min(10, length(line))])
+ if !occursin(r"^\s*$", line) && !occursin(r"^\d{4}", line)
+ break
+ end
+ continue
+ end
+
+ if occursin(r"^\d{4}", line)
+ push!(lines, line)
+ end
+ end
+ end
+
+ if !found_annual
+ error("Annual Factors section not found in file")
+ end
+
+ buffer = IOBuffer(join(lines, "\n"))
+ return CSV.File(buffer, header=false, delim=",", ntasks=1, types=types) |> DataFrame |>
+ df -> rename!(df, [:datey, :mktrf, :smb, :hml, :rf])
+end
+# --------------------------------------------------------------------------------------------------
+
+
+# --------------------------------------------------------------------------------------------------
+function _parse_ff_monthly(zip_file; types=nothing)
+
+
+ # Read all lines from the zip file entry
+ file_lines = split(String(read(zip_file)), '\n')
+ skipto = 5
+
+ # Collect data lines until we hit "Annual Factors"
+ data_lines = String[]
+
+ for i in skipto:length(file_lines)
+ line = file_lines[i]
+
+ # Stop when we hit Annual Factors section
+ if occursin(r"Annual Factors", line)
+ break
+ end
+
+ # Skip empty lines
+ if occursin(r"^\s*$", line)
+ continue
+ end
+
+ # Add non-empty data lines
+ push!(data_lines, line)
end
+
+ # Create IOBuffer with the data lines only (the header row is skipped)
+ buffer = IOBuffer(join(data_lines, "\n"))
+
+ return CSV.File(buffer, header=false, delim=",", ntasks=1, types=types) |> DataFrame |>
+ df -> rename!(df, [:datem, :mktrf, :smb, :hml, :rf])
end
-# ------------------------------------------------------------------------------------------
+# --------------------------------------------------------------------------------------------------
diff --git a/src/ImportYields.jl b/src/ImportYields.jl
@@ -1,151 +1,1327 @@
-# ------------------------------------------------------------------------------------------
+# --------------------------------------------------------------------------------------------------
# ImportYields.jl
# Collection of functions that import Treasury Yields data
-# ------------------------------------------------------------------------------------------
+# --------------------------------------------------------------------------------------------------
+
+
+# --------------------------------------------------------------------------------------------------
+# GSW Parameter Type Definition
+# --------------------------------------------------------------------------------------------------
+
+"""
+    GSWParameters
+
+Structure to hold Gürkaynak-Sack-Wright Nelson-Siegel-Svensson model parameters.
+
+# Fields
+- `β₀::Union{Float64, Missing}`: Level parameter (BETA0)
+- `β₁::Union{Float64, Missing}`: Slope parameter (BETA1)
+- `β₂::Union{Float64, Missing}`: Curvature parameter (BETA2)
+- `β₃::Union{Float64, Missing}`: Second curvature parameter (BETA3) - `missing` in the 3-factor version
+- `τ₁::Union{Float64, Missing}`: First decay parameter (TAU1, must be positive)
+- `τ₂::Union{Float64, Missing}`: Second decay parameter (TAU2, positive when present) - `missing` in the 3-factor version
+
+# Examples
+```julia
+# Create GSW parameters manually (4-factor model)
+params = GSWParameters(5.0, -2.0, 1.5, 0.8, 2.5, 0.5)
+
+# Create GSW parameters for 3-factor model (when τ₂/β₃ are missing)
+params_3factor = GSWParameters(5.0, -2.0, 1.5, missing, 2.5, missing)
+
+# Create from DataFrame row
+df = import_gsw_parameters()
+params = GSWParameters(df[1, :]) # First row
+
+# Access individual parameters
+println("Level: ", params.β₀)
+println("Slope: ", params.β₁)
+```
+
+# Notes
+- The constructor returns `missing` (not a `GSWParameters`) when any of β₀, β₁, β₂, τ₁ is missing
+- Throws `ArgumentError` if τ₁, or a non-missing τ₂, is not strictly positive
+- β₃ and τ₂ may be `missing`; the model then degenerates to the 3-factor Nelson-Siegel form
+- All non-missing inputs are converted to `Float64`
+"""
+struct GSWParameters
+    β₀::Union{Float64, Missing} # Level
+    β₁::Union{Float64, Missing} # Slope
+    β₂::Union{Float64, Missing} # Curvature 1
+    β₃::Union{Float64, Missing} # Curvature 2 (may be missing for 3-factor model)
+    τ₁::Union{Float64, Missing} # Decay 1 (must be positive when present)
+    τ₂::Union{Float64, Missing} # Decay 2 (may be missing for 3-factor model)
+
+    # Inner constructor with validation
+    function GSWParameters(β₀, β₁, β₂, β₃, τ₁, τ₂)
+
+        # A missing core parameter makes the curve unusable: hand back `missing` instead
+        if ismissing(β₀) || ismissing(β₁) || ismissing(β₂) || ismissing(τ₁)
+            return missing
+        end
+
+        # τ₁ is known to be present from here on; τ₂ stays optional
+        if τ₁ <= 0
+            throw(ArgumentError("First decay parameter τ₁ must be positive when present, got τ₁=$τ₁"))
+        end
+        if !ismissing(τ₂) && τ₂ <= 0
+            throw(ArgumentError("Second decay parameter τ₂ must be positive when present, got τ₂=$τ₂"))
+        end
+
+        # Only β₃ and τ₂ can still be missing at this point
+        return new(
+            Float64(β₀),
+            Float64(β₁),
+            Float64(β₂),
+            ismissing(β₃) ? missing : Float64(β₃),
+            Float64(τ₁),
+            ismissing(τ₂) ? missing : Float64(τ₂)
+        )
+    end
+end
+
+# Convenience constructors
+"""
+    GSWParameters(row::DataFrameRow)
+
+Build `GSWParameters` from the BETA0, BETA1, BETA2, BETA3, TAU1 and TAU2 entries of `row`, forwarding `missing` entries unchanged.
+"""
+function GSWParameters(row::DataFrameRow)
+    columns = (:BETA0, :BETA1, :BETA2, :BETA3, :TAU1, :TAU2)
+    return GSWParameters((getproperty(row, name) for name in columns)...)
+end
+
+"""
+    GSWParameters(row::NamedTuple)
+
+Build `GSWParameters` from the `BETA0`, `BETA1`, `BETA2`, `BETA3`, `TAU1`, `TAU2` fields of `row`, forwarding `missing` values unchanged.
+"""
+function GSWParameters(row::NamedTuple)
+    values_in_order = map(key -> getproperty(row, key), (:BETA0, :BETA1, :BETA2, :BETA3, :TAU1, :TAU2))
+    return GSWParameters(values_in_order...)
+end
+
+
+"""
+    is_three_factor_model(params::GSWParameters)
+
+Tell whether `params` is treated as a 3-factor Nelson-Siegel model, i.e. whether β₃ or τ₂ is missing.
+
+# Returns
+- `Bool`: `true` when either β₃ or τ₂ is missing, `false` when both are present (4-factor Svensson model)
+"""
+function is_three_factor_model(params::GSWParameters)
+    return any(ismissing, (params.β₃, params.τ₂))
+end
+
+# Helper function to extract parameters as tuple, handling missing values
+"""
+    _extract_params(params::GSWParameters)
+
+Return `(β₀, β₁, β₂, β₃, τ₁, τ₂)` ready to be splatted into the calculation functions.
+When `is_three_factor_model(params)` holds, β₃ is replaced by 0.0 and a missing τ₂ by τ₁.
+"""
+function _extract_params(params::GSWParameters)
+    # Second-curvature pair as stored; either entry may be missing
+    β₃, τ₂ = params.β₃, params.τ₂
+
+    # 3-factor case: switch the second curvature term off and fall back on τ₁
+    # when τ₂ itself is the missing entry
+    if is_three_factor_model(params)
+        β₃ = 0.0
+        τ₂ = coalesce(τ₂, params.τ₁)
+    end
+
+    return (params.β₀, params.β₁, params.β₂, β₃, params.τ₁, τ₂)
+end
+# --------------------------------------------------------------------------------------------------
+
+
+
+# --------------------------------------------------------------------------------------------------
+"""
+    import_gsw_parameters(; date_range=nothing, additional_variables=Symbol[], validate=true)
+
+Import Gürkaynak-Sack-Wright (GSW) yield curve parameters from the Federal Reserve.
+
+Downloads the daily GSW yield curve parameter estimates from the Fed's website and returns
+a cleaned DataFrame with the Nelson-Siegel-Svensson model parameters.
+
+# Arguments
+- `date_range::Union{Nothing, Tuple{Date, Date}}`: Optional date range for filtering data.
+  If `nothing`, returns all available data. Default: `nothing`
+- `additional_variables::Vector{Symbol}`: Extra columns of the source file to keep (see Notes);
+  names that are not columns of the file are ignored. Default: `Symbol[]`
+- `validate::Bool`: Whether to run basic quality checks on the cleaned data. Default: `true`
+
+# Returns
+- `DataFrame`: Columns `:date`, `:BETA0`, `:BETA1`, `:BETA2`, `:BETA3`, `:TAU1`, `:TAU2`, then any additional variables
+
+# Throws
+- `ArgumentError`: If the download fails, the file cannot be parsed, or (with `validate=true`)
+  no rows remain for the requested date range
+- Any other exception raised while reading, cleaning or validating is rethrown unchanged
+
+# Examples
+```julia
+# Import all available data
+df = import_gsw_parameters()
+
+# Import data for specific date range
+df = import_gsw_parameters(date_range=(Date("2020-01-01"), Date("2023-12-31")))
+
+# Import without validation (faster, but less safe)
+df = import_gsw_parameters(validate=false)
+```
+
+# Notes
+- Data source: Federal Reserve Board yield-curve tables (`feds200628.csv`)
+- Missing values in the original data are converted to `missing`
+- Data is automatically sorted by date; a reversed `date_range` is swapped with a warning
+- Additional variables available in the file:
+  - Zero-coupon yield,Continuously Compounded,SVENYXX
+  - Par yield,Coupon-Equivalent,SVENPYXX
+  - Instantaneous forward rate,Continuously Compounded,SVENFXX
+  - One-year forward rate,Coupon-Equivalent,SVEN1FXX
+"""
+function import_gsw_parameters(;
+    date_range::Union{Nothing, Tuple{Date, Date}} = nothing,
+    additional_variables::Vector{Symbol}=Symbol[],
+    validate::Bool = true)
+
+    @info "Downloading GSW Yield Curve Parameters from Federal Reserve"
+    url_gsw = "https://www.federalreserve.gov/data/yield-curve-tables/feds200628.csv"
+
+    # Stays `nothing` until the download succeeds, so `finally` knows whether
+    # there is a temporary file to delete
+    temp_file = nothing
+
+    try
+        temp_file = Downloads.download(url_gsw)
+
+        # Column names are read from line 10 of the file, data from line 11 onwards
+        df_gsw = CSV.read(temp_file, DataFrame,
+            skipto=11,
+            header=10,
+            silencewarnings=true)
+
+        # Clean and process the data
+        df_clean = _clean_gsw_data(df_gsw, date_range; additional_variables=additional_variables)
+
+        if validate
+            _validate_gsw_data(df_clean)
+        end
+
+        @info "Successfully imported $(nrow(df_clean)) rows of GSW parameters"
+        return df_clean
+    catch e
+        if e isa Downloads.RequestError
+            throw(ArgumentError("Failed to download GSW data from Federal Reserve. Check internet connection."))
+        elseif e isa CSV.Error
+            throw(ArgumentError("Failed to parse GSW data. The file format may have changed."))
+        else
+            rethrow()
+        end
+    finally
+        # Runs on success and on every error path, so the downloaded file never leaks
+        isnothing(temp_file) || rm(temp_file, force=true)
+    end
+end
+
+
+
+"""
+    _clean_gsw_data(df_raw, date_range; additional_variables=Symbol[])
+
+Restrict the raw Federal Reserve table to the requested dates and columns, parse the kept columns to floats and sort by date.
+"""
+function _clean_gsw_data(df_raw::DataFrame,
+    date_range::Union{Nothing, Tuple{Date, Date}};
+    additional_variables::Vector{Symbol}=Symbol[])
+
+    # Work on a copy so the caller's table is left alone, and use the
+    # lower-case date column name from here on
+    df = rename!(copy(df_raw), "Date" => "date")
+
+    # Optional date window (both bounds inclusive); bounds given in reverse
+    # order are put back in order after a warning
+    if date_range !== nothing
+        start_date, end_date = date_range
+        if start_date > end_date
+            @warn "starting date posterior to end date ... shuffling them around"
+            start_date, end_date = end_date, start_date
+        end
+        filter!(:date => (d -> start_date <= d <= end_date), df)
+    end
+
+    # GSW parameters first, then whichever requested extras exist in the table
+    extras = intersect(additional_variables, propertynames(df))
+    parameter_cols = unique(
+        vcat([:BETA0, :BETA1, :BETA2, :BETA3, :TAU1, :TAU2], extras)
+    )
+    select!(df, :date, parameter_cols...)
+
+    # Parse every kept column element by element; entries that cannot be
+    # parsed come back as missing
+    foreach(parameter_cols) do col
+        transform!(df, col => ByRow(_safe_parse_float) => col)
+    end
+
+    # Chronological order
+    sort!(df, :date)
+
+    return df
+end
+
+"""
+    _safe_parse_float(value)
+
+Convert `value` to `Float64`, or return `missing` when it is missing, empty, unparseable,
+not convertible to `Float64`, or equal to the `-999.99` missing-data flag.
+"""
+function _safe_parse_float(value)
+    # Missing or empty input: nothing to parse
+    (ismissing(value) || value == "") && return missing
+
+    # Strings go through tryparse (surrounding whitespace ignored); any other value is
+    # handed untouched to the Float64 conversion below
+    candidate = value isa AbstractString ? tryparse(Float64, strip(value)) : value
+
+    # tryparse reports an unparseable string with `nothing`
+    if isnothing(candidate)
+        return missing
+    end
+
+    # Values that cannot be converted to Float64 are treated as missing as well,
+    # which is why the conversion is wrapped in try/catch
+    numeric_value = try
+        Float64(candidate)
+    catch
+        return missing
+    end
+
+    # -999.99 is the only flag value recognised here as "no data"; other negative
+    # numbers such as -999 are returned unchanged
+    if numeric_value == -999.99
+        return missing
+    end
+
+    return numeric_value
+end
+
+"""
+    _validate_gsw_data(df)
+
+Run basic sanity checks on cleaned GSW data: throw on an empty table or absent columns, warn on all-missing columns and on date gaps.
+"""
+function _validate_gsw_data(df::DataFrame)
+    # An empty table means the date filter removed everything
+    if nrow(df) == 0
+        throw(ArgumentError("No data found for the specified date range"))
+    end
+
+    # The date column and all six GSW parameter columns must be present
+    required_cols = [:date, :BETA0, :BETA1, :BETA2, :BETA3, :TAU1, :TAU2]
+    missing_cols = setdiff(required_cols, propertynames(df))
+    isempty(missing_cols) || throw(ArgumentError("Missing required columns: $(missing_cols)"))
+
+    # A parameter column without a single observation is reported but not fatal;
+    # no range check is applied to the values themselves
+    for col in (:BETA0, :BETA1, :BETA2, :BETA3, :TAU1, :TAU2)
+        if all(ismissing, df[!, col])
+            @warn "Column $col contains only missing values"
+        end
+    end
+
+    # Date continuity: consecutive rows more than 7 days apart are counted and
+    # reported in a single warning
+    if nrow(df) > 1
+        date_steps = diff(df.date)
+        n_gaps = count(step -> step > Day(7), date_steps)
+        if n_gaps > 0
+            @warn "Found $(n_gaps) gaps larger than 7 days in the data"
+        end
+    end
+end
+# --------------------------------------------------------------------------------------------------
+
+
+
+# --------------------------------------------------------------------------------------------------
+# GSW Core Calculation Functions
+
+# Method 1: Using GSWParameters struct (preferred for clean API)
+"""
+    gsw_yield(maturity, params::GSWParameters)
+
+Compute the yield at `maturity` from a GSW parameter struct by unpacking it and calling the scalar-parameter method.
+
+# Arguments
+- `maturity::Real`: Time to maturity in years (must be positive)
+- `params::GSWParameters`: GSW parameter struct
+# Returns
+- `Float64`: Yield in percent (e.g., 5.0 for 5%)
+
+# Examples
+```julia
+params = GSWParameters(5.0, -2.0, 1.5, 0.8, 2.5, 0.5)
+yield = gsw_yield(10.0, params)
+```
+"""
+function gsw_yield(maturity::Real, params::GSWParameters)
+    β₀, β₁, β₂, β₃, τ₁, τ₂ = _extract_params(params)
+    return gsw_yield(maturity, β₀, β₁, β₂, β₃, τ₁, τ₂)
+end
+
+# Method 2: Using individual parameters (for flexibility and backward compatibility)
+"""
+    gsw_yield(maturity, β₀, β₁, β₂, β₃, τ₁, τ₂)
+
+Calculate yield from Gürkaynak-Sack-Wright Nelson-Siegel-Svensson parameters.
+
+Computes the yield for a given maturity using the Nelson-Siegel-Svensson functional form
+with the GSW parameter estimates. Automatically handles 3-factor vs 4-factor models.
+
+# Arguments
+- `maturity::Real`: Time to maturity in years (must be positive)
+- `β₀::Union{Real, Missing}`: Level parameter (BETA0)
+- `β₁::Union{Real, Missing}`: Slope parameter (BETA1)
+- `β₂::Union{Real, Missing}`: Curvature parameter (BETA2)
+- `β₃::Union{Real, Missing}`: Second curvature parameter (BETA3) - set to 0 for 3-factor model
+- `τ₁::Union{Real, Missing}`: First decay parameter
+- `τ₂::Union{Real, Missing}`: Second decay parameter - can equal τ₁ for 3-factor model
+
+# Returns
+- `Union{Float64, Missing}`: Yield in percent (e.g., 5.0 for 5%); `missing` if any parameter is missing
+
+# Throws
+- `ArgumentError`: If maturity is non-positive (τ₁ and τ₂ are not validated here; `GSWParameters` checks them)
+
+# Examples
+```julia
+# Calculate 1-year yield (4-factor model)
+yield = gsw_yield(1.0, 5.0, -2.0, 1.5, 0.8, 2.5, 0.5)
+
+# Calculate 10-year yield (3-factor model, β₃=0)
+yield = gsw_yield(10.0, 5.0, -2.0, 1.5, 0.0, 2.5, 2.5)
+```
+
+# Notes
+- Based on the Nelson-Siegel-Svensson functional form
+- When |β₃| ≤ 1e-10 or τ₂ ≤ 0 the second curvature term is dropped (3-factor Nelson-Siegel form)
+- Returns yield in percentage terms (not decimal)
+- Function is vectorizable: use `gsw_yield.(maturities, β₀, β₁, β₂, β₃, τ₁, τ₂)`
+"""
+function gsw_yield(maturity::Real,
+    β₀::Union{Real, Missing}, β₁::Union{Real, Missing}, β₂::Union{Real, Missing},
+    β₃::Union{Real, Missing}, τ₁::Union{Real, Missing}, τ₂::Union{Real, Missing})
+
+    # Input validation
+    if maturity <= 0
+        throw(ArgumentError("Maturity must be positive, got $maturity"))
+    end
+
+    # `Missing` is not a `Real`: the signature admits it so this propagation is reachable
+    if any(ismissing, (β₀, β₁, β₂, β₃, τ₁, τ₂))
+        return missing
+    end
+
+    # For 3-factor model compatibility: if β₃ is 0 or very small, skip the fourth term
+    use_four_factor = abs(β₃) > 1e-10 && τ₂ > 0
+
+    # Nelson-Siegel-Svensson formula
+    t = Float64(maturity)
+
+    # Calculate decay terms
+    exp_t_τ₁ = exp(-t/τ₁)
+
+    # yield terms
+    term1 = β₀ # Level
+    term2 = β₁ * (1.0 - exp_t_τ₁) / (t/τ₁) # Slope
+    term3 = β₂ * ((1.0 - exp_t_τ₁) / (t/τ₁) - exp_t_τ₁) # First curvature
+
+    # Fourth term only for 4-factor Svensson model
+    term4 = if use_four_factor
+        exp_t_τ₂ = exp(-t/τ₂)
+        β₃ * ((1.0 - exp_t_τ₂) / (t/τ₂) - exp_t_τ₂) # Second curvature
+    else
+        0.0
+    end
+
+    yield = term1 + term2 + term3 + term4
+    return Float64(yield)
+end
+
+# Method 1: Using GSWParameters struct
+"""
+    gsw_price(maturity, params::GSWParameters; face_value=1.0)
+
+Compute a zero-coupon bond price from a GSW parameter struct by unpacking it and calling the scalar-parameter method.
+
+# Arguments
+- `maturity::Real`: Time to maturity in years (must be positive)
+- `params::GSWParameters`: GSW parameter struct
+- `face_value::Real`: Face value of the bond (default: 1.0)
+# Returns
+- `Float64`: Bond price
+
+# Examples
+```julia
+params = GSWParameters(5.0, -2.0, 1.5, 0.8, 2.5, 0.5)
+price = gsw_price(10.0, params)
+```
+"""
+function gsw_price(maturity::Real, params::GSWParameters; face_value::Real = 1.0)
+    β₀, β₁, β₂, β₃, τ₁, τ₂ = _extract_params(params)
+    return gsw_price(maturity, β₀, β₁, β₂, β₃, τ₁, τ₂; face_value=face_value)
+end
+
+# Method 2: Using individual parameters
+"""
+    gsw_price(maturity, β₀, β₁, β₂, β₃, τ₁, τ₂; face_value=1.0)
+
+Calculate zero-coupon bond price from GSW Nelson-Siegel-Svensson parameters.
+
+Computes the price of a zero-coupon bond using the yield derived from GSW parameters.
+
+# Arguments
+- `maturity::Real`: Time to maturity in years (must be positive)
+- `β₀::Union{Real, Missing}`: Level parameter (BETA0)
+- `β₁::Union{Real, Missing}`: Slope parameter (BETA1)
+- `β₂::Union{Real, Missing}`: Curvature parameter (BETA2)
+- `β₃::Union{Real, Missing}`: Second curvature parameter (BETA3)
+- `τ₁::Union{Real, Missing}`: First decay parameter
+- `τ₂::Union{Real, Missing}`: Second decay parameter
+- `face_value::Real`: Face value of the bond (default: 1.0)
+
+# Returns
+- `Union{Float64, Missing}`: Bond price; `missing` if any GSW parameter is missing
+
+# Throws
+- `ArgumentError`: If maturity or face_value is non-positive (τ parameters are not validated here)
+
+# Examples
+```julia
+# Calculate price of 1-year zero-coupon bond
+price = gsw_price(1.0, 5.0, -2.0, 1.5, 0.8, 2.5, 0.5)
+
+# Calculate price with different face value
+price = gsw_price(1.0, 5.0, -2.0, 1.5, 0.8, 2.5, 0.5, face_value=1000.0)
+```
+
+# Notes
+- The percent yield `y` is converted with `r = log(1 + y/100)`, then `P = F * exp(-r * t)`, i.e. `P = F * (1 + y/100)^(-t)`
+- Yield is converted from percentage to decimal for calculation
+- Function is vectorizable: use `gsw_price.(maturities, β₀, β₁, β₂, β₃, τ₁, τ₂)`
+"""
+function gsw_price(maturity::Real,
+    β₀::Union{Real, Missing}, β₁::Union{Real, Missing}, β₂::Union{Real, Missing},
+    β₃::Union{Real, Missing}, τ₁::Union{Real, Missing}, τ₂::Union{Real, Missing};
+    face_value::Real = 1.0)
+
+    # Input validation
+    if maturity <= 0
+        throw(ArgumentError("Maturity must be positive, got $maturity"))
+    end
+    if face_value <= 0
+        throw(ArgumentError("Face value must be positive, got $face_value"))
+    end
+
+    # `Missing` is not a `Real`: the signature admits it so this propagation is reachable
+    if any(ismissing, (β₀, β₁, β₂, β₃, τ₁, τ₂))
+        return missing
+    end
+
+    # Get yield in percentage terms
+    yield_percent = gsw_yield(maturity, β₀, β₁, β₂, β₃, τ₁, τ₂)
+    ismissing(yield_percent) && return missing
+
+    # The percent yield is read as a compounded rate and mapped to its continuous equivalent.
+    # NOTE(review): GSW zero-coupon yields are listed as continuously compounded - confirm this conversion is intended
+    continuous_rate = log(1.0 + yield_percent / 100.0)
+    price = face_value * exp(-continuous_rate * maturity)
+
+    return Float64(price)
+end
+
+# Method 1: Using GSWParameters struct
+"""
+    gsw_forward_rate(maturity₁, maturity₂, params::GSWParameters)
+
+Compute the forward rate between two maturities from a GSW parameter struct by unpacking it and calling the scalar-parameter method.
+
+# Arguments
+- `maturity₁::Real`: Start maturity in years (must be positive and < maturity₂)
+- `maturity₂::Real`: End maturity in years (must be positive and > maturity₁)
+- `params::GSWParameters`: GSW parameter struct
+# Returns
+- `Float64`: Forward rate (decimal rate)
+
+# Examples
+```julia
+params = GSWParameters(5.0, -2.0, 1.5, 0.8, 2.5, 0.5)
+fwd_rate = gsw_forward_rate(2.0, 3.0, params)
+```
+"""
+function gsw_forward_rate(maturity₁::Real, maturity₂::Real, params::GSWParameters)
+    β₀, β₁, β₂, β₃, τ₁, τ₂ = _extract_params(params)
+    return gsw_forward_rate(maturity₁, maturity₂, β₀, β₁, β₂, β₃, τ₁, τ₂)
+end
+
+# Method 2: Using individual parameters
+"""
+    gsw_forward_rate(maturity₁, maturity₂, β₀, β₁, β₂, β₃, τ₁, τ₂)
+
+Calculate the forward rate between two maturities, `-ln(P₂/P₁) / (T₂ - T₁)`, using GSW parameters.
+
+# Arguments
+- `maturity₁::Real`: Start maturity in years (must be positive and < maturity₂)
+- `maturity₂::Real`: End maturity in years (must be positive and > maturity₁)
+- `β₀, β₁, β₂, β₃, τ₁, τ₂`: GSW parameters (`Real` or `missing`); throws `ArgumentError` unless 0 < maturity₁ < maturity₂
+# Returns
+- `Union{Float64, Missing}`: Forward rate as a decimal (not percent); `missing` if any parameter is missing
+# Examples
+```julia
+# Calculate 1-year forward rate starting in 2 years
+fwd_rate = gsw_forward_rate(2.0, 3.0, 5.0, -2.0, 1.5, 0.8, 2.5, 0.5)
+```
+"""
+function gsw_forward_rate(maturity₁::Real, maturity₂::Real,
+    β₀::Union{Real, Missing}, β₁::Union{Real, Missing}, β₂::Union{Real, Missing},
+    β₃::Union{Real, Missing}, τ₁::Union{Real, Missing}, τ₂::Union{Real, Missing})
+
+    if maturity₁ <= 0 || maturity₂ <= maturity₁
+        throw(ArgumentError("Must have 0 < maturity₁ < maturity₂, got maturity₁=$maturity₁, maturity₂=$maturity₂"))
+    end
+
+    # `Missing` is not a `Real`: the signature admits it so this propagation is reachable
+    if any(ismissing, (β₀, β₁, β₂, β₃, τ₁, τ₂))
+        return missing
+    end
+
+    # Get prices at both maturities
+    p₁ = gsw_price(maturity₁, β₀, β₁, β₂, β₃, τ₁, τ₂)
+    p₂ = gsw_price(maturity₂, β₀, β₁, β₂, β₃, τ₁, τ₂)
+
+    if ismissing(p₁) || ismissing(p₂)
+        return missing
+    end
+
+    # Calculate forward rate: f = -ln(P₂/P₁) / (T₂ - T₁)
+    forward_rate_decimal = -log(p₂ / p₁) / (maturity₂ - maturity₁)
+    # Returned as a decimal rate; no conversion to percent is applied
+    return Float64(forward_rate_decimal)
+end
+
+# ------------------------------------------------------------------------------------------
+# Vectorized convenience functions
# ------------------------------------------------------------------------------------------
+
"""
- import_GSW(; date_range)
+ gsw_yield_curve(maturities, params::GSWParameters)
- GSW Curves
+Calculate yields for multiple maturities by broadcasting `gsw_yield` over them with a single GSW parameter struct.
-# arguments
- - `date_range::Tuple{Date, Date}`: range for selection of data
+# Arguments
+- `maturities::AbstractVector{<:Real}`: Vector of maturities in years (each is passed to `gsw_yield`)
+- `params::GSWParameters`: GSW parameter struct
+# Returns
+- `Vector{Float64}`: Vector of yields in percent
+
+# Examples
+```julia
+params = GSWParameters(5.0, -2.0, 1.5, 0.8, 2.5, 0.5)
+maturities = [0.25, 0.5, 1, 2, 5, 10, 30]
+yields = gsw_yield_curve(maturities, params)
+```
"""
-function import_GSW(;
- date_range::Tuple{Date, Date} = (Date("1900-01-01"), Dates.today()) )
+function gsw_yield_curve(maturities::AbstractVector{<:Real}, params::GSWParameters)
+ return gsw_yield.(maturities, Ref(params)) # Ref keeps `params` fixed while `maturities` is broadcast
+end
-# Download the curves from the Fed
- @info "Downloading GSW Yield Curve Tables"
- url_GSW = "https://www.federalreserve.gov/data/yield-curve-tables/feds200628.csv";
- http_response = Downloads.download(url_GSW);
- df_gsw = CSV.read(http_response, DataFrame, skipto=11, header=10)
+"""
+ gsw_yield_curve(maturities, β₀, β₁, β₂, β₃, τ₁, τ₂)
- # clean up the table
- rename!(df_gsw, "Date" => "date");
- @p df_gsw |> filter!( (_.date >= date_range[1]) && (_.date <= date_range[2]) )
- select!(df_gsw, :date, :BETA0, :BETA1, :BETA2, :BETA3, :TAU1, :TAU2);
- transform!(df_gsw, [:BETA0, :BETA1, :BETA2, :BETA3, :TAU1, :TAU2] .=>
- ByRow(c -> tryparse(Float64, c) |> (x-> isnothing(x) ? missing : x) ), renamecols=false)
+Calculate yields for multiple maturities by broadcasting `gsw_yield` over them with one set of GSW parameters.
+# Arguments
+- `maturities::AbstractVector{<:Real}`: Vector of maturities in years (each is passed to `gsw_yield`)
+- `β₀, β₁, β₂, β₃, τ₁, τ₂`: GSW parameters (scalars)
- return df_gsw
+# Returns
+- `Vector{Float64}`: Vector of yields in percent
+# Examples
+```julia
+maturities = [0.25, 0.5, 1, 2, 5, 10, 30]
+yields = gsw_yield_curve(maturities, 5.0, -2.0, 1.5, 0.8, 2.5, 0.5)
+```
+"""
+function gsw_yield_curve(maturities::AbstractVector{<:Real}, β₀::Real, β₁::Real, β₂::Real, β₃::Real, τ₁::Real, τ₂::Real)
+ return gsw_yield.(maturities, β₀, β₁, β₂, β₃, τ₁, τ₂) # the scalar parameters are reused for every maturity
end
"""
- estimate_yield_GSW!
+ gsw_price_curve(maturities, params::GSWParameters; face_value=1.0)
+
+Calculate zero-coupon bond prices for multiple maturities by broadcasting `gsw_price` over them with a single GSW parameter struct.
+
+# Arguments
+- `maturities::AbstractVector{<:Real}`: Vector of maturities in years (each is passed to `gsw_price`)
+- `params::GSWParameters`: GSW parameter struct
+- `face_value::Real`: Face value of bonds, applied to every maturity (default: 1.0)
-# arguments
- - `maturity::Real`: in years
+# Returns
+- `Vector{Float64}`: Vector of bond prices
+# Examples
+```julia
+params = GSWParameters(5.0, -2.0, 1.5, 0.8, 2.5, 0.5)
+maturities = [0.25, 0.5, 1, 2, 5, 10, 30]
+prices = gsw_price_curve(maturities, params)
+```
"""
-function estimate_yield_GSW!(df::DataFrame;
- maturity::Real=1)
+function gsw_price_curve(maturities::AbstractVector{<:Real}, params::GSWParameters; face_value::Real = 1.0)
+ return gsw_price.(maturities, Ref(params), face_value=face_value) # Ref keeps `params` fixed while `maturities` is broadcast
+end
+
+"""
+ gsw_price_curve(maturities, β₀, β₁, β₂, β₃, τ₁, τ₂; face_value=1.0)
- # @rtransform!(df,
- # :y=NSSparamtoYield(maturity, :BETA0, :BETA1, :BETA2, :BETA3, :TAU1, :TAU2) )
- transform!(df,
- AsTable([:BETA0, :BETA1, :BETA2, :BETA3, :TAU1, :TAU2]) =>
- ByRow(n -> NSSparamtoYield(maturity, n.BETA0, n.BETA1, n.BETA2, n.BETA3, n.TAU1, n.TAU2) ) =>
- :y)
+Calculate zero-coupon bond prices for multiple maturities by broadcasting `gsw_price` over them with one set of GSW parameters.
- rename!(df, "y" => "yield_$(maturity)y")
+# Arguments
+- `maturities::AbstractVector{<:Real}`: Vector of maturities in years (each is passed to `gsw_price`)
+- `β₀, β₁, β₂, β₃, τ₁, τ₂`: GSW parameters (scalars)
+- `face_value::Real`: Face value of bonds, applied to every maturity (default: 1.0)
+# Returns
+- `Vector{Float64}`: Vector of bond prices
+
+# Examples
+```julia
+maturities = [0.25, 0.5, 1, 2, 5, 10, 30]
+prices = gsw_price_curve(maturities, 5.0, -2.0, 1.5, 0.8, 2.5, 0.5)
+```
+"""
+function gsw_price_curve(maturities::AbstractVector{<:Real}, β₀::Real, β₁::Real, β₂::Real, β₃::Real, τ₁::Real, τ₂::Real;
+ face_value::Real = 1.0)
+ return gsw_price.(maturities, β₀, β₁, β₂, β₃, τ₁, τ₂, face_value=face_value) # the scalar parameters are reused for every maturity
end
+# --------------------------------------------------------------------------------------------------
+
+
+# --------------------------------------------------------------------------------------------------
+# Return calculation functions
+# ------------------------------------------------------------------------------------------
+
+# Method 1: Using individual parameters
"""
- estimate_price_GSW!
+    gsw_return(maturity, β₀_t, β₁_t, β₂_t, β₃_t, τ₁_t, τ₂_t,
+               β₀_t₋₁, β₁_t₋₁, β₂_t₋₁, β₃_t₋₁, τ₁_t₋₁, τ₂_t₋₁;
+               frequency=:daily, return_type=:log)
+
+Calculate bond return between two periods using GSW parameters.
+
+Computes the return on a zero-coupon bond between two time periods by comparing
+the price today (with aged maturity) to the price in the previous period.
+
+# Arguments
+- `maturity::Real`: Original maturity of the bond in years
+- `β₀_t, β₁_t, β₂_t, β₃_t, τ₁_t, τ₂_t`: GSW parameters at time t (`Real` or `missing`)
+- `β₀_t₋₁, β₁_t₋₁, β₂_t₋₁, β₃_t₋₁, τ₁_t₋₁, τ₂_t₋₁`: GSW parameters at time t-1 (`Real` or `missing`)
+- `frequency::Symbol`: Return frequency (:daily, :monthly, :annual), i.e. a step of 1/360, 1/12 or 1 year
+- `return_type::Symbol`: :log for log returns, :arithmetic for simple returns
+# Returns
+- `Union{Float64, Missing}`: Bond return; `missing` if any parameter is missing
+# Throws
+- `ArgumentError`: If maturity is non-positive, or `frequency` / `return_type` is not one of the listed symbols
+# Examples
+```julia
+# Daily log return on 10-year bond
+ret = gsw_return(10.0, 5.0, -2.0, 1.5, 0.8, 2.5, 0.5, # today's params
+                 4.9, -1.9, 1.4, 0.9, 2.4, 0.6) # yesterday's params
+
+# Monthly arithmetic return
+ret = gsw_return(5.0, 5.0, -2.0, 1.5, 0.8, 2.5, 0.5,
+                 4.9, -1.9, 1.4, 0.9, 2.4, 0.6,
+                 frequency=:monthly, return_type=:arithmetic)
+```
+"""
+function gsw_return(maturity::Real,
+    β₀_t::Union{Real, Missing}, β₁_t::Union{Real, Missing}, β₂_t::Union{Real, Missing},
+    β₃_t::Union{Real, Missing}, τ₁_t::Union{Real, Missing}, τ₂_t::Union{Real, Missing},
+    β₀_t₋₁::Union{Real, Missing}, β₁_t₋₁::Union{Real, Missing}, β₂_t₋₁::Union{Real, Missing},
+    β₃_t₋₁::Union{Real, Missing}, τ₁_t₋₁::Union{Real, Missing}, τ₂_t₋₁::Union{Real, Missing};
+    frequency::Symbol = :daily,
+    return_type::Symbol = :log)
+
+    # Input validation
+    if maturity <= 0
+        throw(ArgumentError("Maturity must be positive, got $maturity"))
+    end
+
+    valid_frequencies = [:daily, :monthly, :annual]
+    if frequency ∉ valid_frequencies
+        throw(ArgumentError("frequency must be one of $valid_frequencies, got $frequency"))
+    end
+
+    valid_return_types = [:log, :arithmetic]
+    if return_type ∉ valid_return_types
+        throw(ArgumentError("return_type must be one of $valid_return_types, got $return_type"))
+    end
+
+    # `Missing` is not a `Real`: the signature admits it so this propagation is reachable
+    if any(ismissing, (β₀_t, β₁_t, β₂_t, β₃_t, τ₁_t, τ₂_t)) ||
+       any(ismissing, (β₀_t₋₁, β₁_t₋₁, β₂_t₋₁, β₃_t₋₁, τ₁_t₋₁, τ₂_t₋₁))
+        return missing
+    end
+
+    # Determine time step based on frequency
+    Δt = if frequency == :daily
+        1/360 # Using 360-day year convention
+    elseif frequency == :monthly
+        1/12
+    elseif frequency == :annual
+        1.0
+    end
+
+    # Calculate prices
+    # P_t: Price today of bond with remaining maturity (maturity - Δt)
+    aged_maturity = max(maturity - Δt, 0.001) # Avoid zero maturity
+    price_today = gsw_price(aged_maturity, β₀_t, β₁_t, β₂_t, β₃_t, τ₁_t, τ₂_t)
+
+    # P_t₋₁: Price yesterday of bond with original maturity
+    price_previous = gsw_price(maturity, β₀_t₋₁, β₁_t₋₁, β₂_t₋₁, β₃_t₋₁, τ₁_t₋₁, τ₂_t₋₁)
+
+    (ismissing(price_today) || ismissing(price_previous)) && return missing
+
+    # Calculate return
+    if return_type == :log
+        return log(price_today / price_previous)
+    else # arithmetic
+        return (price_today - price_previous) / price_previous
+    end
+end
-# arguments
- - `maturity::Real`: in years
+# Method 2: Using GSWParameters structs
"""
-function estimate_price_GSW!(df::DataFrame;
- maturity::Real=1)
+ gsw_return(maturity, params_t::GSWParameters, params_t₋₁::GSWParameters; frequency=:daily, return_type=:log)
- transform!(df,
- AsTable([:BETA0, :BETA1, :BETA2, :BETA3, :TAU1, :TAU2]) =>
- ByRow(n -> NSSparamtoPrice(maturity, n.BETA0, n.BETA1, n.BETA2, n.BETA3, n.TAU1, n.TAU2) ) =>
- :y)
+Calculate bond return between two periods using GSW parameter structs; both structs are unpacked and passed to the scalar-parameter method.
+# Arguments
+- `maturity::Real`: Original maturity of the bond in years
+- `params_t::GSWParameters`: GSW parameters at time t
+- `params_t₋₁::GSWParameters`: GSW parameters at time t-1
+- `frequency::Symbol`: Return frequency (:daily, :monthly, :annual)
+- `return_type::Symbol`: :log for log returns, :arithmetic for simple returns
- rename!(df, "y" => "price_$(maturity)y")
+# Returns
+- `Float64`: Bond return
+# Examples
+```julia
+params_today = GSWParameters(5.0, -2.0, 1.5, 0.8, 2.5, 0.5)
+params_yesterday = GSWParameters(4.9, -1.9, 1.4, 0.9, 2.4, 0.6)
+ret = gsw_return(10.0, params_today, params_yesterday)
+```
+"""
+function gsw_return(maturity::Real, params_t::GSWParameters, params_t₋₁::GSWParameters;
+ frequency::Symbol = :daily, return_type::Symbol = :log)
+ return gsw_return(maturity, _extract_params(params_t)..., _extract_params(params_t₋₁)..., # time-t parameters first, then time t-1
+ frequency=frequency, return_type=return_type)
end
+
+
+# Method 1: Using GSWParameters structs
"""
- estimate_return_GSW!
+ gsw_excess_return(maturity, params_t::GSWParameters, params_t₋₁::GSWParameters;
+ risk_free_maturity=0.25, frequency=:daily, return_type=:log)
+
+Calculate excess return of a bond over the risk-free rate using GSW parameter structs; both structs are unpacked and passed to the scalar-parameter method.
+
+# Arguments
+- `maturity::Real`: Original maturity of the bond in years
+- `params_t::GSWParameters`: GSW parameters at time t
+- `params_t₋₁::GSWParameters`: GSW parameters at time t-1
+- `risk_free_maturity::Real`: Maturity of the bond whose return serves as the risk-free return (default: 0.25 for 3-month)
+- `frequency::Symbol`: Return frequency (:daily, :monthly, :annual)
+- `return_type::Symbol`: :log for log returns, :arithmetic for simple returns
-# arguments
- - `maturity::Real`: in years
- - `frequency::Symbol`: :daily, :monthly, :annual type
- - `type::Symbol`: :log or standard one-period arithmetic return
+# Returns
+- `Float64`: Excess return (bond return - risk-free return)
+# Examples
+```julia
+params_today = GSWParameters(5.0, -2.0, 1.5, 0.8, 2.5, 0.5)
+params_yesterday = GSWParameters(4.9, -1.9, 1.4, 0.9, 2.4, 0.6)
+excess_ret = gsw_excess_return(10.0, params_today, params_yesterday)
+```
"""
-function estimate_return_GSW!(df::DataFrame;
- maturity::Real=1, frequency::Symbol=:daily, type::Symbol=:log)
+function gsw_excess_return(maturity::Real, params_t::GSWParameters, params_t₋₁::GSWParameters;
+ risk_free_maturity::Real = 0.25,
+ frequency::Symbol = :daily,
+ return_type::Symbol = :log)
+ return gsw_excess_return(maturity, _extract_params(params_t)..., _extract_params(params_t₋₁)..., # time-t parameters first, then time t-1
+ risk_free_maturity=risk_free_maturity, frequency=frequency, return_type=return_type)
+end
+
+# Method 2: Using individual parameters
+"""
+ gsw_excess_return(maturity, β₀_t, β₁_t, β₂_t, β₃_t, τ₁_t, τ₂_t,
+ β₀_t₋₁, β₁_t₋₁, β₂_t₋₁, β₃_t₋₁, τ₁_t₋₁, τ₂_t₋₁;
+ risk_free_maturity=0.25, frequency=:daily, return_type=:log)
+
+Calculate excess return of a bond over the risk-free rate, both computed with `gsw_return` from the same two parameter sets.
+
+# Arguments
+- Same as `gsw_return` plus:
+- `risk_free_maturity::Real`: Maturity of the bond whose return serves as the risk-free return (default: 0.25 for 3-month)
- if frequency==:daily
- Δmaturity = 1/360; Δdays = 1;
- elseif frequency==:monthly
- Δmaturity = 1 / 12; Δdays = 30;
- elseif frequency==:annual
- Δmaturity = 1; Δdays = 360;
+# Returns
+- `Float64`: Excess return (bond return - risk-free return)
+"""
+function gsw_excess_return(maturity::Real,
+ β₀_t::Real, β₁_t::Real, β₂_t::Real, β₃_t::Real, τ₁_t::Real, τ₂_t::Real,
+ β₀_t₋₁::Real, β₁_t₋₁::Real, β₂_t₋₁::Real, β₃_t₋₁::Real, τ₁_t₋₁::Real, τ₂_t₋₁::Real;
+ risk_free_maturity::Real = 0.25,
+ frequency::Symbol = :daily,
+ return_type::Symbol = :log)
+
+ # Return of the bond itself over one `frequency` step
+ bond_return = gsw_return(maturity, β₀_t, β₁_t, β₂_t, β₃_t, τ₁_t, τ₂_t,
+ β₀_t₋₁, β₁_t₋₁, β₂_t₋₁, β₃_t₋₁, τ₁_t₋₁, τ₂_t₋₁,
+ frequency=frequency, return_type=return_type)
+
+ # Risk-free leg: the same return calculation for a bond of maturity `risk_free_maturity`
+ rf_return = gsw_return(risk_free_maturity, β₀_t, β₁_t, β₂_t, β₃_t, τ₁_t, τ₂_t,
+ β₀_t₋₁, β₁_t₋₁, β₂_t₋₁, β₃_t₋₁, τ₁_t₋₁, τ₂_t₋₁,
+ frequency=frequency, return_type=return_type)
+
+ if ismissing(bond_return) || ismissing(rf_return)
+ return missing
end
+
+ return bond_return - rf_return
+end
+# --------------------------------------------------------------------------------------------------
- sort!(df, :date)
- # @rtransform!(df,
- # :p2=NSSparamtoPrice(maturity, :BETA0, :BETA1, :BETA2, :BETA3, :TAU1, :TAU2),
- # :p1=NSSparamtoPrice(maturity+Δmaturity, :BETA0, :BETA1, :BETA2, :BETA3, :TAU1, :TAU2) );
- transform!(df,
- AsTable([:BETA0, :BETA1, :BETA2, :BETA3, :TAU1, :TAU2]) =>
- ByRow(n -> NSSparamtoPrice(maturity,
- n.BETA0, n.BETA1, n.BETA2, n.BETA3, n.TAU1, n.TAU2) ) => :p2,
- AsTable([:BETA0, :BETA1, :BETA2, :BETA3, :TAU1, :TAU2]) =>
- ByRow(n -> NSSparamtoPrice(maturity + Δmaturity,
- n.BETA0, n.BETA1, n.BETA2, n.BETA3, n.TAU1, n.TAU2) ) => :p1
- )
- transform!(df, [:date, :p1] => ((d,p) -> tlag(d, p, Day(Δdays))) => :lag_p1)
- if type==:log
- transform!(df, [:p2, :lag_p1] => ByRow( (p,lp) -> log(p/lp) ) => "ret_$(maturity)y_$(frequency)" );
- else
- transform!(df, [:p2, :lag_p1] => ByRow( (p,lp) -> (p-lp) / lp ) => "ret_$(maturity)y_$(frequency)" );
+
+# --------------------------------------------------------------------------------------------------
+# GSW DataFrame Wrapper Functions
+# ------------------------------------------------------------------------------------------
+
+"""
+ add_yields!(df, maturities; validate=true)
+
+Add yield calculations to a DataFrame containing GSW parameters.
+
+Adds columns with yields for specified maturities using the Nelson-Siegel-Svensson
+model parameters in the DataFrame.
+
+# Arguments
+- `df::DataFrame`: DataFrame containing GSW parameters (must have columns: BETA0, BETA1, BETA2, BETA3, TAU1, TAU2)
+- `maturities::Union{Real, AbstractVector{<:Real}}`: Maturity or vector of maturities in years
+- `validate::Bool`: Whether to validate DataFrame structure (default: true)
+
+# Returns
+- `DataFrame`: Modified DataFrame with additional yield columns named `yield_Xy` (e.g., `yield_1y`, `yield_10y`)
+
+# Examples
+```julia
+df = import_gsw_parameters()
+
+# Add single maturity
+add_yields!(df, 10.0)
+
+# Add multiple maturities
+add_yields!(df, [1, 2, 5, 10, 30])
+
+# Add with custom maturity (fractional)
+add_yields!(df, [0.25, 0.5, 1.0])
+```
+
+# Notes
+- Modifies the DataFrame in place
+- Column names use format: `yield_Xy` where X is the maturity
+- Handles missing parameter values gracefully
+- Validates required columns are present
+"""
+function add_yields!(df::DataFrame, maturities::Union{Real, AbstractVector{<:Real}};
+    validate::Bool = true)
+
+    # Optional structural check of the parameter columns
+    if validate
+        _validate_gsw_dataframe(df)
     end
- select!(df, Not([:lag_p1, :p1, :p2]) )
- select!(df, [:date, Symbol("ret_$(maturity)y_$(frequency)")],
- Not([:date, Symbol("ret_$(maturity)y_$(frequency)")]) )
+
+    # Scalar and vector inputs share one code path
+    horizons = maturities isa Real ? [maturities] : collect(maturities)
+
+    any(<=(0), horizons) && throw(ArgumentError("All maturities must be positive"))
+
+    param_cols = [:BETA0, :BETA1, :BETA2, :BETA3, :TAU1, :TAU2]
+    for horizon in horizons
+        target = _maturity_to_column_name("yield", horizon)
+
+        # Row-wise: build the parameter struct; when it comes back missing the yield is missing
+        row_yield = row -> begin
+            curve = GSWParameters(row)
+            ismissing(curve) ? missing : gsw_yield(horizon, curve)
+        end
+
+        transform!(df, AsTable(param_cols) => ByRow(row_yield) => target)
+    end
+
+    return df
+end
+"""
+ add_prices!(df, maturities; face_value=100.0, validate=true)
+
+Add zero-coupon bond price calculations to a DataFrame containing GSW parameters.
+
+# Arguments
+- `df::DataFrame`: DataFrame containing GSW parameters
+- `maturities::Union{Real, AbstractVector{<:Real}}`: Maturity or vector of maturities in years
+- `face_value::Real`: Face value of bonds (default: 100.0)
+- `validate::Bool`: Whether to validate DataFrame structure (default: true)
+
+# Returns
+- `DataFrame`: Modified DataFrame with additional price columns named `price_Xy`
+
+# Examples
+```julia
+df = import_gsw_parameters()
+
+# Add prices for multiple maturities
+add_prices!(df, [1, 5, 10])
+
+# Add prices with different face value
+add_prices!(df, 10.0, face_value=1000.0)
+```
+"""
+function add_prices!(df::DataFrame, maturities::Union{Real, AbstractVector{<:Real}};
+    face_value::Real = 100.0, validate::Bool = true)
+
+    if validate
+        _validate_gsw_dataframe(df)
+    end
+
+    face_value <= 0 &&
+        throw(ArgumentError("Face value must be positive, got $face_value"))
+
+    # Scalar and vector inputs share one code path
+    horizons = maturities isa Real ? [maturities] : collect(maturities)
+    any(<=(0), horizons) && throw(ArgumentError("All maturities must be positive"))
+
+    param_cols = [:BETA0, :BETA1, :BETA2, :BETA3, :TAU1, :TAU2]
+    for horizon in horizons
+        target = _maturity_to_column_name("price", horizon)
+
+        # Row-wise: build the parameter struct; when it comes back missing the price is missing
+        row_price = row -> begin
+            curve = GSWParameters(row)
+            ismissing(curve) ? missing : gsw_price(horizon, curve, face_value=face_value)
+        end
+
+        transform!(df, AsTable(param_cols) => ByRow(row_price) => target)
+    end
+
     return df
+end
+
+"""
+    add_returns!(df, maturity; frequency=:daily, return_type=:log, validate=true)
+
+Add bond return calculations to a DataFrame containing GSW parameters.
+
+Each row's parameters are paired with the parameters lagged by one period (1, 30 or
+360 days, depending on `frequency`) and passed to `gsw_return`.
+
+# Arguments
+- `df::DataFrame`: DataFrame containing GSW parameters and dates (must have :date column)
+- `maturity::Real`: Bond maturity in years
+- `frequency::Symbol`: Return frequency (:daily, :monthly, :annual)
+- `return_type::Symbol`: :log for log returns, :arithmetic for simple returns
+- `validate::Bool`: Whether to validate DataFrame structure (default: true)
+
+# Returns
+- `DataFrame`: Modified DataFrame with return column named `ret_Xy_frequency`
+  (e.g., `ret_10y_daily`, `ret_5y_monthly`), placed right after `:date`
+
+# Throws
+- `ArgumentError`: if `maturity` is not positive, or `frequency` / `return_type` is not
+  one of the supported symbols
+
+# Examples
+```julia
+df = import_gsw_parameters()
+
+# Add daily log returns for 10-year bond
+add_returns!(df, 10.0)
+
+# Add monthly arithmetic returns for 5-year bond
+add_returns!(df, 5.0, frequency=:monthly, return_type=:arithmetic)
+```
+
+# Notes
+- Sorts `df` by `:date` in place before computing lags
+- First row will have missing return (no previous period)
+- NOTE(review): the lag is looked up with `tlag` at a fixed calendar offset, so rows with
+  no observation exactly one offset earlier (e.g. the day after a weekend) presumably
+  also get `missing` -- confirm against PanelShift's `tlag` semantics
+- Temporary `lag_*` parameter columns are created and removed again before returning
+"""
+function add_returns!(df::DataFrame, maturity::Real;
+    frequency::Symbol = :daily,
+    return_type::Symbol = :log,
+    validate::Bool = true)
+
+    if validate
+        _validate_gsw_dataframe(df, check_date=true)
+    end
+
+    if maturity <= 0
+        throw(ArgumentError("Maturity must be positive, got $maturity"))
+    end
+
+    valid_frequencies = [:daily, :monthly, :annual]
+    if frequency ∉ valid_frequencies
+        throw(ArgumentError("frequency must be one of $valid_frequencies, got $frequency"))
+    end
+
+    valid_return_types = [:log, :arithmetic]
+    if return_type ∉ valid_return_types
+        throw(ArgumentError("return_type must be one of $valid_return_types, got $return_type"))
+    end
+
+    # Sort by date to ensure proper time series order
+    sort!(df, :date)
+
+    # Calendar offset used to look up the previous period's parameters
+    time_step = if frequency == :daily
+        Day(1)
+    elseif frequency == :monthly
+        Day(30)   # Approximate
+    else          # :annual -- frequency was validated above
+        Day(360)  # Using 360-day year
+    end
+
+    # Create lagged parameter columns using PanelShift.jl
+    param_cols = [:BETA0, :BETA1, :BETA2, :BETA3, :TAU1, :TAU2]
+    lag_cols = [Symbol("lag_$col") for col in param_cols]
+    for (col, lag_col) in zip(param_cols, lag_cols)
+        transform!(df, [:date, col] =>
+            ((dates, values) -> tlag(dates, values, time_step)) =>
+            lag_col)
+    end
+
+    # Calculate returns using current and lagged parameters
+    col_name = Symbol(string(_maturity_to_column_name("ret", maturity)) * "_" * string(frequency))
+
+    transform!(df,
+        AsTable(vcat(param_cols, lag_cols)) =>
+        ByRow(params -> begin
+            current_params = GSWParameters(params.BETA0, params.BETA1, params.BETA2,
+                                           params.BETA3, params.TAU1, params.TAU2)
+            lagged_params = GSWParameters(params.lag_BETA0, params.lag_BETA1, params.lag_BETA2,
+                                          params.lag_BETA3, params.lag_TAU1, params.lag_TAU2)
+            if ismissing(current_params) || ismissing(lagged_params)
+                missing
+            else
+                gsw_return(maturity, current_params, lagged_params,
+                           frequency=frequency, return_type=return_type)
+            end
+        end
+        ) => col_name)
+
+    # Clean up temporary lagged columns
+    select!(df, Not(lag_cols))
+
+    # Put the return column right after :date. `names(df)` yields strings, so the former
+    # `:date in names(df)` guard was never true and the reorder silently did not happen.
+    # Select by Symbol instead; :date must exist here, otherwise `sort!` above has thrown.
+    select!(df, :date, col_name, Not([:date, col_name]))
+
+    return df
 end
-# ------------------------------------------------------------------------------------------
+"""
+    add_excess_returns!(df, maturity; risk_free_maturity=0.25, frequency=:daily, return_type=:log, validate=true)
+
+Add excess return calculations (bond return - risk-free return) to DataFrame.
+
+# Arguments
+- Same as `add_returns!` plus:
+- `risk_free_maturity::Real`: Maturity for risk-free rate (default: 0.25 for 3-month)
+
+# Returns
+- `DataFrame`: Modified DataFrame with excess return column named `excess_ret_Xy_frequency`
+
+# Notes
+- Sorts `df` by `:date` in place (as `add_returns!` does) so the new column lines up
+  row-for-row with the data it is attached to
+- The intermediate `ret_*` columns are computed on a copy and are not added to `df`
+"""
+function add_excess_returns!(df::DataFrame, maturity::Real;
+    risk_free_maturity::Real = 0.25,
+    frequency::Symbol = :daily,
+    return_type::Symbol = :log,
+    validate::Bool = true)
+
+    if validate
+        _validate_gsw_dataframe(df, check_date=true)
+    end
+
+    # `add_returns!` sorts whatever it is given by date. Sort `df` itself first: the result
+    # is copied back positionally below, so `df` and the scratch copy must share one row
+    # order (previously an unsorted `df` received values belonging to other rows).
+    sort!(df, :date)
+
+    # Compute both return series on a scratch copy so `df` only gains the excess column
+    scratch = copy(df)
+    add_returns!(scratch, maturity, frequency=frequency, return_type=return_type, validate=false)
+    add_returns!(scratch, risk_free_maturity, frequency=frequency, return_type=return_type, validate=false)
+
+    bond_ret_col = Symbol(string(_maturity_to_column_name("ret", maturity)) * "_" * string(frequency))
+    rf_ret_col = Symbol(string(_maturity_to_column_name("ret", risk_free_maturity)) * "_" * string(frequency))
+    excess_col = Symbol(string(_maturity_to_column_name("excess_ret", maturity)) * "_" * string(frequency))
+
+    # Element-wise difference; `missing` in either leg propagates to the result
+    df[!, excess_col] = scratch[!, bond_ret_col] .- scratch[!, rf_ret_col]
+
+    return df
+end
+
+# ------------------------------------------------------------------------------------------
+# Convenience functions
# ------------------------------------------------------------------------------------------
-function NSSparamtoPrice(t, B0, B1, B2, B3, T1, T2)
- r = B0 .+ B1.*((1.0 .- exp.(-t/T1))/(t/T1))+ B2*(((1-exp(-t/T1))/(t/T1))-exp(-t/T1)) +
- B3*(((1-exp(-t/T2))/(t/T2))-exp(-t/T2))
- r = log(1 + r/100)
- p = exp(-r*t)
- return(p)
+
+"""
+ gsw_curve_snapshot(params::GSWParameters; maturities=[0.25, 0.5, 1, 2, 5, 10, 30])
+
+Create a snapshot DataFrame of yields and prices for GSW parameters using parameter struct.
+
+# Arguments
+- `params::GSWParameters`: GSW parameter struct
+- `maturities::AbstractVector`: Vector of maturities to calculate (default: standard curve)
+
+# Returns
+- `DataFrame`: Contains columns :maturity, :yield, :price
+
+# Examples
+```julia
+params = GSWParameters(5.0, -2.0, 1.5, 0.8, 2.5, 0.5)
+curve = gsw_curve_snapshot(params)
+
+# Custom maturities
+curve = gsw_curve_snapshot(params, maturities=[0.5, 1, 3, 5, 7, 10, 20, 30])
+```
+"""
+function gsw_curve_snapshot(params::GSWParameters;
+    maturities::AbstractVector = [0.25, 0.5, 1, 2, 5, 10, 30])
+
+    # One row per requested maturity; yields are evaluated first, then prices
+    return DataFrame(
+        maturity = maturities,
+        yield = gsw_yield_curve(maturities, params),
+        price = gsw_price_curve(maturities, params),
+    )
 end
-function NSSparamtoYield(t, B0, B1, B2, B3, T1, T2)
- r = B0 .+ B1.*((1.0 .- exp.(-t/T1))/(t/T1))+ B2*(((1-exp(-t/T1))/(t/T1))-exp(-t/T1)) +
- B3*(((1-exp(-t/T2))/(t/T2))-exp(-t/T2))
- return(r)
+"""
+ gsw_curve_snapshot(β₀, β₁, β₂, β₃, τ₁, τ₂; maturities=[0.25, 0.5, 1, 2, 5, 10, 30])
+
+Create a snapshot DataFrame of yields and prices for a single date's GSW parameters.
+
+# Arguments
+- `β₀, β₁, β₂, β₃, τ₁, τ₂`: GSW parameters for a single date
+- `maturities::AbstractVector`: Vector of maturities to calculate (default: standard curve)
+
+# Returns
+- `DataFrame`: Contains columns :maturity, :yield, :price
+
+# Examples
+```julia
+# Create yield curve snapshot
+curve = gsw_curve_snapshot(5.0, -2.0, 1.5, 0.8, 2.5, 0.5)
+
+# Custom maturities
+curve = gsw_curve_snapshot(5.0, -2.0, 1.5, 0.8, 2.5, 0.5,
+ maturities=[0.5, 1, 3, 5, 7, 10, 20, 30])
+```
+"""
+function gsw_curve_snapshot(β₀::Real, β₁::Real, β₂::Real, β₃::Real, τ₁::Real, τ₂::Real;
+    maturities::AbstractVector = [0.25, 0.5, 1, 2, 5, 10, 30])
+
+    # Forward the six scalars unchanged to the curve helpers
+    θ = (β₀, β₁, β₂, β₃, τ₁, τ₂)
+    curve_yields = gsw_yield_curve(maturities, θ...)
+    curve_prices = gsw_price_curve(maturities, θ...)
+
+    return DataFrame(maturity = maturities, yield = curve_yields, price = curve_prices)
 end
+
+# ------------------------------------------------------------------------------------------
+# Internal helper functions
# ------------------------------------------------------------------------------------------
+"""
+ _validate_gsw_dataframe(df; check_date=false)
+
+Validate that DataFrame has required GSW parameter columns.
+"""
+function _validate_gsw_dataframe(df::DataFrame; check_date::Bool = false)
+    present = propertynames(df)
+
+    # Collect every absent parameter column so the error lists them all at once
+    absent = setdiff([:BETA0, :BETA1, :BETA2, :BETA3, :TAU1, :TAU2], present)
+    isempty(absent) ||
+        throw(ArgumentError("DataFrame missing required GSW parameter columns: $absent"))
+
+    # The date column is only required when the caller asks for it
+    (check_date && :date ∉ present) &&
+        throw(ArgumentError("DataFrame must contain :date column for return calculations"))
+
+    nrow(df) == 0 && throw(ArgumentError("DataFrame is empty"))
+
+    return nothing
+end
+
+"""
+ _maturity_to_column_name(prefix, maturity)
-# --------------------------------------------------------------
-# CLEAN UP BOND DATA
-# @time df_gsw = CSV.File("./input/GSW_yield.csv", skipto=11, header=10, missingstring="NA") |> DataFrame;
-# --------------------------------------------------------------
+Convert maturity to standardized column name.
+"""
+function _maturity_to_column_name(prefix::String, maturity::Real)
+    label = if maturity == floor(maturity)
+        # Whole-number maturities are written without a decimal part (10.0 -> "10")
+        string(Int(maturity))
+    else
+        # Fractional maturities keep their decimal form, minus any trailing zeros
+        replace(string(maturity), r"\.?0+$" => "")
+    end
+
+    return Symbol(prefix, "_", label, "y")
+end
+# --------------------------------------------------------------------------------------------------+
\ No newline at end of file
diff --git a/src/Merge_CRSP_Comp.jl b/src/Merge_CRSP_Comp.jl
@@ -1,26 +1,87 @@
#! /usr/bin/env julia
-# ------------------------------------------------------------------------------------------
+# --------------------------------------------------------------------------------------------------
# Merge_CRSP_Comp.jl
# Collection of functions that get the link files from crsp/compustat
-# ------------------------------------------------------------------------------------------
+# --------------------------------------------------------------------------------------------------
# --------------------------------------------------------------------------------------------------
# List of exported functions
# export link_Funda
# export link_MSF
-# ------------------------------------------------------------------------------------------
+# --------------------------------------------------------------------------------------------------
-# ------------------------------------------------------------------------------------------
+# --------------------------------------------------------------------------------------------------
+"""
+ import_ccm_link(wrds_conn::Connection)
+ import_ccm_link(; user::String="", password::String="")
+
+Import and process the CRSP/Compustat Merged (CCM) linking table from WRDS.
+
+Downloads the CCM linking table that maps between CRSP's PERMNO and Compustat's GVKEY
+identifiers, enabling cross-database research between CRSP and Compustat datasets.
+
+# Arguments
+## Method 1
+- `wrds_conn::Connection`: An established database connection to WRDS PostgreSQL server
+
+## Method 2 (Keyword Arguments)
+- `user::String=""`: WRDS username. If empty, attempts to use default connection via `open_wrds_pg()`
+- `password::String=""`: WRDS password. Only used if `user` is provided
+
+# Returns
+- `DataFrame`: Processed linking table with the following columns:
+ - `:gvkey`: Compustat's permanent company identifier (converted to Int)
+ - `:permno`: CRSP's permanent security identifier (renamed from `:lpermno`)
+ - `:linkdt`: Start date of the link validity period
+ - `:linkenddt`: End date of the link validity period (missing values set to today's date)
+ - `:linkprim`: Primary link marker (String3 type)
+ - `:liid`: IID of the linked CRSP issue (String3 type)
+ - `:linktype`: Type of link (String3 type)
+ - Additional columns from the original CRSP.CCMXPF_LNKHIST table
+
+# Processing Steps
+1. Downloads the complete CRSP.CCMXPF_LNKHIST table from WRDS
+2. Converts integer columns to proper Int type (handling missing values)
+3. Parses GVKEY from string to integer format
+4. Converts link descriptors to String3 type for efficiency
+5. Filters to keep only primary links:
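+ - Link types: "LU" (link unresearched by CRSP, established by CUSIP match), "LC" (link research complete), "LS" (link valid for this security only)
+ - Link primary: "P" (primary, identified by Compustat) or "C" (primary, assigned by CRSP)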
+6. Sets missing end dates to today's date (assuming link is still active)
+7. Renames `:lpermno` to `:permno` for consistency
+
+# Examples
+```julia
+# Using existing connection
+wrds_conn = open_wrds_pg("myusername", "mypassword")
+df_linktable = import_ccm_link(wrds_conn)
+
+# Using automatic connection
+df_linktable = import_ccm_link()
+
+# Using credentials directly
+df_linktable = import_ccm_link(user="myusername", password="mypassword")
+```
+
+# Notes
+- Requires active WRDS subscription and PostgreSQL access
+- Only primary security links are retained (see WRDS CCM documentation for link type details)
+- Missing link end dates are interpreted as currently active links
+- The function uses `@p` macro for pipeline operations and `@debug` for logging
+- All date columns (`:linkdt`, `:linkenddt`) and `:permno` are set as non-missing
+
+# References
+- WRDS CCM Database documentation: https://wrds-www.wharton.upenn.edu/pages/support/manuals-and-overviews/crsp/crspcompustat-merged-ccm/
+
+See also: [`link_Funda`](@ref), [`link_MSF`](@ref), [`open_wrds_pg`](@ref)
+"""
function import_ccm_link(wrds_conn::Connection)
-# df_funda = CSV.read("./tmp/funda.csv.gz", DataFrame);
-# df_msf = CSV.read("./tmp/msf.csv.gz", DataFrame);
-
-# Download link table
+ # Download link table
postgre_query_linktable = """
SELECT *
FROM crsp.ccmxpf_lnkhist
@@ -34,9 +95,8 @@ function import_ccm_link(wrds_conn::Connection)
transform!(df_linktable, :gvkey => ByRow(x->parse(Int, x)) => :gvkey);
transform!(df_linktable, [:linkprim, :liid, :linktype] .=> ByRow(String3), renamecols=false)
-# Prepare the table
+ # Prepare the table
@p df_linktable |> filter!(_.linktype ∈ ("LU", "LC", "LS") && _.linkprim ∈ ("P", "C") )
- # @rsubset(df_linktable, !ismissing(:lpermno))
df_linktable[ ismissing.(df_linktable.linkenddt), :linkenddt ] .= Dates.today();
disallowmissing!(df_linktable, [:linkdt, :linkenddt, :lpermno]);
@debug "renaming lpermno in linktable to permno"
@@ -58,25 +118,141 @@ function import_ccm_link(;
return import_ccm_link(wrds_conn)
end
-# ------------------------------------------------------------------------------------------
-
+# --------------------------------------------------------------------------------------------------
-# ------------------------------------------------------------------------------------------
-function link_Funda(df_linktable::DataFrame, df_funda::DataFrame)
+# --------------------------------------------------------------------------------------------------
+"""
+ link_Funda(df_linktable::DataFrame, df_funda::DataFrame, variables::Vector{Symbol}=Symbol[])
+
+Link Compustat fundamentals data with CRSP security identifiers using a linking table.
+
+This function performs a temporal join between Compustat fundamental data and a security
+linking table (typically CRSP/Compustat Merged Database linking table) to assign PERMNO
+identifiers to Compustat records based on valid date ranges.
+
+# Arguments
+- `df_linktable::DataFrame`: Linking table containing the mapping between GVKEY and PERMNO
+ identifiers. Must include columns:
+ - `:gvkey`: Compustat's permanent company identifier
+ - `:linkdt`: Start date of the link validity period
+ - `:linkenddt`: End date of the link validity period
+ - `:permno`: CRSP's permanent security identifier
+ - Additional columns that will be removed: `:linkprim`, `:liid`, `:linktype`
+
+- `df_funda::DataFrame`: Compustat fundamentals data. Must include columns:
+ - `:gvkey`: Compustat's permanent company identifier
+ - `:datadate`: Date of the fundamental data observation
+
+- `variables::Vector{Symbol}=Symbol[]`: which existing variables in the dataframe do we want to keep
+
+# Returns
+- `DataFrame`: Joined dataset containing `:gvkey` and `:datadate` from `df_funda` plus `:permno`
+ (and any other remaining columns) from the linking table. `:gvkey_1` is always dropped; the link-table columns
+ `:linkprim`, `:liid`, `:linktype`, `:linkdt`, `:linkenddt` are dropped unless listed in `variables`
+
+# Details
+The function performs an inner join where:
+1. Records are matched on `:gvkey`
+2. The `:datadate` from fundamentals must fall within the valid link period
+ `[linkdt, linkenddt]` from the linking table
+
+This ensures that each fundamental data observation is matched with the correct PERMNO
+based on the security's identification history, handling cases where companies change
+their CRSP identifiers over time.
+
+# Examples
+```julia
+# Load linking table and fundamentals data
+df_linktable = load_ccm_links()
+df_funda = load_compustat_funda()
+
+# Link the datasets
+linked_data = link_Funda(df_linktable, df_funda)
+# Result contains fundamental data with PERMNO identifiers added
+```
+
+# Notes
+Uses FlexiJoins.innerjoin for temporal joining capabilities
+Only records with valid links during the observation date are retained
+"""
+function link_Funda(df_linktable::DataFrame, df_funda::DataFrame,
+ variables::Vector{Symbol}=Symbol[])
+ # Inner join on :gvkey, keeping only pairs whose :datadate falls in the link's
+ # linkdt..linkenddt interval. Only :gvkey and :datadate are taken from df_funda.
funda_link_permno = FlexiJoins.innerjoin(
(select(df_funda, :gvkey, :datadate), df_linktable),
by_key(:gvkey) & by_pred(:datadate, ∈, x->x.linkdt..x.linkenddt) )
+ # :gvkey_1 is always dropped (presumably the link table's copy of the join key -- confirm);
+ # the link descriptor/date columns are dropped unless the caller lists them in `variables`.
+ variables_to_remove = vcat(:gvkey_1,
+ setdiff([:linkprim, :liid, :linktype, :linkdt, :linkenddt], variables) )
+
select!(funda_link_permno,
- Not([:gvkey_1, :linkprim, :liid, :linktype, :linkdt, :linkenddt]) )
+ Not(variables_to_remove))
return funda_link_permno
end
+# ------------------------------------------------------------------------------------------
+
+# ------------------------------------------------------------------------------------------
+"""
+ link_MSF(df_linktable::DataFrame, df_msf::DataFrame; variables::Vector{Symbol}=Symbol[])
+
+Link CRSP monthly stock file data with Compustat identifiers using a linking table.
+
+This function performs a temporal join to add GVKEY (Compustat identifiers) to CRSP monthly
+stock data, enabling cross-database analysis between CRSP and Compustat datasets.
+
+# Arguments
+- `df_linktable::DataFrame`: Linking table containing the mapping between PERMNO and GVKEY
+ identifiers. Must include columns:
+ - `:permno`: CRSP's permanent security identifier
+ - `:gvkey`: Compustat's permanent company identifier
+ - `:linkdt`: Start date of the link validity period
+ - `:linkenddt`: End date of the link validity period
+
+- `df_msf::DataFrame`: CRSP monthly stock file data. Must include columns:
+ - `:permno`: CRSP's permanent security identifier
+ - `:date`: Date of the stock observation
+ - Additional columns as specified in `variables` (if any)
+
+# Keyword Arguments
+- `variables::Vector{Symbol}=Symbol[]`: Optional list of additional columns to retain from
+ the linking process. Only columns that exist in both datasets will be kept.
+
+# Returns
+- `DataFrame`: Original CRSP data with GVKEY identifiers added where valid links exist.
+ Includes:
+ - All original columns from `df_msf`
+ - `:gvkey`: Compustat identifier (where available)
+ - `:datey`: Year extracted from the `:date` column
+ - Any additional columns specified in `variables` that exist in the joined data
+
+# Details
+The function performs a two-step process:
+1. **Inner join with temporal filtering**: Matches CRSP records to the linking table where
+ the stock date falls within the valid link period `[linkdt, linkenddt]`
+2. **Left join back to original data**: Ensures all original CRSP records are retained,
+ with GVKEY values added only where valid links exist
+
+Records with missing GVKEY values after the initial join are filtered out before the
+merge-back step, ensuring only valid links are propagated.
+
+# Examples
+```julia
+# Load data
+df_linktable = load_ccm_links()
+df_msf = load_crsp_monthly()
+
+# Basic linking
+linked_msf = link_MSF(df_linktable, df_msf)
+
+# Include additional variables from the linking table
+linked_msf = link_MSF(df_linktable, df_msf, variables=[:linkprim, :linktype])
+```
+"""
function link_MSF(df_linktable::DataFrame, df_msf::DataFrame;
variables::Vector{Symbol}=Symbol[])
@@ -97,10 +273,11 @@ function link_MSF(df_linktable::DataFrame, df_msf::DataFrame;
return df_msf_merged
end
+# ------------------------------------------------------------------------------------------
-
+# ------------------------------------------------------------------------------------------
# function link_ccm(df_linktable, df_msf, df_funda)
# # ccm
diff --git a/test/UnitTests/KenFrench.jl b/test/UnitTests/KenFrench.jl
@@ -1,14 +1,20 @@
@testset "Importing Fama-French factors from Ken French library" begin
+ import Dates
- df_FF3 = FinanceRoutines.import_FF3();
- @test names(df_FF3) == ["datem", "mktrf", "smb", "hml", "rf"];
- @test (nrow(df_FF3) >= 1000 & nrow(df_FF3) <= 1250);
-
- df_FF3_daily = FinanceRoutines.import_FF3(:daily);
+
+ df_FF3_annual = FinanceRoutines.import_FF3(frequency=:annual);
+ @test names(df_FF3_annual) == ["datey", "mktrf", "smb", "hml", "rf"]
+ @test nrow(df_FF3_annual) >= Dates.year(Dates.today()) - 1926 - 1
+
+
+ df_FF3_monthly = FinanceRoutines.import_FF3(frequency=:monthly);
+ @test names(df_FF3_monthly) == ["datem", "mktrf", "smb", "hml", "rf"]
+ @test nrow(df_FF3_monthly) >= (Dates.year(Dates.today()) - 1926 - 1) * 12
+
+ df_FF3_daily = FinanceRoutines.import_FF3(frequency=:daily);
@test names(df_FF3_daily) == ["date", "mktrf", "smb", "hml", "rf"]
- @test (nrow(df_FF3_daily) >= 25_000 & nrow(df_FF3_daily) <= 26_000)
+ # Chained comparison: `&` binds tighter than `>=`/`<=`, so `a >= lo & a <= hi` does not test the range
+ @test 25_900 <= nrow(df_FF3_daily) <= 26_500
-
end
diff --git a/test/UnitTests/Yields.jl b/test/UnitTests/Yields.jl
@@ -1,32 +1,309 @@
-@testset "Treasury Yields (GSW)" begin
+@testset "GSW Treasury Yields" begin
- import Dates: Date
+ import Dates: Date, year
import Statistics: mean, std
- df_GSW = import_GSW(date_range = (Date("1970-01-01"), Date("1989-12-31")) )
- @test names(df_GSW) == ["date", "BETA0", "BETA1", "BETA2", "BETA3", "TAU1", "TAU2"]
-
- estimate_yield_GSW!(df_GSW; maturity=1)
- estimate_price_GSW!(df_GSW; maturity=1)
- estimate_return_GSW!(df_GSW; maturity=2, frequency=:daily, type=:log)
-
- transform!(df_GSW, :date => (x -> year.(x) .÷ 10 * 10) => :date_decade)
-
- df_stats = combine(
- groupby(df_GSW, :date_decade),
- :yield_1y => ( x -> mean(skipmissing(x)) ) => :mean_yield,
- :yield_1y => ( x -> sqrt(std(skipmissing(x))) ) => :vol_yield,
- :price_1y => ( x -> mean(skipmissing(x)) ) => :mean_price,
- :price_1y => ( x -> sqrt(std(skipmissing(x))) ) => :vol_price,
- :ret_2y_daily => ( x -> mean(skipmissing(x)) ) => :mean_ret_2y_daily,
- :ret_2y_daily => ( x -> sqrt(std(skipmissing(x))) ) => :vol_ret_2y_daily
- )
-
- @test df_stats[1, :mean_yield] < df_stats[2, :mean_yield]
- @test df_stats[1, :vol_yield] < df_stats[2, :vol_yield]
- @test df_stats[1, :mean_price] > df_stats[2, :mean_price]
- @test df_stats[1, :vol_price] < df_stats[2, :vol_price]
- @test df_stats[1, :mean_ret_2y_daily] < df_stats[2, :mean_ret_2y_daily]
- @test df_stats[1, :vol_ret_2y_daily] < df_stats[2, :vol_ret_2y_daily]
-
-end
+
+ # Test data import and basic structure
+ @testset "Data Import and Basic Structure" begin
+ # Import the 1970-1989 window with extra SVENF* columns; :SVENF99 is requested but the names check below expects it to be absent
+ df_GSW = import_gsw_parameters(date_range = (Date("1970-01-01"), Date("1989-12-31")),
+ additional_variables=[:SVENF05, :SVENF06, :SVENF07, :SVENF99])
+
+ @test names(df_GSW) == ["date", "BETA0", "BETA1", "BETA2", "BETA3", "TAU1", "TAU2", "SVENF05", "SVENF06", "SVENF07"]
+ @test nrow(df_GSW) > 0
+ @test all(df_GSW.date .>= Date("1970-01-01"))
+ @test all(df_GSW.date .<= Date("1989-12-31"))
+
+ # Test date range validation: a reversed range must log this warning (other log records are tolerated by match_mode=:any)
+ @test_logs (:warn, "starting date posterior to end date ... shuffling them around") match_mode=:any import_gsw_parameters(date_range = (Date("1990-01-01"), Date("1980-01-01")));
+
+ # Test missing data handling (-999 flags)
+ @test any(ismissing, df_GSW.TAU2) # Should have some missing τ₂ values in this period
+ end
+
+ # Test GSWParameters struct
+ @testset "GSWParameters Struct" begin
+
+ # Test normal construction (positional order: β₀, β₁, β₂, β₃, τ₁, τ₂)
+ params = GSWParameters(5.0, -2.0, 1.5, 0.8, 2.5, 0.5)
+ @test params.β₀ == 5.0
+ @test params.β₁ == -2.0
+ @test params.τ₁ == 2.5
+ @test params.τ₂ == 0.5
+
+ # Test 3-factor model (missing τ₂, β₃)
+ params_3f = GSWParameters(5.0, -2.0, 1.5, missing, 2.5, missing)
+ @test ismissing(params_3f.β₃)
+ @test ismissing(params_3f.τ₂)
+ @test FinanceRoutines.is_three_factor_model(params_3f)
+ @test !FinanceRoutines.is_three_factor_model(params)
+
+ # Test validation: a negative decay parameter must be rejected
+ @test_throws ArgumentError GSWParameters(5.0, -2.0, 1.5, 0.8, -1.0, 0.5) # negative τ₁
+ @test_throws ArgumentError GSWParameters(5.0, -2.0, 1.5, 0.8, 2.5, -0.5) # negative τ₂
+
+ # Test DataFrame row construction from one month of imported data
+ df_GSW = import_gsw_parameters(date_range = (Date("1985-01-01"), Date("1985-01-31")))
+ if nrow(df_GSW) >= 20 # guard must cover the row index used below, otherwise a short result raises BoundsError
+ params_from_row = GSWParameters(df_GSW[20, :])
+ @test params_from_row isa GSWParameters
+ end
+
+ end
+
+ # Test core calculation functions
+ @testset "Core Calculation Functions" begin
+
+ params = GSWParameters(5.0, -2.0, 1.5, 0.8, 2.5, 0.5)
+ params_3f = GSWParameters(5.0, -2.0, 1.5, missing, 2.5, missing)
+
+ # Test yield calculations: struct form (4- and 3-factor) and scalar form with the same values as `params`
+ yield_4f = gsw_yield(10.0, params)
+ yield_3f = gsw_yield(10.0, params_3f)
+ yield_scalar = gsw_yield(10.0, 5.0, -2.0, 1.5, 0.8, 2.5, 0.5)
+
+ @test yield_4f isa Float64
+ @test yield_3f isa Float64
+ @test yield_scalar ≈ yield_4f
+
+ # Test price calculations: same three call forms as for yields
+ price_4f = gsw_price(10.0, params)
+ price_3f = gsw_price(10.0, params_3f)
+ price_scalar = gsw_price(10.0, 5.0, -2.0, 1.5, 0.8, 2.5, 0.5)
+
+ @test price_4f isa Float64
+ @test price_3f isa Float64
+ @test price_scalar ≈ price_4f
+ @test price_4f < 1.0 # Price should be less than face value for positive yields (assumes the default face value is 1.0 — TODO confirm)
+
+ # Test forward rates: struct and scalar argument forms must agree
+ fwd_4f = gsw_forward_rate(2.0, 3.0, params)
+ fwd_scalar = gsw_forward_rate(2.0, 3.0, 5.0, -2.0, 1.5, 0.8, 2.5, 0.5)
+ @test fwd_4f ≈ fwd_scalar
+
+ # Test curve functions over a vector of maturities (one output element per maturity)
+ maturities = [0.25, 0.5, 1, 2, 5, 10, 30]
+ yields = gsw_yield_curve(maturities, params)
+ prices = gsw_price_curve(maturities, params)
+
+ @test length(yields) == length(maturities)
+ @test length(prices) == length(maturities)
+ @test all(y -> y isa Float64, yields)
+ @test all(p -> p isa Float64, prices)
+
+ # Test input validation: invalid maturities must raise ArgumentError
+ @test_throws ArgumentError gsw_yield(-1.0, params) # negative maturity
+ @test_throws ArgumentError gsw_price(-1.0, params) # negative maturity
+ @test_throws ArgumentError gsw_forward_rate(3.0, 2.0, params) # invalid maturity order
+ end
+
+ # Test return calculations
+ @testset "Return Calculations" begin
+
+ params_t = GSWParameters(5.0, -2.0, 1.5, 0.8, 2.5, 0.5)
+ params_t_minus_1 = GSWParameters(4.9, -1.9, 1.4, 0.9, 2.4, 0.6)
+
+ # Struct form and scalar form (maturity, the six values of params_t, then the six of params_t_minus_1) must agree
+ ret_struct = gsw_return(10.0, params_t, params_t_minus_1)
+ ret_scalar = gsw_return(10.0, 5.0, -2.0, 1.5, 0.8, 2.5, 0.5,
+ 4.9, -1.9, 1.4, 0.9, 2.4, 0.6)
+
+ @test ret_struct ≈ ret_scalar
+ @test ret_struct isa Float64
+
+ # Test different return types selected through the return_type keyword
+ ret_log = gsw_return(10.0, params_t, params_t_minus_1, return_type=:log)
+ ret_arith = gsw_return(10.0, params_t, params_t_minus_1, return_type=:arithmetic)
+
+ @test ret_log ≠ ret_arith # Should be different
+ @test ret_log isa Float64
+ @test ret_arith isa Float64
+
+ # Test excess returns (only the result type is checked here)
+ excess_ret = gsw_excess_return(10.0, params_t, params_t_minus_1)
+ @test excess_ret isa Float64
+
+ end
+
+ # Test DataFrame wrapper functions (original API)
+ @testset "DataFrame Wrappers - Original API Tests" begin
+
+ df_GSW = import_gsw_parameters(date_range = (Date("1970-01-01"), Date("1989-12-31")))
+
+ # Test original functions with new names (each call mutates df_GSW by adding a column)
+ FinanceRoutines.add_yields!(df_GSW, 1.0)
+ FinanceRoutines.add_prices!(df_GSW, 1.0)
+ FinanceRoutines.add_returns!(df_GSW, 2.0, frequency=:daily, return_type=:log)
+
+
+ # Verify columns were created
+ @test "yield_1y" in names(df_GSW)
+ @test "price_1y" in names(df_GSW)
+ @test "ret_2y_daily" in names(df_GSW)
+
+ # Per-decade summary statistics; rows 1 and 2 are presumably the 1970s and 1980s — confirm the import is date-sorted
+ transform!(df_GSW, :date => (x -> year.(x) .÷ 10 * 10) => :date_decade)
+ df_stats = combine(
+ groupby(df_GSW, :date_decade),
+ :yield_1y => ( x -> mean(skipmissing(x)) ) => :mean_yield,
+ :yield_1y => ( x -> sqrt(std(skipmissing(x))) ) => :vol_yield,
+ :price_1y => ( x -> mean(skipmissing(x)) ) => :mean_price,
+ :price_1y => ( x -> sqrt(std(skipmissing(x))) ) => :vol_price,
+ :ret_2y_daily => ( x -> mean(skipmissing(x)) ) => :mean_ret_2y_daily,
+ :ret_2y_daily => ( x -> sqrt(std(skipmissing(x))) ) => :vol_ret_2y_daily
+ )
+
+ # Original tests - should still pass. NOTE(review): vol_* is sqrt(std(x)), not std(x); the orderings are unaffected because sqrt is monotone
+ @test df_stats[1, :mean_yield] < df_stats[2, :mean_yield]
+ @test df_stats[1, :vol_yield] < df_stats[2, :vol_yield]
+ @test df_stats[1, :mean_price] > df_stats[2, :mean_price]
+ @test df_stats[1, :vol_price] < df_stats[2, :vol_price]
+ @test df_stats[1, :mean_ret_2y_daily] < df_stats[2, :mean_ret_2y_daily]
+ @test df_stats[1, :vol_ret_2y_daily] < df_stats[2, :vol_ret_2y_daily]
+ end
+
+ # Test enhanced DataFrame wrapper functions
+ @testset "DataFrame Wrappers - Enhanced API" begin
+
+ df_GSW = import_gsw_parameters(date_range = (Date("1980-01-01"), Date("1985-12-31")))
+
+ # Test multiple maturities at once; expected columns are named yield_<maturity>y (0.5 -> "yield_0.5y", 1 -> "yield_1y")
+ FinanceRoutines.add_yields!(df_GSW, [0.5, 1, 2, 5, 10])
+ expected_yield_cols = ["yield_0.5y", "yield_1y", "yield_2y", "yield_5y", "yield_10y"]
+ @test all(col -> col in names(df_GSW), expected_yield_cols)
+
+ # Test multiple prices with an explicit face value
+ FinanceRoutines.add_prices!(df_GSW, [1, 5, 10], face_value=100.0)
+ expected_price_cols = ["price_1y", "price_5y", "price_10y"]
+ @test all(col -> col in names(df_GSW), expected_price_cols)
+
+ # Test different frequencies: the frequency appears as the column-name suffix
+ FinanceRoutines.add_returns!(df_GSW, 5, frequency=:monthly, return_type=:arithmetic)
+ @test "ret_5y_monthly" in names(df_GSW)
+
+ # Test excess returns; no frequency is passed, and the expected column name implies a daily default
+ FinanceRoutines.add_excess_returns!(df_GSW, 10, risk_free_maturity=0.25)
+ @test "excess_ret_10y_daily" in names(df_GSW)
+
+ # Test that at least some rows yield non-missing values
+ @test any(!ismissing, df_GSW.yield_1y)
+ @test any(!ismissing, df_GSW.price_1y)
+ end
+
+ # Test convenience functions
+ @testset "Convenience Functions" begin
+
+ params = GSWParameters(5.0, -2.0, 1.5, 0.8, 2.5, 0.5)
+
+ # Test curve snapshot with struct: the result is a table with maturity, yield and price columns
+ curve_struct = FinanceRoutines.gsw_curve_snapshot(params)
+ @test names(curve_struct) == ["maturity", "yield", "price"]
+ @test nrow(curve_struct) == 7 # default maturities
+
+ # Test curve snapshot with scalars (same values as `params`, so results must match)
+ curve_scalar = FinanceRoutines.gsw_curve_snapshot(5.0, -2.0, 1.5, 0.8, 2.5, 0.5)
+ @test curve_struct.yield ≈ curve_scalar.yield
+ @test curve_struct.price ≈ curve_scalar.price
+
+ # Test custom maturities: one output row per requested maturity, in the same order
+ custom_maturities = [1, 3, 5, 7, 10]
+ curve_custom = FinanceRoutines.gsw_curve_snapshot(params, maturities=custom_maturities)
+ @test nrow(curve_custom) == length(custom_maturities)
+ @test curve_custom.maturity == custom_maturities
+ end
+
+ # Test edge cases and robustness
+ @testset "Edge Cases and Robustness" begin
+ # Test very short and very long maturities
+ params = GSWParameters(5.0, -2.0, 1.5, 0.8, 2.5, 0.5)
+
+ yield_short = gsw_yield(0.001, params) # Very short maturity
+ yield_long = gsw_yield(100.0, params) # Very long maturity
+ @test yield_short isa Float64
+ @test yield_long isa Float64
+
+ # Test a degenerate curve: every β is zero while both τ values are large
+ params_extreme = GSWParameters(0.0, 0.0, 0.0, 0.0, 10.0, 20.0)
+ yield_extreme = gsw_yield(1.0, params_extreme)
+ @test yield_extreme ≈ 0.0 # Should be zero with all β parameters = 0
+
+ # Test a hand-built single-row DataFrame whose BETA3 and TAU2 are missing
+ df_with_missing = DataFrame(
+ date = [Date("2020-01-01")],
+ BETA0 = [5.0], BETA1 = [-2.0], BETA2 = [1.5],
+ BETA3 = [missing], TAU1 = [2.5], TAU2 = [missing]
+ )
+
+ FinanceRoutines.add_yields!(df_with_missing, 10.0)
+ @test !ismissing(df_with_missing.yield_10y[1]) # Should work with 3-factor model
+ end
+
+ # Test performance and consistency
+ @testset "Performance and Consistency" begin
+
+ params = GSWParameters(5.0, -2.0, 1.5, 0.8, 2.5, 0.5)
+
+ # Test that struct and scalar APIs give identical results
+ maturities = [0.25, 0.5, 1, 2, 5, 10, 20, 30]
+
+ yields_struct = gsw_yield.(maturities, Ref(params)) # Ref keeps params from being broadcast over
+ yields_scalar = gsw_yield.(maturities, 5.0, -2.0, 1.5, 0.8, 2.5, 0.5)
+
+ @test yields_struct ≈ yields_scalar
+
+ prices_struct = gsw_price.(maturities, Ref(params))
+ prices_scalar = gsw_price.(maturities, 5.0, -2.0, 1.5, 0.8, 2.5, 0.5)
+
+ @test prices_struct ≈ prices_scalar
+
+ # Sanity check: adjacent maturities must not differ by 5 or more in either direction
+ @test all(abs.(diff(yields_struct)) .< 5.0) # No huge jumps in yield curve (a one-sided check would let large drops pass)
+ end
+
+ # Test 3-factor vs 4-factor model compatibility
+ @testset "3-Factor vs 4-Factor Model Compatibility" begin
+ # Create both model types: same β₀, β₁, β₂, τ₁; the 3-factor one has β₃ and τ₂ missing
+ params_4f = GSWParameters(5.0, -2.0, 1.5, 0.8, 2.5, 0.5)
+ params_3f = GSWParameters(5.0, -2.0, 1.5, missing, 2.5, missing)
+
+ # Test that 3-factor model gives reasonable results
+ yield_4f = gsw_yield(10.0, params_4f)
+ yield_3f = gsw_yield(10.0, params_3f)
+
+ @test abs(yield_4f - yield_3f) < 2.0 # Should be reasonably close
+
+ # Test a DataFrame mixing a 4-factor row (all parameters present) and a 3-factor row (BETA3, TAU2 missing)
+ df_mixed = DataFrame(
+ date = [Date("2020-01-01"), Date("2020-01-02")],
+ BETA0 = [5.0, 5.1], BETA1 = [-2.0, -2.1], BETA2 = [1.5, 1.4],
+ BETA3 = [0.8, missing], TAU1 = [2.5, 2.4], TAU2 = [0.5, missing]
+ )
+
+ FinanceRoutines.add_yields!(df_mixed, 10.0)
+ @test !ismissing(df_mixed.yield_10y[1]) # 4-factor period
+ @test !ismissing(df_mixed.yield_10y[2]) # 3-factor period
+ end
+
+end # @testset "GSW Treasury Yields"
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -1,4 +1,6 @@
# --------------------------------------------------------------------------------------------------
+# using Revise; import Pkg; Pkg.activate(".") # for debugging
+
using FinanceRoutines
using Test
@@ -10,7 +12,9 @@ import DataPipes: @p
# --------------------------------------------------------------------------------------------------
const testsuite = [
- "KenFrench", "WRDS", "Yields", "betas"
+ "KenFrench",
+ "WRDS", "betas",
+ "Yields",
]
# --------------------------------------------------------------------------------------------------
@@ -42,9 +46,6 @@ end
# just for checking things on the fly
@testset "Debugging tests ..." begin
- @test FinanceRoutines.greet_FinanceRoutines() == "Hello FinanceRoutines!"
- @test FinanceRoutines.greet_FinanceRoutines() != "Hello world!"
-
WRDS_USERNAME = get(ENV, "WRDS_USERNAME", "")
WRDS_PWD = get(ENV, "WRDS_PWD", "")
@test !isempty(WRDS_USERNAME)