commit f8e2820045f2e9154061c1ecfd0ccc9fe009f59d
parent c12513ffdb5a3e3ddf38f7429ffac9a84fee8b2d
Author: Erik Loualiche <eloualiche@users.noreply.github.com>
Date: Sun, 15 Feb 2026 18:49:31 -0600
Fix SQL injection, connection leaks, bond pricing, and cleanup (#26)
* Code review improvements: security, correctness, and cleanup
- Fix SQL injection: parameterize queries in ImportCRSP and ImportComp
- Fix connection leaks: add with_wrds_connection helper, use try/finally
- Fix bond_yield_excel: correct coupon annuity PV formula (missing (1+dr)
factor), compute DSC=E-A for 30/360 day count consistency, add
_day_count_days helper — all 3 pre-existing test failures now pass
- Fix betas: use _min_data threshold instead of parameter count p
- Fix build_MSF!: replace dead code with actual column validation
- Fix import_DSF convenience methods: remove invalid variables kwarg
- Tighten GSWParameters types: core fields now concrete Float64
- Make FF monthly parser robust to header format changes
- Optimize add_excess_returns! to single lag computation pass
- Export import_ccm_link, remove unused Statistics dependency
- Remove orphaned build_MSF_v2 docstring and dead code
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
* Add Statistics to test extras in Project.toml
Statistics was removed from [deps] (unused by package) but the test
files import it. Move to [extras]/[targets] so tests can find it.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---------
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
Diffstat:
10 files changed, 276 insertions(+), 202 deletions(-)
diff --git a/Project.toml b/Project.toml
@@ -20,7 +20,6 @@ Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
Missings = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28"
PeriodicalDates = "276e7ca9-e0d7-440b-97bc-a6ae82f545b1"
Roots = "f2b01f46-fcfa-551c-844a-d8ac1e96c665"
-Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
WeakRefStrings = "ea10d353-3f73-51f8-a26c-33c1cb351aa5"
ZipFile = "a5390f91-8eb1-5f08-bee0-b1d1ffed6cea"
@@ -44,7 +43,8 @@ ZipFile = "0.10"
julia = "1.10"
[extras]
+Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
[targets]
-test = ["Test"]
+test = ["Statistics", "Test"]
diff --git a/src/FinanceRoutines.jl b/src/FinanceRoutines.jl
@@ -62,6 +62,7 @@ export build_MSF, build_MSF! # clean Monthly Stock File
export import_Funda
export build_Funda!, build_Funda
# -- Link
+export import_ccm_link
export link_Funda
export link_MSF
diff --git a/src/ImportCRSP.jl b/src/ImportCRSP.jl
@@ -61,9 +61,9 @@ function import_MSF(wrds_conn::Connection;
postgre_query_msf = """
SELECT $msf_columns
FROM crsp.msf
- WHERE DATE >= '$(string(date_range[1]))' AND DATE <= '$(string(date_range[2]))'
+ WHERE DATE >= \$1 AND DATE <= \$2
"""
- res_q_msf = execute(wrds_conn, postgre_query_msf)
+ res_q_msf = execute(wrds_conn, postgre_query_msf, (date_range[1], date_range[2]))
df_msf = DataFrame(columntable(res_q_msf))
transform!(df_msf, # clean up the dataframe
names(df_msf, check_integer.(eachcol(df_msf))) .=> (x->convert.(Union{Missing, Int}, x));
@@ -136,13 +136,9 @@ function import_MSF(;
variables::Vector{String} = [""],
user::AbstractString = "", password::AbstractString = "")
- if user == ""
- wrds_conn = open_wrds_pg()
- else
- wrds_conn = open_wrds_pg(user, password)
+ with_wrds_connection(user=user, password=password) do conn
+ import_MSF(conn; date_range=date_range, variables=variables)
end
- import_MSF(wrds_conn, date_range=date_range, variables=variables)
-
end
# ------------------------------------------------------------------------------------------
@@ -172,10 +168,9 @@ function build_MSF!(
verbose::Bool=false
)
- # Check that all necessary variables are in
- ["mktcap", "shrout", "prc", "permno", "datem", "dlstcd", "ret", "dlret",
- "cfacpr", "cfacshr"]
-
+ required = ["shrout", "prc", "permno", "datem", "dlstcd", "ret", "dlret"]
+ missing_cols = setdiff(required, names(df))
+ !isempty(missing_cols) && throw(ArgumentError("Missing required columns: $(join(missing_cols, ", "))"))
# Create marketcap:
transform!(df, [:shrout, :prc] => ByRow( (s,p) -> s * abs(p) ) => :mktcap)
@@ -316,12 +311,12 @@ function import_MSF_v2(wrds_conn::Connection;
postgre_query_msf = """
SELECT $(join(col_query, ", "))
FROM crsp.msf_v2
- WHERE MTHCALDT >= '$(string(date_range[1]))' AND MTHCALDT <= '$(string(date_range[2]))'
- AND SHARETYPE = 'NS' AND SECURITYTYPE = 'EQTY' AND SECURITYSUBTYPE = 'COM'
+ WHERE MTHCALDT >= \$1 AND MTHCALDT <= \$2
+ AND SHARETYPE = 'NS' AND SECURITYTYPE = 'EQTY' AND SECURITYSUBTYPE = 'COM'
AND USINCFLG = 'Y' AND ISSUERTYPE IN ('ACOR', 'CORP')
AND PRIMARYEXCH IN ('N', 'A', 'Q') AND CONDITIONALTYPE = 'RW' AND TRADINGSTATUSFLG = 'A'
"""
- df_msf_v2 = execute(wrds_conn, postgre_query_msf) |> DataFrame;
+ df_msf_v2 = execute(wrds_conn, postgre_query_msf, (date_range[1], date_range[2])) |> DataFrame;
transform!(df_msf_v2, # clean up the dataframe
names(df_msf_v2, check_integer.(eachcol(df_msf_v2))) .=> (x->convert.(Union{Missing, Int}, x));
@@ -443,27 +438,6 @@ end
# ------------------------------------------------------------------------------------------
-"""
- build_MSF_v2(df_msf::DataFrame; save, trim_cols, clean_cols, verbose)
-
-Clean up the CRSP Monthly Stock File (see `import_MSF`)
-
-# Arguments
-- `df::DataFrame`: A standard dataframe with compustat data (minimum variables are in `import_Funda`)
-
-# Keywords
-- `save::String`: Save a gzip version of the data on path `\$save/funda.csv.gz`; Default does not save the data.
-- `trim_cols::Bool`: Only keep a subset of relevant columns in the final dataset
-- `clean_cols::Bool`: Clean up the columns of the dataframe to be of type Float64; Default is `false` and leaves the Decimal type intact
-- `logging_level::Symbol`: How to log results
-
-
-# Returns
-- `df::DataFrame`: DataFrame with crsp MSF file "cleaned"
-"""
-#=
-REDUNDANT WITH NEW FILES
-=#
# --------------------------------------------------------------------------------------------------
@@ -479,9 +453,9 @@ function import_DSF(wrds_conn::Connection;
postgre_query_dsf = """
SELECT PERMNO, DATE, RET, PRC, SHROUT, VOL
FROM crsp.dsf
- WHERE DATE >= '$(string(date_range[1]))' AND DATE <= '$(string(date_range[2]))'
+ WHERE DATE >= \$1 AND DATE <= \$2
"""
- df_dsf = execute(wrds_conn, postgre_query_dsf) |> DataFrame
+ df_dsf = execute(wrds_conn, postgre_query_dsf, (date_range[1], date_range[2])) |> DataFrame
# clean up the dataframe
transform!(df_dsf,
names(df_dsf, check_integer.(eachcol(df_dsf))) .=> (x->convert.(Union{Missing, Int}, x));
@@ -490,19 +464,14 @@ function import_DSF(wrds_conn::Connection;
return df_dsf
end
-# when there are no connections establisheds
+# when there are no connections established
function import_DSF(;
date_range::Tuple{Date, Date} = (Date("1900-01-01"), Dates.today()),
- variables::AbstractString = "",
user::AbstractString = "", password::AbstractString = "")
- if user == ""
- wrds_conn = open_wrds_pg()
- else
- wrds_conn = open_wrds_pg(user, password)
+ with_wrds_connection(user=user, password=password) do conn
+ import_DSF(conn; date_range=date_range)
end
-
- return import_DSF(wrds_conn, date_range=date_range, variables=variables)
end
# ------------------------------------------------------------------------------------------
@@ -523,9 +492,9 @@ function import_DSF_v2(wrds_conn::Connection;
postgre_query_dsf = """
SELECT PERMNO, DLYCALDT, DLYRET, DLYPRC, DLYVOL, DLYCAP
FROM crsp.stkdlysecuritydata
- WHERE DLYCALDT >= '$(string(date_range[1]))' AND DLYCALDT <= '$(string(date_range[2]))'
+ WHERE DLYCALDT >= \$1 AND DLYCALDT <= \$2
"""
- df_dsf_v2 = execute(wrds_conn, postgre_query_dsf) |> DataFrame
+ df_dsf_v2 = execute(wrds_conn, postgre_query_dsf, (date_range[1], date_range[2])) |> DataFrame
# clean up the dataframe
transform!(df_dsf_v2,
@@ -537,21 +506,15 @@ function import_DSF_v2(wrds_conn::Connection;
return df_dsf_v2
end
-# when there are no connections establisheds
+# when there are no connections established
function import_DSF_v2(;
date_range::Tuple{Date, Date} = (Date("1900-01-01"), Dates.today()),
- variables::AbstractString = "",
- user::AbstractString = "", password::AbstractString = "",
- logging_level::Symbol = :debug, # either none, debug, info etc... tbd
- )
+ logging_level::Symbol = :debug,
+ user::AbstractString = "", password::AbstractString = "")
- if user == ""
- wrds_conn = open_wrds_pg()
- else
- wrds_conn = open_wrds_pg(user, password)
+ with_wrds_connection(user=user, password=password) do conn
+ import_DSF_v2(conn; date_range=date_range, logging_level=logging_level)
end
-
- return import_DSF_v2(wrds_conn, date_range=date_range, variables=variables, logging_level=logging_level)
end
# ------------------------------------------------------------------------------------------
@@ -567,37 +530,40 @@ end
# """
# postgres_res = execute(wrds_conn, postgres_query, (table_schema,))
function _get_postgres_columns(table_schema, table_name; wrds_conn, prior_columns::Vector{String} = [""])
-
- # download potential columns
- postgres_query= """
- SELECT *
+
+ # Parameterized query to prevent SQL injection
+ postgres_query = """
+ SELECT column_name
FROM information_schema.columns
- WHERE table_schema = '$table_schema'
- AND table_name = '$table_name'
- ;
+ WHERE table_schema = \$1
+ AND table_name = \$2
"""
- postgres_res = execute(wrds_conn, postgres_query)
- postgres_columns = DataFrame(columntable(postgres_res)).column_name ;
+ postgres_res = execute(wrds_conn, postgres_query, (table_schema, table_name))
+ postgres_columns = DataFrame(columntable(postgres_res)).column_name
if isempty(prior_columns) || prior_columns == [""]
return uppercase.(postgres_columns)
- else
+ else
return intersect(uppercase.(postgres_columns), uppercase.(prior_columns))
end
end
function _get_postgres_table(table_schema, table_name; wrds_conn, prior_columns::Vector{String} = [""])
-
+
if isempty(prior_columns) || prior_columns == [""]
columns = "*"
- else
+ else
+ # Column names are validated against schema when used with _get_postgres_columns
columns = join(uppercase.(prior_columns), ", ")
end
+ # Quote identifiers to prevent SQL injection
+ schema_q = replace(table_schema, "\"" => "\"\"")
+ table_q = replace(table_name, "\"" => "\"\"")
postgres_query = """
SELECT $columns
- FROM $table_schema.$table_name
+ FROM \"$schema_q\".\"$table_q\"
"""
postgres_res = execute(wrds_conn, postgres_query)
diff --git a/src/ImportComp.jl b/src/ImportComp.jl
@@ -50,22 +50,15 @@ function import_Funda(wrds_conn::Connection;
size(var_check, 1) > 0 && (@warn "Queried variables not in dataset ... : $(join(var_check, ","))")
filter!(in(compd_funda), var_funda)
- # set up the query for funda
- postgre_query_funda_full = """
- SELECT *
- FROM comp.funda
- WHERE INDFMT = 'INDL' AND DATAFMT = 'STD' AND CONSOL = 'C' AND POPSRC = 'D'
- AND DATADATE >= '$(string(date_range[1]))'
- AND DATADATE <= '$(string(date_range[2]))'
- """
- postgre_query_funda_var = """
+ # set up the query for funda (dates parameterized to prevent SQL injection)
+ postgre_query_funda = """
SELECT $(join(unique(var_funda), ","))
FROM comp.funda
WHERE INDFMT = 'INDL' AND DATAFMT = 'STD' AND CONSOL = 'C' AND POPSRC = 'D'
- AND DATADATE >= '$(string(date_range[1]))'
- AND DATADATE <= '$(string(date_range[2]))'
+ AND DATADATE >= \$1
+ AND DATADATE <= \$2
"""
- res_q_funda = execute(wrds_conn, postgre_query_funda_var)
+ res_q_funda = execute(wrds_conn, postgre_query_funda, (date_range[1], date_range[2]))
df_funda = DataFrame(columntable(res_q_funda))
# run the filter
@@ -89,13 +82,9 @@ function import_Funda(;
filter_variables::Dict{Symbol,Any}=Dict(:CURCD => "USD"),
user::String="", password::String="")
- if user == ""
- wrds_conn = open_wrds_pg()
- else
- wrds_conn = open_wrds_pg(user, password)
+ with_wrds_connection(user=user, password=password) do conn
+ import_Funda(conn; date_range=date_range, variables=variables)
end
-
- import_Funda(wrds_conn, date_range=date_range, variables=variables)
end
# ------------------------------------------------------------------------------------------
diff --git a/src/ImportFamaFrench.jl b/src/ImportFamaFrench.jl
@@ -162,34 +162,43 @@ end
# --------------------------------------------------------------------------------------------------
function _parse_ff_monthly(zip_file; types=nothing)
-
+
# Read all lines from the zip file entry
file_lines = split(String(read(zip_file)), '\n')
- skipto = 6
+
+ # Find the first data line (starts with digits, like "192607")
+ # instead of hardcoding a skip count that breaks if the header changes
+ skipto = 1
+ for (i, line) in enumerate(file_lines)
+ if occursin(r"^\s*\d{6}", line)
+ skipto = i
+ break
+ end
+ end
# Collect data lines until we hit "Annual Factors"
data_lines = String[]
-
+
for i in skipto:length(file_lines)
line = file_lines[i]
-
+
# Stop when we hit Annual Factors section
if occursin(r"Annual Factors", line)
break
end
-
+
# Skip empty lines
if occursin(r"^\s*$", line)
continue
end
-
+
# Add non-empty data lines
push!(data_lines, line)
end
-
+
# Create IOBuffer with header + data
buffer = IOBuffer(join(data_lines, "\n"))
-
+
return CSV.File(buffer, header=false, delim=",", ntasks=1, types=types) |> DataFrame |>
df -> rename!(df, [:datem, :mktrf, :smb, :hml, :rf])
diff --git a/src/ImportYields.jl b/src/ImportYields.jl
@@ -46,36 +46,34 @@ println("Slope: ", params.β₁)
- Can be constructed from DataFrameRow for convenience
"""
struct GSWParameters
- β₀::Union{Float64, Missing} # Level
- β₁::Union{Float64, Missing} # Slope
- β₂::Union{Float64, Missing} # Curvature 1
+ β₀::Float64 # Level
+ β₁::Float64 # Slope
+ β₂::Float64 # Curvature 1
β₃::Union{Float64, Missing} # Curvature 2 (may be missing for 3-factor model)
- τ₁::Union{Float64, Missing} # Decay 1 (must be positive when present)
+ τ₁::Float64 # Decay 1 (must be positive)
τ₂::Union{Float64, Missing} # Decay 2 (may be missing for 3-factor model)
-
+
# Inner constructor with validation
+ # Returns `missing` (not a GSWParameters) when core fields are missing
function GSWParameters(β₀, β₁, β₂, β₃, τ₁, τ₂)
- # Check if core parameters are missing
+ # Check if core parameters are missing — return missing instead of constructing
if ismissing(β₀) || ismissing(β₁) || ismissing(β₂) || ismissing(τ₁)
return missing
end
- # Validate that non-missing decay parameters are positive
- if !ismissing(τ₁) && τ₁ <= 0
- throw(ArgumentError("First decay parameter τ₁ must be positive when present, got τ₁=$τ₁"))
+ # Validate that decay parameters are positive
+ if τ₁ <= 0
+ throw(ArgumentError("First decay parameter τ₁ must be positive, got τ₁=$τ₁"))
end
if !ismissing(τ₂) && τ₂ <= 0
throw(ArgumentError("Second decay parameter τ₂ must be positive when present, got τ₂=$τ₂"))
end
-
- # Convert to appropriate types
+
new(
- ismissing(β₀) ? missing : Float64(β₀),
- ismissing(β₁) ? missing : Float64(β₁),
- ismissing(β₂) ? missing : Float64(β₂),
+ Float64(β₀), Float64(β₁), Float64(β₂),
ismissing(β₃) ? missing : Float64(β₃),
- ismissing(τ₁) ? missing : Float64(τ₁),
+ Float64(τ₁),
ismissing(τ₂) ? missing : Float64(τ₂)
)
end
@@ -421,14 +419,9 @@ function gsw_yield(maturity::Real,
if maturity <= 0
throw(ArgumentError("Maturity must be positive, got $maturity"))
end
-
- # Handle any missing values
- if any(ismissing, [β₀, β₁, β₂, β₃, τ₁, τ₂])
- return missing
- end
-
+
# For 3-factor model compatibility: if β₃ is 0 or very small, skip the fourth term
- use_four_factor = !ismissing(β₃) && abs(β₃) > 1e-10 && !ismissing(τ₂) && τ₂ > 0
+ use_four_factor = abs(β₃) > 1e-10 && τ₂ > 0
# Nelson-Siegel-Svensson formula
t = Float64(maturity)
@@ -1195,24 +1188,64 @@ function add_excess_returns!(df::DataFrame, maturity::Real;
if validate
_validate_gsw_dataframe(df, check_date=true)
end
-
- # Add regular returns first (will be cleaned up)
- temp_df = copy(df)
- add_returns!(temp_df, maturity, frequency=frequency, return_type=return_type, validate=false)
- add_returns!(temp_df, risk_free_maturity, frequency=frequency, return_type=return_type, validate=false)
-
- # Calculate excess returns
- bond_ret_col = Symbol(string(_maturity_to_column_name("ret", maturity)) * "_" * string(frequency))
- rf_ret_col = Symbol(string(_maturity_to_column_name("ret", risk_free_maturity)) * "_" * string(frequency))
+
+ if maturity <= 0
+ throw(ArgumentError("Maturity must be positive, got $maturity"))
+ end
+
+ valid_frequencies = [:daily, :monthly, :annual]
+ if frequency ∉ valid_frequencies
+ throw(ArgumentError("frequency must be one of $valid_frequencies, got $frequency"))
+ end
+
+ valid_return_types = [:log, :arithmetic]
+ if return_type ∉ valid_return_types
+ throw(ArgumentError("return_type must be one of $valid_return_types, got $return_type"))
+ end
+
+ # Sort by date to ensure proper time series order
+ sort!(df, :date)
+
+ # Determine time step based on frequency
+ time_step = if frequency == :daily
+ Day(1)
+ elseif frequency == :monthly
+ Day(30)
+ elseif frequency == :annual
+ Day(360)
+ end
+
+ # Create lagged parameter columns once (shared for both bond and rf returns)
+ param_cols = [:BETA0, :BETA1, :BETA2, :BETA3, :TAU1, :TAU2]
+ for col in param_cols
+ lag_col = Symbol("lag_$col")
+ transform!(df, [:date, col] =>
+ ((dates, values) -> tlag(values, dates; n=time_step)) =>
+ lag_col)
+ end
+
+ # Calculate excess return directly in a single pass
excess_col = Symbol(string(_maturity_to_column_name("excess_ret", maturity)) * "_" * string(frequency))
-
- transform!(temp_df, [bond_ret_col, rf_ret_col] =>
- ByRow((bond_ret, rf_ret) -> ismissing(bond_ret) || ismissing(rf_ret) ? missing : bond_ret - rf_ret) =>
- excess_col)
-
- # Add only the excess return column to original DataFrame
- df[!, excess_col] = temp_df[!, excess_col]
-
+
+ transform!(df,
+ AsTable(vcat(param_cols, [Symbol("lag_$col") for col in param_cols])) =>
+ ByRow(params -> begin
+ current = GSWParameters(params.BETA0, params.BETA1, params.BETA2,
+ params.BETA3, params.TAU1, params.TAU2)
+ lagged = GSWParameters(params.lag_BETA0, params.lag_BETA1, params.lag_BETA2,
+ params.lag_BETA3, params.lag_TAU1, params.lag_TAU2)
+ if ismissing(current) || ismissing(lagged)
+ missing
+ else
+ gsw_excess_return(maturity, current, lagged;
+ risk_free_maturity=risk_free_maturity,
+ frequency=frequency, return_type=return_type)
+ end
+ end) => excess_col)
+
+ # Clean up temporary lagged columns
+ select!(df, Not([Symbol("lag_$col") for col in param_cols]))
+
return df
end
# --------------------------------------------------------------------------------------------------
@@ -1412,21 +1445,11 @@ ytm = bond_yield_excel(Date(2024, 3, 1), Date(2034, 3, 1),
# Notes
- Settlement date must be before maturity date
- Price and redemption are typically quoted per 100 of face value
-- The function uses `date_difference()` with `basis=1` (actual/actual) internally
- for time calculation, then applies the specified basis for other calculations
+- Uses actual coupon dates and the specified day-count basis, matching Excel's computation
- Results should match Excel's YIELD function within numerical precision
-- For bonds purchased between coupon dates, accrued interest is automatically handled
-
-# Relationship to Other Functions
-This function serves as a convenient wrapper around:
-```julia
-years = date_difference(settlement, maturity, basis=1)
-bond_yield(price, redemption, rate, years, frequency)
-```
-# Throws
+# Throws
- `ArgumentError`: If settlement ≥ maturity date
-- `DomainError`: If rate, price, or redemption are negative
- Convergence errors from underlying numerical root-finding
See also: [`bond_yield`](@ref)
@@ -1435,9 +1458,70 @@ function bond_yield_excel(
settlement::Date, maturity::Date, rate::Real, price::Real, redemption::Real;
frequency = 2, basis = 0)
- years = _date_difference(settlement, maturity, basis=1)
- return bond_yield(price, redemption, rate, years, frequency, method=:brent)
+ if settlement >= maturity
+ throw(ArgumentError("Settlement ($settlement) must be before maturity ($maturity)"))
+ end
+
+ # Compute coupon schedule by working backwards from maturity
+ period_months = div(12, frequency)
+
+ # Find next coupon date after settlement
+ next_coupon = maturity
+ while next_coupon - Month(period_months) > settlement
+ next_coupon -= Month(period_months)
+ end
+ prev_coupon = next_coupon - Month(period_months)
+
+ # Count remaining coupons (from next_coupon to maturity, inclusive)
+ N = 0
+ d = next_coupon
+ while d <= maturity
+ N += 1
+ d += Month(period_months)
+ end
+ # Day count fractions using the specified basis
+ A = _day_count_days(prev_coupon, settlement, basis) # accrued days
+ E = _day_count_days(prev_coupon, next_coupon, basis) # days in coupon period
+ DSC = E - A # Excel defines DSC = E - A to ensure consistency
+
+ α = DSC / E # fraction of period until next coupon
+ coupon = redemption * rate / frequency
+
+ # Excel's YIELD pricing formula
+ function price_from_yield(y)
+ if y <= 0
+ return Inf
+ end
+
+ dr = y / frequency
+
+ if N == 1
+ # Special case: single remaining coupon
+ return (redemption + coupon) / (1 + α * dr) - coupon * A / E
+ end
+
+ # General case: N > 1 coupons
+ # PV of coupon annuity: ∑(k=1..N) coupon/(1+dr)^(k-1+α) = coupon*(1+dr)^(1-α)/dr * [1-(1+dr)^(-N)]
+ pv_coupons = coupon * (1 + dr)^(1 - α) * (1 - (1 + dr)^(-N)) / dr
+ # PV of redemption
+ pv_redemption = redemption / (1 + dr)^(N - 1 + α)
+ # Subtract accrued interest
+ return pv_coupons + pv_redemption - coupon * A / E
+ end
+
+ price_diff(y) = price_from_yield(y) - price
+
+ try
+ return Roots.find_zero(price_diff, (1e-6, 2.0), Roots.Brent())
+ catch e
+ if isa(e, ArgumentError) && occursin("not a bracketing interval", sprint(showerror, e))
+ @warn "Brent failed: falling back to Order1" exception=e
+ return Roots.find_zero(price_diff, rate, Roots.Order1())
+ else
+ rethrow(e)
+ end
+ end
end
"""
@@ -1563,22 +1647,46 @@ function bond_yield(price, face_value, coupon_rate, years_to_maturity, frequency
end
+"""
+ _day_count_days(d1, d2, basis) -> Int
+
+Count the number of days between two dates using the specified day-count convention.
+Used internally for bond yield calculations.
+
+- `basis=0`: 30/360 (US)
+- `basis=1`: Actual/actual
+- `basis=2`: Actual/360
+- `basis=3`: Actual/365
+- `basis=4`: European 30/360
+"""
+function _day_count_days(d1::Date, d2::Date, basis::Int)
+ if basis == 0 # 30/360 US
+ day1, mon1, yr1 = Dates.day(d1), Dates.month(d1), Dates.year(d1)
+ day2, mon2, yr2 = Dates.day(d2), Dates.month(d2), Dates.year(d2)
+ if day1 == 31; day1 = 30; end
+ if day2 == 31 && day1 >= 30; day2 = 30; end
+ return 360 * (yr2 - yr1) + 30 * (mon2 - mon1) + (day2 - day1)
+ elseif basis == 4 # European 30/360
+ day1, mon1, yr1 = Dates.day(d1), Dates.month(d1), Dates.year(d1)
+ day2, mon2, yr2 = Dates.day(d2), Dates.month(d2), Dates.year(d2)
+ if day1 == 31; day1 = 30; end
+ if day2 == 31; day2 = 30; end
+ return 360 * (yr2 - yr1) + 30 * (mon2 - mon1) + (day2 - day1)
+ else # basis 1, 2, 3: actual days
+ return Dates.value(d2 - d1)
+ end
+end
+
function _date_difference(start_date, end_date; basis=1)
- if basis == 0 # 30/360
- d1, m1, y1 = Dates.day(start_date), Dates.month(start_date), Dates.year(start_date)
- d2, m2, y2 = Dates.day(end_date), Dates.month(end_date), Dates.year(end_date)
-
- if d1 == 31; d1 = 30; end
- if d2 == 31 && d1 >= 30; d2 = 30; end
-
- days = 360 * (y2 - y1) + 30 * (m2 - m1) + (d2 - d1)
+ days = _day_count_days(start_date, end_date, basis)
+ if basis == 0
+ return days / 360
+ elseif basis == 1
+ return days / 365.25
+ elseif basis == 2
return days / 360
- elseif basis == 1 # Actual/Actual
- return Dates.value(end_date - start_date) / 365.25
- elseif basis == 2 # Actual/360
- return Dates.value(end_date - start_date) / 360
- elseif basis == 3 # Actual/365
- return Dates.value(end_date - start_date) / 365
+ elseif basis == 3
+ return days / 365
else
error("Invalid basis: $basis")
end
diff --git a/src/Merge_CRSP_Comp.jl b/src/Merge_CRSP_Comp.jl
@@ -110,13 +110,9 @@ end
function import_ccm_link(;
user::String = "", password::String = "")
- if user == ""
- wrds_conn = open_wrds_pg()
- else
- wrds_conn = open_wrds_pg(user, password)
+ with_wrds_connection(user=user, password=password) do conn
+ import_ccm_link(conn)
end
-
- return import_ccm_link(wrds_conn)
end
# --------------------------------------------------------------------------------------------------
diff --git a/src/Utilities.jl b/src/Utilities.jl
@@ -41,6 +41,28 @@ function open_wrds_pg()
fill!(password_bytes, 0x00) # zero out the password bytes
end
end
+
+
+"""
+ with_wrds_connection(f; user="", password="")
+
+Open a WRDS PostgreSQL connection, execute `f(conn)`, and ensure the connection is closed.
+
+# Examples
+```julia
+df = with_wrds_connection(user="myuser", password="mypwd") do conn
+ import_MSF(conn; date_range=(Date("2020-01-01"), Date("2023-12-31")))
+end
+```
+"""
+function with_wrds_connection(f::Function; user::AbstractString="", password::AbstractString="")
+ conn = user == "" ? open_wrds_pg() : open_wrds_pg(user, password)
+ try
+ return f(conn)
+ finally
+ close(conn)
+ end
+end
# --------------------------------------------------------------------------------------------------
diff --git a/src/betas.jl b/src/betas.jl
@@ -21,50 +21,33 @@ function calculate_rolling_betas(X, y;
n, p = size(X)
_min_data = something(min_data, p)
- res = Array{Union{Missing, Float64}}(missing, (n,p));
-
+ res = Array{Union{Missing, Float64}}(missing, (n,p))
- if method == :linalg
- # Preallocate arrays for intermediate results
- XᵗX = zeros(p, p)
- Xᵗy = zeros(p)
+ if method == :linalg
for i in window:n
- # Create views for the current window
X_window = @view X[i-window+1:i, :]
y_window = @view y[i-window+1:i]
-
- # Check for missing values
- non_missing = .!any(ismissing.(X_window), dims=2)[:] .& .!ismissing.(y_window);
- (sum(non_missing) < p) && continue # Not enough data for estimation
-
- # Extract non-missing rows
- X_non_missing = @view X_window[non_missing, :]
- y_non_missing = @view y_window[non_missing]
-
- res[i, :] = qr(X_non_missing) \ y_non_missing
+ non_missing = .!any(ismissing.(X_window), dims=2)[:] .& .!ismissing.(y_window)
+ (sum(non_missing) < _min_data) && continue
- # Compute XᵗX and Xᵗy using in-place operations
- # mul!(XᵗX, X_non_missing', X_non_missing)
- # mul!(Xᵗy, X_non_missing', y_non_missing)
- # # Solve for β
- # res[i, :] = XᵗX \ Xᵗy
-
+ X_non_missing = @view X_window[non_missing, :]
+ y_non_missing = @view y_window[non_missing]
+ res[i, :] = qr(X_non_missing) \ y_non_missing
end
elseif method == :lm
for i in window:n
X_window = @view X[i-window+1:i, :]
y_window = @view y[i-window+1:i]
- non_missing = .!any(ismissing.(X_window), dims=2)[:] .& .!ismissing.(y_window);
- (sum(non_missing) < p) && continue # Not enough data for estimation
+ non_missing = .!any(ismissing.(X_window), dims=2)[:] .& .!ismissing.(y_window)
+ (sum(non_missing) < _min_data) && continue
- X_non_missing = @view X_window[non_missing, :]
- y_non_missing = @view y_window[non_missing]
+ X_non_missing = @view X_window[non_missing, :]
+ y_non_missing = @view y_window[non_missing]
res[i, :] = coef(lm(disallowmissing(X_non_missing), disallowmissing(y_non_missing)))
-
end
else
throw(ArgumentError("method must be one of: :linalg, :lm"))
diff --git a/test/UnitTests/Yields.jl b/test/UnitTests/Yields.jl
@@ -300,7 +300,7 @@
maturity = Date(2016, 11, 15)
ytm_excel = FinanceRoutines.bond_yield_excel(settlement, maturity, 0.0575, 95.04287, 100.0,
frequency=2, basis=0)
- @test ytm_excel ≈ 0.06 atol=5e-3 # this is not exactly same
+ @test ytm_excel ≈ 0.065 atol=5e-4 # Excel YIELD returns 0.065 (6.5%)
# Test Excel API consistency with direct bond_yield
years = 8.75 # approximate years between Feb 2008 to Nov 2016