FinanceRoutines.jl

Financial data routines for Julia
Log | Files | Refs | README | LICENSE

commit f8e2820045f2e9154061c1ecfd0ccc9fe009f59d
parent c12513ffdb5a3e3ddf38f7429ffac9a84fee8b2d
Author: Erik Loualiche <eloualiche@users.noreply.github.com>
Date:   Sun, 15 Feb 2026 18:49:31 -0600

Fix SQL injection, connection leaks, bond pricing, and cleanup (#26)

* Code review improvements: security, correctness, and cleanup

- Fix SQL injection: parameterize queries in ImportCRSP and ImportComp
- Fix connection leaks: add with_wrds_connection helper, use try/finally
- Fix bond_yield_excel: correct coupon annuity PV formula (missing (1+dr)
  factor), compute DSC=E-A for 30/360 day count consistency, add
  _day_count_days helper — all 3 pre-existing test failures now pass
- Fix betas: use _min_data threshold instead of parameter count p
- Fix build_MSF!: replace dead code with actual column validation
- Fix import_DSF convenience methods: remove invalid variables kwarg
- Tighten GSWParameters types: core fields now concrete Float64
- Make FF monthly parser robust to header format changes
- Optimize add_excess_returns! to single lag computation pass
- Export import_ccm_link, remove unused Statistics dependency
- Remove orphaned build_MSF_v2 docstring and dead code

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* Add Statistics to test extras in Project.toml

Statistics was removed from [deps] (unused by package) but the test
files import it. Move to [extras]/[targets] so tests can find it.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
Diffstat:
MProject.toml | 4++--
Msrc/FinanceRoutines.jl | 1+
Msrc/ImportCRSP.jl | 110+++++++++++++++++++++++++++----------------------------------------------------
Msrc/ImportComp.jl | 25+++++++------------------
Msrc/ImportFamaFrench.jl | 25+++++++++++++++++--------
Msrc/ImportYields.jl | 242+++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------------
Msrc/Merge_CRSP_Comp.jl | 8++------
Msrc/Utilities.jl | 22++++++++++++++++++++++
Msrc/betas.jl | 39+++++++++++----------------------------
Mtest/UnitTests/Yields.jl | 2+-
10 files changed, 276 insertions(+), 202 deletions(-)

diff --git a/Project.toml b/Project.toml @@ -20,7 +20,6 @@ Logging = "56ddb016-857b-54e1-b83d-db4d58db5568" Missings = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28" PeriodicalDates = "276e7ca9-e0d7-440b-97bc-a6ae82f545b1" Roots = "f2b01f46-fcfa-551c-844a-d8ac1e96c665" -Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" WeakRefStrings = "ea10d353-3f73-51f8-a26c-33c1cb351aa5" ZipFile = "a5390f91-8eb1-5f08-bee0-b1d1ffed6cea" @@ -44,7 +43,8 @@ ZipFile = "0.10" julia = "1.10" [extras] +Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["Test"] +test = ["Statistics", "Test"] diff --git a/src/FinanceRoutines.jl b/src/FinanceRoutines.jl @@ -62,6 +62,7 @@ export build_MSF, build_MSF! # clean Monthly Stock File export import_Funda export build_Funda!, build_Funda # -- Link +export import_ccm_link export link_Funda export link_MSF diff --git a/src/ImportCRSP.jl b/src/ImportCRSP.jl @@ -61,9 +61,9 @@ function import_MSF(wrds_conn::Connection; postgre_query_msf = """ SELECT $msf_columns FROM crsp.msf - WHERE DATE >= '$(string(date_range[1]))' AND DATE <= '$(string(date_range[2]))' + WHERE DATE >= \$1 AND DATE <= \$2 """ - res_q_msf = execute(wrds_conn, postgre_query_msf) + res_q_msf = execute(wrds_conn, postgre_query_msf, (date_range[1], date_range[2])) df_msf = DataFrame(columntable(res_q_msf)) transform!(df_msf, # clean up the dataframe names(df_msf, check_integer.(eachcol(df_msf))) .=> (x->convert.(Union{Missing, Int}, x)); @@ -136,13 +136,9 @@ function import_MSF(; variables::Vector{String} = [""], user::AbstractString = "", password::AbstractString = "") - if user == "" - wrds_conn = open_wrds_pg() - else - wrds_conn = open_wrds_pg(user, password) + with_wrds_connection(user=user, password=password) do conn + import_MSF(conn; date_range=date_range, variables=variables) end - import_MSF(wrds_conn, date_range=date_range, variables=variables) - end # ------------------------------------------------------------------------------------------ @@ -172,10 +168,9 @@ function build_MSF!( verbose::Bool=false ) - # Check that all necessary variables are in - ["mktcap", "shrout", "prc", "permno", "datem", "dlstcd", "ret", "dlret", - "cfacpr", "cfacshr"] - + required = ["shrout", "prc", "permno", "datem", "dlstcd", "ret", "dlret"] + missing_cols = setdiff(required, names(df)) + !isempty(missing_cols) && throw(ArgumentError("Missing required columns: $(join(missing_cols, ", "))")) # Create marketcap: transform!(df, [:shrout, :prc] => ByRow( (s,p) -> s * abs(p) ) => :mktcap) @@ -316,12 +311,12 @@ function import_MSF_v2(wrds_conn::Connection; postgre_query_msf = """ SELECT $(join(col_query, ", ")) FROM crsp.msf_v2 - WHERE MTHCALDT >= '$(string(date_range[1]))' AND MTHCALDT <= '$(string(date_range[2]))' - AND SHARETYPE = 'NS' AND SECURITYTYPE = 'EQTY' AND SECURITYSUBTYPE = 'COM' + WHERE MTHCALDT >= \$1 AND MTHCALDT <= \$2 + AND SHARETYPE = 'NS' AND SECURITYTYPE = 'EQTY' AND SECURITYSUBTYPE = 'COM' AND USINCFLG = 'Y' AND ISSUERTYPE IN ('ACOR', 'CORP') AND PRIMARYEXCH IN ('N', 'A', 'Q') AND CONDITIONALTYPE = 'RW' AND TRADINGSTATUSFLG = 'A' """ - df_msf_v2 = execute(wrds_conn, postgre_query_msf) |> DataFrame; + df_msf_v2 = execute(wrds_conn, postgre_query_msf, (date_range[1], date_range[2])) |> DataFrame; transform!(df_msf_v2, # clean up the dataframe names(df_msf_v2, check_integer.(eachcol(df_msf_v2))) .=> (x->convert.(Union{Missing, Int}, x)); @@ -443,27 +438,6 @@ end # ------------------------------------------------------------------------------------------ -""" - build_MSF_v2(df_msf::DataFrame; save, trim_cols, clean_cols, verbose) - -Clean up the CRSP Monthly Stock File (see `import_MSF`) - -# Arguments -- `df::DataFrame`: A standard dataframe with compustat data (minimum variables are in `import_Funda`) - -# Keywords -- `save::String`: Save a gzip version of the data on path `\$save/funda.csv.gz`; Default does not save the data. -- `trim_cols::Bool`: Only keep a subset of relevant columns in the final dataset -- `clean_cols::Bool`: Clean up the columns of the dataframe to be of type Float64; Default is `false` and leaves the Decimal type intact -- `logging_level::Symbol`: How to log results - - -# Returns -- `df::DataFrame`: DataFrame with crsp MSF file "cleaned" -""" -#= -REDUNDANT WITH NEW FILES -=# # -------------------------------------------------------------------------------------------------- @@ -479,9 +453,9 @@ function import_DSF(wrds_conn::Connection; postgre_query_dsf = """ SELECT PERMNO, DATE, RET, PRC, SHROUT, VOL FROM crsp.dsf - WHERE DATE >= '$(string(date_range[1]))' AND DATE <= '$(string(date_range[2]))' + WHERE DATE >= \$1 AND DATE <= \$2 """ - df_dsf = execute(wrds_conn, postgre_query_dsf) |> DataFrame + df_dsf = execute(wrds_conn, postgre_query_dsf, (date_range[1], date_range[2])) |> DataFrame # clean up the dataframe transform!(df_dsf, names(df_dsf, check_integer.(eachcol(df_dsf))) .=> (x->convert.(Union{Missing, Int}, x)); @@ -490,19 +464,14 @@ function import_DSF(wrds_conn::Connection; return df_dsf end -# when there are no connections establisheds +# when there are no connections established function import_DSF(; date_range::Tuple{Date, Date} = (Date("1900-01-01"), Dates.today()), - variables::AbstractString = "", user::AbstractString = "", password::AbstractString = "") - if user == "" - wrds_conn = open_wrds_pg() - else - wrds_conn = open_wrds_pg(user, password) + with_wrds_connection(user=user, password=password) do conn + import_DSF(conn; date_range=date_range) end - - return import_DSF(wrds_conn, date_range=date_range, variables=variables) end # ------------------------------------------------------------------------------------------ @@ -523,9 +492,9 @@ function import_DSF_v2(wrds_conn::Connection; postgre_query_dsf = """ SELECT PERMNO, DLYCALDT, DLYRET, DLYPRC, DLYVOL, DLYCAP FROM crsp.stkdlysecuritydata - WHERE DLYCALDT >= '$(string(date_range[1]))' AND DLYCALDT <= '$(string(date_range[2]))' + WHERE DLYCALDT >= \$1 AND DLYCALDT <= \$2 """ - df_dsf_v2 = execute(wrds_conn, postgre_query_dsf) |> DataFrame + df_dsf_v2 = execute(wrds_conn, postgre_query_dsf, (date_range[1], date_range[2])) |> DataFrame # clean up the dataframe transform!(df_dsf_v2, @@ -537,21 +506,15 @@ function import_DSF_v2(wrds_conn::Connection; return df_dsf_v2 end -# when there are no connections establisheds +# when there are no connections established function import_DSF_v2(; date_range::Tuple{Date, Date} = (Date("1900-01-01"), Dates.today()), - variables::AbstractString = "", - user::AbstractString = "", password::AbstractString = "", - logging_level::Symbol = :debug, # either none, debug, info etc... tbd - ) + logging_level::Symbol = :debug, + user::AbstractString = "", password::AbstractString = "") - if user == "" - wrds_conn = open_wrds_pg() - else - wrds_conn = open_wrds_pg(user, password) + with_wrds_connection(user=user, password=password) do conn + import_DSF_v2(conn; date_range=date_range, logging_level=logging_level) end - - return import_DSF_v2(wrds_conn, date_range=date_range, variables=variables, logging_level=logging_level) end # ------------------------------------------------------------------------------------------ @@ -567,37 +530,40 @@ end # """ # postgres_res = execute(wrds_conn, postgres_query, (table_schema,)) function _get_postgres_columns(table_schema, table_name; wrds_conn, prior_columns::Vector{String} = [""]) - - # download potential columns - postgres_query= """ - SELECT * + + # Parameterized query to prevent SQL injection + postgres_query = """ + SELECT column_name FROM information_schema.columns - WHERE table_schema = '$table_schema' - AND table_name = '$table_name' - ; + WHERE table_schema = \$1 + AND table_name = \$2 """ - postgres_res = execute(wrds_conn, postgres_query) - postgres_columns = DataFrame(columntable(postgres_res)).column_name ; + postgres_res = execute(wrds_conn, postgres_query, (table_schema, table_name)) + postgres_columns = DataFrame(columntable(postgres_res)).column_name if isempty(prior_columns) || prior_columns == [""] return uppercase.(postgres_columns) - else + else return intersect(uppercase.(postgres_columns), uppercase.(prior_columns)) end end function _get_postgres_table(table_schema, table_name; wrds_conn, prior_columns::Vector{String} = [""]) - + if isempty(prior_columns) || prior_columns == [""] columns = "*" - else + else + # Column names are validated against schema when used with _get_postgres_columns columns = join(uppercase.(prior_columns), ", ") end + # Quote identifiers to prevent SQL injection + schema_q = replace(table_schema, "\"" => "\"\"") + table_q = replace(table_name, "\"" => "\"\"") postgres_query = """ SELECT $columns - FROM $table_schema.$table_name + FROM \"$schema_q\".\"$table_q\" """ postgres_res = execute(wrds_conn, postgres_query) diff --git a/src/ImportComp.jl b/src/ImportComp.jl @@ -50,22 +50,15 @@ function import_Funda(wrds_conn::Connection; size(var_check, 1) > 0 && (@warn "Queried variables not in dataset ... : $(join(var_check, ","))") filter!(in(compd_funda), var_funda) - # set up the query for funda - postgre_query_funda_full = """ - SELECT * - FROM comp.funda - WHERE INDFMT = 'INDL' AND DATAFMT = 'STD' AND CONSOL = 'C' AND POPSRC = 'D' - AND DATADATE >= '$(string(date_range[1]))' - AND DATADATE <= '$(string(date_range[2]))' - """ - postgre_query_funda_var = """ + # set up the query for funda (dates parameterized to prevent SQL injection) + postgre_query_funda = """ SELECT $(join(unique(var_funda), ",")) FROM comp.funda WHERE INDFMT = 'INDL' AND DATAFMT = 'STD' AND CONSOL = 'C' AND POPSRC = 'D' - AND DATADATE >= '$(string(date_range[1]))' - AND DATADATE <= '$(string(date_range[2]))' + AND DATADATE >= \$1 + AND DATADATE <= \$2 """ - res_q_funda = execute(wrds_conn, postgre_query_funda_var) + res_q_funda = execute(wrds_conn, postgre_query_funda, (date_range[1], date_range[2])) df_funda = DataFrame(columntable(res_q_funda)) # run the filter @@ -89,13 +82,9 @@ function import_Funda(; filter_variables::Dict{Symbol,Any}=Dict(:CURCD => "USD"), user::String="", password::String="") - if user == "" - wrds_conn = open_wrds_pg() - else - wrds_conn = open_wrds_pg(user, password) + with_wrds_connection(user=user, password=password) do conn + import_Funda(conn; date_range=date_range, variables=variables) end - - import_Funda(wrds_conn, date_range=date_range, variables=variables) end # ------------------------------------------------------------------------------------------ diff --git a/src/ImportFamaFrench.jl b/src/ImportFamaFrench.jl @@ -162,34 +162,43 @@ end # -------------------------------------------------------------------------------------------------- function _parse_ff_monthly(zip_file; types=nothing) - + # Read all lines from the zip file entry file_lines = split(String(read(zip_file)), '\n') - skipto = 6 + + # Find the first data line (starts with digits, like "192607") + # instead of hardcoding a skip count that breaks if the header changes + skipto = 1 + for (i, line) in enumerate(file_lines) + if occursin(r"^\s*\d{6}", line) + skipto = i + break + end + end # Collect data lines until we hit "Annual Factors" data_lines = String[] - + for i in skipto:length(file_lines) line = file_lines[i] - + # Stop when we hit Annual Factors section if occursin(r"Annual Factors", line) break end - + # Skip empty lines if occursin(r"^\s*$", line) continue end - + # Add non-empty data lines push!(data_lines, line) end - + # Create IOBuffer with header + data buffer = IOBuffer(join(data_lines, "\n")) - + return CSV.File(buffer, header=false, delim=",", ntasks=1, types=types) |> DataFrame |> df -> rename!(df, [:datem, :mktrf, :smb, :hml, :rf]) diff --git a/src/ImportYields.jl b/src/ImportYields.jl @@ -46,36 +46,34 @@ println("Slope: ", params.β₁) - Can be constructed from DataFrameRow for convenience """ struct GSWParameters - β₀::Union{Float64, Missing} # Level - β₁::Union{Float64, Missing} # Slope - β₂::Union{Float64, Missing} # Curvature 1 + β₀::Float64 # Level + β₁::Float64 # Slope + β₂::Float64 # Curvature 1 β₃::Union{Float64, Missing} # Curvature 2 (may be missing for 3-factor model) - τ₁::Union{Float64, Missing} # Decay 1 (must be positive when present) + τ₁::Float64 # Decay 1 (must be positive) τ₂::Union{Float64, Missing} # Decay 2 (may be missing for 3-factor model) - + # Inner constructor with validation + # Returns `missing` (not a GSWParameters) when core fields are missing function GSWParameters(β₀, β₁, β₂, β₃, τ₁, τ₂) - # Check if core parameters are missing + # Check if core parameters are missing — return missing instead of constructing if ismissing(β₀) || ismissing(β₁) || ismissing(β₂) || ismissing(τ₁) return missing end - # Validate that non-missing decay parameters are positive - if !ismissing(τ₁) && τ₁ <= 0 - throw(ArgumentError("First decay parameter τ₁ must be positive when present, got τ₁=$τ₁")) + # Validate that decay parameters are positive + if τ₁ <= 0 + throw(ArgumentError("First decay parameter τ₁ must be positive, got τ₁=$τ₁")) end if !ismissing(τ₂) && τ₂ <= 0 throw(ArgumentError("Second decay parameter τ₂ must be positive when present, got τ₂=$τ₂")) end - - # Convert to appropriate types + new( - ismissing(β₀) ? missing : Float64(β₀), - ismissing(β₁) ? missing : Float64(β₁), - ismissing(β₂) ? missing : Float64(β₂), + Float64(β₀), Float64(β₁), Float64(β₂), ismissing(β₃) ? missing : Float64(β₃), - ismissing(τ₁) ? missing : Float64(τ₁), + Float64(τ₁), ismissing(τ₂) ? missing : Float64(τ₂) ) end @@ -421,14 +419,9 @@ function gsw_yield(maturity::Real, if maturity <= 0 throw(ArgumentError("Maturity must be positive, got $maturity")) end - - # Handle any missing values - if any(ismissing, [β₀, β₁, β₂, β₃, τ₁, τ₂]) - return missing - end - + # For 3-factor model compatibility: if β₃ is 0 or very small, skip the fourth term - use_four_factor = !ismissing(β₃) && abs(β₃) > 1e-10 && !ismissing(τ₂) && τ₂ > 0 + use_four_factor = abs(β₃) > 1e-10 && τ₂ > 0 # Nelson-Siegel-Svensson formula t = Float64(maturity) @@ -1195,24 +1188,64 @@ function add_excess_returns!(df::DataFrame, maturity::Real; if validate _validate_gsw_dataframe(df, check_date=true) end - - # Add regular returns first (will be cleaned up) - temp_df = copy(df) - add_returns!(temp_df, maturity, frequency=frequency, return_type=return_type, validate=false) - add_returns!(temp_df, risk_free_maturity, frequency=frequency, return_type=return_type, validate=false) - - # Calculate excess returns - bond_ret_col = Symbol(string(_maturity_to_column_name("ret", maturity)) * "_" * string(frequency)) - rf_ret_col = Symbol(string(_maturity_to_column_name("ret", risk_free_maturity)) * "_" * string(frequency)) + + if maturity <= 0 + throw(ArgumentError("Maturity must be positive, got $maturity")) + end + + valid_frequencies = [:daily, :monthly, :annual] + if frequency ∉ valid_frequencies + throw(ArgumentError("frequency must be one of $valid_frequencies, got $frequency")) + end + + valid_return_types = [:log, :arithmetic] + if return_type ∉ valid_return_types + throw(ArgumentError("return_type must be one of $valid_return_types, got $return_type")) + end + + # Sort by date to ensure proper time series order + sort!(df, :date) + + # Determine time step based on frequency + time_step = if frequency == :daily + Day(1) + elseif frequency == :monthly + Day(30) + elseif frequency == :annual + Day(360) + end + + # Create lagged parameter columns once (shared for both bond and rf returns) + param_cols = [:BETA0, :BETA1, :BETA2, :BETA3, :TAU1, :TAU2] + for col in param_cols + lag_col = Symbol("lag_$col") + transform!(df, [:date, col] => + ((dates, values) -> tlag(values, dates; n=time_step)) => + lag_col) + end + + # Calculate excess return directly in a single pass excess_col = Symbol(string(_maturity_to_column_name("excess_ret", maturity)) * "_" * string(frequency)) - - transform!(temp_df, [bond_ret_col, rf_ret_col] => - ByRow((bond_ret, rf_ret) -> ismissing(bond_ret) || ismissing(rf_ret) ? missing : bond_ret - rf_ret) => - excess_col) - - # Add only the excess return column to original DataFrame - df[!, excess_col] = temp_df[!, excess_col] - + + transform!(df, + AsTable(vcat(param_cols, [Symbol("lag_$col") for col in param_cols])) => + ByRow(params -> begin + current = GSWParameters(params.BETA0, params.BETA1, params.BETA2, + params.BETA3, params.TAU1, params.TAU2) + lagged = GSWParameters(params.lag_BETA0, params.lag_BETA1, params.lag_BETA2, + params.lag_BETA3, params.lag_TAU1, params.lag_TAU2) + if ismissing(current) || ismissing(lagged) + missing + else + gsw_excess_return(maturity, current, lagged; + risk_free_maturity=risk_free_maturity, + frequency=frequency, return_type=return_type) + end + end) => excess_col) + + # Clean up temporary lagged columns + select!(df, Not([Symbol("lag_$col") for col in param_cols])) + return df end # -------------------------------------------------------------------------------------------------- @@ -1412,21 +1445,11 @@ ytm = bond_yield_excel(Date(2024, 3, 1), Date(2034, 3, 1), # Notes - Settlement date must be before maturity date - Price and redemption are typically quoted per 100 of face value -- The function uses `date_difference()` with `basis=1` (actual/actual) internally - for time calculation, then applies the specified basis for other calculations +- Uses actual coupon dates and the specified day-count basis, matching Excel's computation - Results should match Excel's YIELD function within numerical precision -- For bonds purchased between coupon dates, accrued interest is automatically handled - -# Relationship to Other Functions -This function serves as a convenient wrapper around: -```julia -years = date_difference(settlement, maturity, basis=1) -bond_yield(price, redemption, rate, years, frequency) -``` -# Throws +# Throws - `ArgumentError`: If settlement ≥ maturity date -- `DomainError`: If rate, price, or redemption are negative - Convergence errors from underlying numerical root-finding See also: [`bond_yield`](@ref) @@ -1435,9 +1458,70 @@ function bond_yield_excel( settlement::Date, maturity::Date, rate::Real, price::Real, redemption::Real; frequency = 2, basis = 0) - years = _date_difference(settlement, maturity, basis=1) - return bond_yield(price, redemption, rate, years, frequency, method=:brent) + if settlement >= maturity + throw(ArgumentError("Settlement ($settlement) must be before maturity ($maturity)")) + end + + # Compute coupon schedule by working backwards from maturity + period_months = div(12, frequency) + + # Find next coupon date after settlement + next_coupon = maturity + while next_coupon - Month(period_months) > settlement + next_coupon -= Month(period_months) + end + prev_coupon = next_coupon - Month(period_months) + + # Count remaining coupons (from next_coupon to maturity, inclusive) + N = 0 + d = next_coupon + while d <= maturity + N += 1 + d += Month(period_months) + end + # Day count fractions using the specified basis + A = _day_count_days(prev_coupon, settlement, basis) # accrued days + E = _day_count_days(prev_coupon, next_coupon, basis) # days in coupon period + DSC = E - A # Excel defines DSC = E - A to ensure consistency + + α = DSC / E # fraction of period until next coupon + coupon = redemption * rate / frequency + + # Excel's YIELD pricing formula + function price_from_yield(y) + if y <= 0 + return Inf + end + + dr = y / frequency + + if N == 1 + # Special case: single remaining coupon + return (redemption + coupon) / (1 + α * dr) - coupon * A / E + end + + # General case: N > 1 coupons + # PV of coupon annuity: ∑(k=1..N) coupon/(1+dr)^(k-1+α) = coupon*(1+dr)^(1-α)/dr * [1-(1+dr)^(-N)] + pv_coupons = coupon * (1 + dr)^(1 - α) * (1 - (1 + dr)^(-N)) / dr + # PV of redemption + pv_redemption = redemption / (1 + dr)^(N - 1 + α) + # Subtract accrued interest + return pv_coupons + pv_redemption - coupon * A / E + end + + price_diff(y) = price_from_yield(y) - price + + try + return Roots.find_zero(price_diff, (1e-6, 2.0), Roots.Brent()) + catch e + if isa(e, ArgumentError) && occursin("not a bracketing interval", sprint(showerror, e)) + @warn "Brent failed: falling back to Order1" exception=e + return Roots.find_zero(price_diff, rate, Roots.Order1()) + else + rethrow(e) + end + end end """ @@ -1563,22 +1647,46 @@ function bond_yield(price, face_value, coupon_rate, years_to_maturity, frequency end +""" + _day_count_days(d1, d2, basis) -> Int + +Count the number of days between two dates using the specified day-count convention. +Used internally for bond yield calculations. + +- `basis=0`: 30/360 (US) +- `basis=1`: Actual/actual +- `basis=2`: Actual/360 +- `basis=3`: Actual/365 +- `basis=4`: European 30/360 +""" +function _day_count_days(d1::Date, d2::Date, basis::Int) + if basis == 0 # 30/360 US + day1, mon1, yr1 = Dates.day(d1), Dates.month(d1), Dates.year(d1) + day2, mon2, yr2 = Dates.day(d2), Dates.month(d2), Dates.year(d2) + if day1 == 31; day1 = 30; end + if day2 == 31 && day1 >= 30; day2 = 30; end + return 360 * (yr2 - yr1) + 30 * (mon2 - mon1) + (day2 - day1) + elseif basis == 4 # European 30/360 + day1, mon1, yr1 = Dates.day(d1), Dates.month(d1), Dates.year(d1) + day2, mon2, yr2 = Dates.day(d2), Dates.month(d2), Dates.year(d2) + if day1 == 31; day1 = 30; end + if day2 == 31; day2 = 30; end + return 360 * (yr2 - yr1) + 30 * (mon2 - mon1) + (day2 - day1) + else # basis 1, 2, 3: actual days + return Dates.value(d2 - d1) + end +end + function _date_difference(start_date, end_date; basis=1) - if basis == 0 # 30/360 - d1, m1, y1 = Dates.day(start_date), Dates.month(start_date), Dates.year(start_date) - d2, m2, y2 = Dates.day(end_date), Dates.month(end_date), Dates.year(end_date) - - if d1 == 31; d1 = 30; end - if d2 == 31 && d1 >= 30; d2 = 30; end - - days = 360 * (y2 - y1) + 30 * (m2 - m1) + (d2 - d1) + days = _day_count_days(start_date, end_date, basis) + if basis == 0 + return days / 360 + elseif basis == 1 + return days / 365.25 + elseif basis == 2 return days / 360 - elseif basis == 1 # Actual/Actual - return Dates.value(end_date - start_date) / 365.25 - elseif basis == 2 # Actual/360 - return Dates.value(end_date - start_date) / 360 - elseif basis == 3 # Actual/365 - return Dates.value(end_date - start_date) / 365 + elseif basis == 3 + return days / 365 else error("Invalid basis: $basis") end diff --git a/src/Merge_CRSP_Comp.jl b/src/Merge_CRSP_Comp.jl @@ -110,13 +110,9 @@ end function import_ccm_link(; user::String = "", password::String = "") - if user == "" - wrds_conn = open_wrds_pg() - else - wrds_conn = open_wrds_pg(user, password) + with_wrds_connection(user=user, password=password) do conn + import_ccm_link(conn) end - - return import_ccm_link(wrds_conn) end # -------------------------------------------------------------------------------------------------- diff --git a/src/Utilities.jl b/src/Utilities.jl @@ -41,6 +41,28 @@ function open_wrds_pg() fill!(password_bytes, 0x00) # zero out the password bytes end end + + +""" + with_wrds_connection(f; user="", password="") + +Open a WRDS PostgreSQL connection, execute `f(conn)`, and ensure the connection is closed. + +# Examples +```julia +df = with_wrds_connection(user="myuser", password="mypwd") do conn + import_MSF(conn; date_range=(Date("2020-01-01"), Date("2023-12-31"))) +end +``` +""" +function with_wrds_connection(f::Function; user::AbstractString="", password::AbstractString="") + conn = user == "" ? open_wrds_pg() : open_wrds_pg(user, password) + try + return f(conn) + finally + close(conn) + end +end # -------------------------------------------------------------------------------------------------- diff --git a/src/betas.jl b/src/betas.jl @@ -21,50 +21,33 @@ function calculate_rolling_betas(X, y; n, p = size(X) _min_data = something(min_data, p) - res = Array{Union{Missing, Float64}}(missing, (n,p)); - + res = Array{Union{Missing, Float64}}(missing, (n,p)) - if method == :linalg - # Preallocate arrays for intermediate results - XᵗX = zeros(p, p) - Xᵗy = zeros(p) + if method == :linalg for i in window:n - # Create views for the current window X_window = @view X[i-window+1:i, :] y_window = @view y[i-window+1:i] - - # Check for missing values - non_missing = .!any(ismissing.(X_window), dims=2)[:] .& .!ismissing.(y_window); - (sum(non_missing) < p) && continue # Not enough data for estimation - - # Extract non-missing rows - X_non_missing = @view X_window[non_missing, :] - y_non_missing = @view y_window[non_missing] - - res[i, :] = qr(X_non_missing) \ y_non_missing + non_missing = .!any(ismissing.(X_window), dims=2)[:] .& .!ismissing.(y_window) + (sum(non_missing) < _min_data) && continue - # Compute XᵗX and Xᵗy using in-place operations - # mul!(XᵗX, X_non_missing', X_non_missing) - # mul!(Xᵗy, X_non_missing', y_non_missing) - # # Solve for β - # res[i, :] = XᵗX \ Xᵗy - + X_non_missing = @view X_window[non_missing, :] + y_non_missing = @view y_window[non_missing] + res[i, :] = qr(X_non_missing) \ y_non_missing end elseif method == :lm for i in window:n X_window = @view X[i-window+1:i, :] y_window = @view y[i-window+1:i] - non_missing = .!any(ismissing.(X_window), dims=2)[:] .& .!ismissing.(y_window); - (sum(non_missing) < p) && continue # Not enough data for estimation + non_missing = .!any(ismissing.(X_window), dims=2)[:] .& .!ismissing.(y_window) + (sum(non_missing) < _min_data) && continue - X_non_missing = @view X_window[non_missing, :] - y_non_missing = @view y_window[non_missing] + X_non_missing = @view X_window[non_missing, :] + y_non_missing = @view y_window[non_missing] res[i, :] = coef(lm(disallowmissing(X_non_missing), disallowmissing(y_non_missing))) - end else throw(ArgumentError("method must be one of: :linalg, :lm")) diff --git a/test/UnitTests/Yields.jl b/test/UnitTests/Yields.jl @@ -300,7 +300,7 @@ maturity = Date(2016, 11, 15) ytm_excel = FinanceRoutines.bond_yield_excel(settlement, maturity, 0.0575, 95.04287, 100.0, frequency=2, basis=0) - @test ytm_excel ≈ 0.06 atol=5e-3 # this is not exactly same + @test ytm_excel ≈ 0.065 atol=5e-4 # Excel YIELD returns 0.065 (6.5%) # Test Excel API consistency with direct bond_yield years = 8.75 # approximate years between Feb 2008 to Nov 2016