FinanceRoutines.jl

Financial data routines for Julia
Log | Files | Refs | README | LICENSE

Diagnostics.jl (1411B)


      # Runs `diagnose` on a six-row panel seeded with known data-quality problems and
      # checks the entries of the returned report: row/column counts, per-column
      # missing rates, the duplicate-key count, and the suspicious-value messages.
      @testset "Data Quality Diagnostics" begin

          import Dates: Date

          # Fixture columns, built separately so each seeded problem is easy to spot.
          ids = [1, 1, 1, 2, 2, 2]
          # permno 1 repeats February (duplicate key); permno 2 skips February (gap).
          months = [1, 2, 2, 1, 3, 4]
          dates = Date.(2020, months, 1)
          # One missing return, plus two values flagged as suspicious: -1.5 and 150.0.
          returns = [0.05, missing, 0.03, -1.5, 0.02, 150.0]
          # A single negative price (-5.0).
          prices = [10.0, 20.0, 20.0, -5.0, 30.0, 40.0]

          panel = DataFrame(; permno = ids, date = dates, ret = returns, prc = prices)
          allowmissing!(panel, :ret)

          summary = diagnose(panel)

          # Shape of the input as recorded in the report.
          @test summary[:nrow] == 6
          @test summary[:ncol] == 4

          # Per-column missing rates: one of six returns is missing, no ids are.
          @test haskey(summary, :missing_rates)
          @test summary[:missing_rates][:ret] ≈ 1/6
          @test summary[:missing_rates][:permno] == 0.0

          # Exactly one duplicated key: (permno = 1, date = 2020-02-01).
          @test haskey(summary, :duplicate_keys)
          @test summary[:duplicate_keys] == 1

          # Two messages expected: one for extreme returns, one for negative prices.
          @test haskey(summary, :suspicious_values)
          @test length(summary[:suspicious_values]) == 2
          @test any(occursin("returns outside", msg) for msg in summary[:suspicious_values])
          @test any(occursin("negative prices", msg) for msg in summary[:suspicious_values])

          # With the return and price columns disabled, nothing should be flagged.
          unflagged = diagnose(panel; ret_col=nothing, price_col=nothing)
          @test isempty(unflagged[:suspicious_values])

      end