Skip to content

Commit

Permalink
initial implementation of the robust hat matrix based robust regressi…
Browse files Browse the repository at this point in the history
…on estimator
  • Loading branch information
jbytecode committed Oct 1, 2024
1 parent 78051c7 commit a08e0ad
Show file tree
Hide file tree
Showing 5 changed files with 150 additions and 2 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# v0.11.5 (Upcoming Release)

- Initial implementation of the robust hat matrix regression estimator

# v0.11.4

Expand Down
7 changes: 6 additions & 1 deletion src/LinRegOutliers.jl
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,10 @@ import .TheilSen: theilsen
include("deepestregression.jl")
import .DeepestRegression: deepestregression

# Robust Hat matrix based regression
include("robhatreg.jl")
import .RobustHatRegression: robhatreg

# All-in-one
include("summary.jl")
import .Summary: detectOutliers
Expand Down Expand Up @@ -275,10 +279,11 @@ export bacon
export cm97
export theilsen
export deepestregression
export robhatreg


# Snoop-Precompile
include("precompile/precompile.jl")
# include("precompile/precompile.jl")


end # module
107 changes: 107 additions & 0 deletions src/robhatreg.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
module RobustHatRegression


export robhatreg

import ..Basis: RegressionSetting, @extractRegressionSetting, designMatrix, responseVector
import ..OrdinaryLeastSquares: ols, residuals, coef
import ..LTS: iterateCSteps

import Distributions: quantile
import LinearAlgebra: inv, diag


"""
    trimean(u)

Return the trimean of `u`, i.e. `(Q1 + 2 * Q2 + Q3) / 4`, where `Q1`, `Q2`
and `Q3` are the 0.25, 0.50 and 0.75 quantiles of `u`.
"""
function trimean(u::AbstractVector{T})::Float64 where T <: Real
    lowerquartile = quantile(u, 0.25)
    center = quantile(u, 0.50)
    upperquartile = quantile(u, 0.75)
    return (lowerquartile + 2.0 * center + upperquartile) / 4.0
end

"""
    m(v::Vector, u::Vector)

Return the trimean of the element-wise products of `u` and `v`, multiplied by
the length of `u`. This is the dot-product formula `mean(u .* v) * length(u)`
with the mean replaced by the trimean.
"""
function m(v::Vector, u::Vector)::Float64
    products = u .* v
    scale = length(u)
    return trimean(products) * scale
end

"""
    m(mat::AbstractMatrix, u::AbstractVector)

Treat `u` as a single-column `Float64` matrix and forward to the
matrix-matrix method of `m`.
"""
function m(mat::AbstractMatrix, u::AbstractVector)::AbstractMatrix
    # Copy `u` into a fresh length(u) x 1 Float64 matrix.
    column = reshape(collect(Float64, u), :, 1)
    return m(mat, column)
end

"""
    m(m1::AbstractMatrix, m2::AbstractMatrix)

Build the `size(m1, 1)` by `size(m2, 2)` matrix whose `(i, j)` entry is
`m` applied to row `i` of `m1` and column `j` of `m2`.
"""
function m(m1::AbstractMatrix, m2::AbstractMatrix)
    nrows = size(m1, 1)
    ncols = size(m2, 2)
    return Float64[m(m1[i, :], m2[:, j]) for i in 1:nrows, j in 1:ncols]
end

"""
    hatrob(x::AbstractMatrix)

Return the robust hat matrix `x * inv(m(x', x)) * x'`, in which the usual
cross-product `x'x` is replaced by the trimean-based product `m(x', x)`.
"""
function hatrob(x::AbstractMatrix)
    xt = x'
    robustgram = m(xt, x)
    return x * inv(robustgram) * xt
end


"""
robhatreg(setting::RegressionSetting)
Perform robust regression using the robust hat matrix method.
# Arguments
- `setting::RegressionSetting`: The regression setting.
# Returns
- A dictionary containing the following
- `betas::AbstractVector`: The estimated coefficients.
# References
Satman, Mehmet Hakan, A robust initial basic subset selection
method for outlier detection algorithms in linear regression, In Press
"""
function robhatreg(setting::RegressionSetting)
X, y = @extractRegressionSetting setting
return robhatreg(X, y)
end


"""
robhatreg(X, y)
Perform robust regression using the robust hat matrix method.
# Arguments
- `X::AbstractMatrix`: The design matrix.
- `y::AbstractVector`: The response vector.
# Returns
- A dictionary containing the following
- `betas::AbstractVector`: The estimated coefficients.
# References
Satman, Mehmet Hakan, A robust initial basic subset selection
method for outlier detection algorithms in linear regression, In Press
"""
function robhatreg(X, y)
n, p = size(X)
h = Int(ceil((n + p + 1)/2))
myhat = hatrob(X)
diagonals = diag(myhat)
prms = sortperm(diagonals)
bestindices = prms[1:(p+1)]
_, indices = iterateCSteps(X, y, bestindices, h)
betas = X[indices, :] \ y[indices]
return Dict("betas" => betas)
end



end # end of module RobustHatRegression
3 changes: 2 additions & 1 deletion test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,5 @@ include("testbacon2000.jl")
include("testdataimage.jl")
include("testtheilsen.jl")
include("testsummary.jl")
include("testdeepestregression.jl")
include("testdeepestregression.jl")
include("testrobhatreg.jl")
34 changes: 34 additions & 0 deletions test/testrobhatreg.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@


@testset "Robust Hat Matrix based Robust Regression" begin
    # Simulate y = 5 + 5x + noise for x = 1, ..., n
    rng = MersenneTwister(12345)
    n = 50
    predictor = collect(1:n)
    noise = randn(rng, n) .* 2.0
    response = 5 .+ 5 .* predictor .+ noise

    # Double the last five responses so that they act as outliers
    response[(n - 4):n] .*= 2.0

    df = DataFrame(x = predictor, y = response)
    setting = createRegressionSetting(@formula(y ~ x), df)

    estimates = robhatreg(setting)["betas"]

    # Both the intercept and the slope should be recovered within this tolerance
    tolerance = 1.0

    @test estimates[1] ≈ 5.0 atol = tolerance
    @test estimates[2] ≈ 5.0 atol = tolerance
end





0 comments on commit a08e0ad

Please sign in to comment.