-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
initial implementation of the robust hat matrix based robust regressi…
…on estimator
- Loading branch information
Showing
5 changed files
with
150 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
# v0.11.5 (Upcoming Release) | ||
|
||
- Initial implementation of the robust hat matrix regression estimator | ||
|
||
# v0.11.4 | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
module RobustHatRegression | ||
|
||
|
||
export robhatreg | ||
|
||
import ..Basis: RegressionSetting, @extractRegressionSetting, designMatrix, responseVector | ||
import ..OrdinaryLeastSquares: ols, residuals, coef | ||
import ..LTS: iterateCSteps | ||
|
||
import Distributions: quantile | ||
import LinearAlgebra: inv, diag | ||
|
||
|
||
function trimean(u::AbstractVector{T})::Float64 where T <: Real | ||
return (quantile(u, 0.25) + 2.0 * quantile(u, 0.50) + quantile(u, 0.75)) / 4.0 | ||
end | ||
|
||
function m(v::Vector, u::Vector)::Float64 | ||
return trimean(u .* v) * length(u) | ||
end | ||
|
||
function m(mat::AbstractMatrix, u::AbstractVector)::AbstractMatrix | ||
L = length(u) | ||
y = zeros(Float64, L, 1) | ||
for i in 1:L | ||
y[i, 1] = u[i] | ||
end | ||
result = m(mat, y) | ||
return result | ||
end | ||
|
||
function m(m1::AbstractMatrix, m2::AbstractMatrix) | ||
n1, _ = size(m1) | ||
_ , p2 = size(m2) | ||
newmat = zeros(Float64, n1, p2) | ||
for i in 1:n1 | ||
for j in 1:p2 | ||
newmat[i, j] = m(m1[i, :], m2[:, j]) | ||
end | ||
end | ||
return newmat | ||
end | ||
|
||
function hatrob(x::AbstractMatrix) | ||
return x * inv(m(x', x)) * x' | ||
end | ||
|
||
|
||
""" | ||
robhatreg(setting::RegressionSetting) | ||
Perform robust regression using the robust hat matrix method. | ||
# Arguments | ||
- `setting::RegressionSetting`: The regression setting. | ||
# Returns | ||
- A dictionary containing the following | ||
- `betas::AbstractVector`: The estimated coefficients. | ||
# References | ||
Satman, Mehmet Hakan, A robust initial basic subset selection | ||
method for outlier detection algorithms in linear regression, In Press | ||
""" | ||
function robhatreg(setting::RegressionSetting) | ||
X, y = @extractRegressionSetting setting | ||
return robhatreg(X, y) | ||
end | ||
|
||
|
||
""" | ||
robhatreg(X, y) | ||
Perform robust regression using the robust hat matrix method. | ||
# Arguments | ||
- `X::AbstractMatrix`: The design matrix. | ||
- `y::AbstractVector`: The response vector. | ||
# Returns | ||
- A dictionary containing the following | ||
- `betas::AbstractVector`: The estimated coefficients. | ||
# References | ||
Satman, Mehmet Hakan, A robust initial basic subset selection | ||
method for outlier detection algorithms in linear regression, In Press | ||
""" | ||
function robhatreg(X, y) | ||
n, p = size(X) | ||
h = Int(ceil((n + p + 1)/2)) | ||
myhat = hatrob(X) | ||
diagonals = diag(myhat) | ||
prms = sortperm(diagonals) | ||
bestindices = prms[1:(p+1)] | ||
_, indices = iterateCSteps(X, y, bestindices, h) | ||
betas = X[indices, :] \ y[indices] | ||
return Dict("betas" => betas) | ||
end | ||
|
||
|
||
|
||
end # end of module RobustHatRegression |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
|
||
|
||
@testset "Robust Hat Matrix based Robust Regression" begin | ||
# Create simple data | ||
rng = MersenneTwister(12345) | ||
n = 50 | ||
x = collect(1:n) | ||
e = randn(rng, n) .* 2.0 | ||
y = 5 .+ 5 .* x .+ e | ||
|
||
# Contaminate some values | ||
y[n] = y[n] * 2.0 | ||
y[n-1] = y[n-1] * 2.0 | ||
y[n-2] = y[n-2] * 2.0 | ||
y[n-3] = y[n-3] * 2.0 | ||
y[n-4] = y[n-4] * 2.0 | ||
|
||
df = DataFrame(x=x, y=y) | ||
|
||
reg = createRegressionSetting(@formula(y ~ x), df) | ||
result = robhatreg(reg) | ||
|
||
betas = result["betas"] | ||
|
||
atol = 1.0 | ||
|
||
@test isapprox(betas[1], 5.0, atol=atol) | ||
@test isapprox(betas[2], 5.0, atol=atol) | ||
end | ||
|
||
|
||
|
||
|
||
|