Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Trying out julia profiling #1

Draft
wants to merge 5 commits into
base: soumikd
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ DynamicQuantities = "06fc5a27-2a28-4c7c-a15d-362465fb6821"
LineSearches = "d3d80556-e9d4-5f37-9878-2ab0fcc64255"
Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
LossFunctions = "30fc2ffe-d236-52d8-8643-a9d8f7c094a7"
MLJ = "add582a8-e3ab-11e8-2d5e-e98b27df1bc7"
MLJModelInterface = "e80e1ace-859a-464e-9ed9-23947d8ae3ea"
MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
Optim = "429524aa-4258-5aef-a3af-852621145aeb"
Expand All @@ -28,12 +29,13 @@ Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
StyledStrings = "f489334b-da3d-4c2e-b8f0-e476e12c162b"
SymbolicUtils = "d1185830-fcd6-423d-90d6-eec64667417b"
Symbolics = "0c5d862f-8b57-4792-8d23-62f2024744c7"
TOML = "fa267f1f-6049-4f14-aa54-33bafae1ed76"

[weakdeps]
Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9"
JSON3 = "0f8b85d8-7281-11e9-16c2-39a750bddbf1"
SymbolicUtils = "d1185830-fcd6-423d-90d6-eec64667417b"

[extensions]
SymbolicRegressionEnzymeExt = "Enzyme"
Expand All @@ -55,6 +57,7 @@ JSON3 = "1"
LineSearches = "7"
Logging = "1"
LossFunctions = "0.10, 0.11"
MLJ = "0.20.7"
MLJModelInterface = "~1.5, ~1.6, ~1.7, ~1.8, ~1.9, ~1.10, ~1.11"
MacroTools = "0.4, 0.5"
Optim = "~1.8, ~1.9, ~1.10"
Expand All @@ -68,5 +71,6 @@ SpecialFunctions = "0.10.1, 1, 2"
StatsBase = "0.33, 0.34"
StyledStrings = "1"
SymbolicUtils = "0.19, ^1.0.5, 2, 3"
Symbolics = "6.22.0"
TOML = "<0.0.1, 1"
julia = "1.10"
14 changes: 14 additions & 0 deletions pixi.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
[project]
channels = ["conda-forge"]
name = "SymbolicRegression.jl"
platforms = ["linux-64"]

[tasks.test]
cwd = "soumikd/scripts"
cmd = "julia --project test_poly.jl"

[tasks.test-parallel]
cwd = "soumikd/scripts"
cmd = "julia --project --threads 16 test_poly.jl"

[dependencies]
200 changes: 167 additions & 33 deletions soumikd/scripts/test_poly.jl
Original file line number Diff line number Diff line change
@@ -1,13 +1,107 @@
using Profile
using SymbolicUtils
using SymbolicRegression

using DynamicExpressions: string_tree
include("/home/soumikd/symbolic_regression/SymbolicRegression.jl/src/Utils.jl")
import .UtilsModule: eval_limit, eval_derivative
using Symbolics

import SymbolicRegression: SRRegressor
import SymbolicRegression: SRRegressor, UtilsModule
# import .UtilsModule: eval_limit, eval_derivative, fnFromString
import MLJ: machine, fit!, predict, report

"""
    fnFromString(s)

Turn an expression string `s` in the variable `x` (e.g. `"x^2 + 1"`)
into a callable single-argument function. The compiled function is
invoked through `Base.invokelatest` so it can be called in the same
world age in which it was `eval`'d.
"""
function fnFromString(s)
    compiled = eval(Meta.parse("x -> $s"))
    return x -> Base.invokelatest(compiled, x)
end

"""
    eval_limit(f::Function, val::Float64)

Evaluate `f` at `val`. If direct evaluation yields `NaN` (e.g. a 0/0
indeterminate form), fall back to the symbolic limit of `f(x)` as
`x -> val`. Returns `10000` (an arbitrary large penalty) when
evaluation raises a `DomainError`; any other exception is rethrown.
"""
function eval_limit(f::Function, val::Float64)
    @syms x

    try
        f_val = f(val)

        # BUG FIX: the original tested `f_val == NaN`, which is always
        # false (IEEE semantics: NaN != NaN), so the symbolic-limit
        # fallback was unreachable. `isnan` is the correct test.
        if isnan(f_val)
            fn_value_x = f(x)
            return limit(fn_value_x, x, val)
        else
            return f_val
        end
    catch e
        if e isa DomainError
            return 10000  # arbitrary large penalty value
        else
            rethrow()
        end
    end
end

"""
    get_deriv_from_fn(f::Function)

Symbolically differentiate the scalar function `f` with respect to its
argument and return the derivative compiled back into a callable Julia
function (via `build_function` with `expression=Val{false}`).
"""
function get_deriv_from_fn(f::Function)
    @syms x

    # `f` is already a callable Function, so the original `eval(f)` was
    # a no-op wrapper (eval of a non-Expr returns its argument); call
    # `f` directly.
    deriv = expand_derivatives(Differential(x)(f(x)))
    return build_function(deriv, x; expression=Val{false})
end

"""
    eval_derivative(f::Function, val::Float64)

Symbolically differentiate `f` and evaluate the derivative at `val`.

Returns a 2-tuple `(deriv_repr, value)`:
  * if the derivative still depends on `x`, `deriv_repr` is its string
    representation and `value` is the derivative at `val` (falling back
    to a symbolic limit when direct evaluation gives `NaN`);
  * if the derivative is constant, `deriv_repr` is `NaN` and `value`
    is the parsed constant;
  * `10000` is used as an arbitrary large penalty on `DomainError`, or
    when a constant derivative cannot be parsed as a `Float64`.
"""
function eval_derivative(f::Function, val::Float64)
    @syms x

    try
        df = get_deriv_from_fn(f)

        if occursin("x", repr(df))
            df_val = df(val)
            df_x = df(x)
            # BUG FIX: `df_val == NaN` is always false (NaN != NaN), so
            # the symbolic-limit fallback was unreachable; use isnan.
            if isnan(df_val)
                return repr(df_x), limit(df_x, x, val)
            else
                return repr(df_x), df_val
            end
        else
            try
                # Removed leftover debug `@show`. `parse(Float64, …)`
                # requires a string, so convert the simplified constant
                # explicitly before parsing.
                return NaN, parse(Float64, string(simplify(df)))
            catch e
                if e isa ArgumentError
                    return NaN, 10000
                else
                    rethrow()
                end
            end
        end
    catch e
        if e isa DomainError
            return NaN, 10000
        else
            rethrow()
        end
    end
end

"""
    my_custom_profiling_objective(tree, dataset::Dataset{T,L}, options)::L where {T,L}

Profiling wrapper around `my_custom_objective`: runs it under
`@profile` so the objective's cost is captured in the collected
profile data, and returns its loss value unchanged.
"""
function my_custom_profiling_objective(tree, dataset::Dataset{T,L}, options)::L where {T,L}
    return @profile my_custom_objective(tree, dataset, options)
end

function my_custom_objective(tree, dataset::Dataset{T,L}, options)::L where {T,L}

prediction, flag = eval_tree_array(tree, dataset.X, options)
Expand All @@ -20,6 +114,9 @@ function my_custom_objective(tree, dataset::Dataset{T,L}, options)::L where {T,L

vrble = "x1"

eq_str = replace(eq_str, vrble => "x")
vrble = "x"

prediction_loss = mse_loss(tree, dataset, options)

lambda1 = 1000
Expand All @@ -29,30 +126,32 @@ function my_custom_objective(tree, dataset::Dataset{T,L}, options)::L where {T,L

if (occursin(vrble, eq_str))

lim1 = eval_limit(eq_str, vrble, 0.0)
f = fnFromString(eq_str)

lim1 = eval_limit(f, 0.0)
lim_loss1 = abs(1 - lim1)

f2, lim2 = eval_derivative(eq_str, vrble, 0.0)
f2, lim2 = eval_derivative(f, 0.0)
lim_loss2 = abs(1 - lim2)

if (f2 != NaN)
f3, lim3 = eval_derivative(repr(f2), vrble, 0.0)
lim_loss3 = abs(1 + lim3)
# if (f2 != NaN)
# f3, lim3 = eval_derivative(repr(f2), vrble, 0.0)
# lim_loss3 = abs(1 + lim3)

if (f3 != NaN)
f4, lim4 = eval_derivative(repr(f3), vrble, 0.0)
lim_loss4 = abs(2 + lim4)
# if (f3 != NaN)
# f4, lim4 = eval_derivative(repr(f3), vrble, 0.0)
# lim_loss4 = abs(2 + lim4)

return prediction_loss + lambda1*lim_loss1 + lambda2*lim_loss2 + lambda3*lim_loss3 + lambda4*lim_loss4
# return prediction_loss + lambda1*lim_loss1 + lambda2*lim_loss2 + lambda3*lim_loss3 + lambda4*lim_loss4

else
return prediction_loss + lambda1*lim_loss1 + lambda2*lim_loss2 + lambda3*lim_loss3
end
# else
# return prediction_loss + lambda1*lim_loss1 + lambda2*lim_loss2 + lambda3*lim_loss3
# end

else
return prediction_loss + lambda1*lim_loss1 + lambda2*lim_loss2
end
# return prediction_loss + lambda1*lim_loss1 + lambda2*lim_loss2
# else
# return prediction_loss + lambda1*lim_loss1 + lambda2*lim_loss2
# end
return prediction_loss + lambda1*lim_loss1 + lambda2*lim_loss2
else
return prediction_loss + 10000
end
Expand All @@ -77,19 +176,54 @@ X = Array{Float64}(X)
X = reshape(X, 11,1)
f = -X.^3/3 - X.^2/2 + X .+ 1

model = SRRegressor(
niterations= 100,
populations= 10,
ncycles_per_iteration= 10,
binary_operators=[+, *, /, -],
# unary_operators=[],
maxsize=20,
# procs=16,
parallelism=:multithreading,
loss_function=my_custom_objective,


# model = SRRegressor(
# niterations= 1,
# populations= 2,
# # population_size= 3,
# ncycles_per_iteration= 1,
# # binary_operators=[+, *, /, -],
# binary_operators=[+, *, -],
# # unary_operators=[],
# maxsize=8,
# # procs=16,
# parallelism=:multithreading,
# loss_function=my_custom_objective,
# # loss_function=mse_loss,
# )

# mach = machine(model, X, f, scitype_check_level=0)
# Profile.init(delay=0.1)
# # using ProfileView
# # ProfileView.view()
# @profile fit!(mach)
# open("profile_output.txt", "w") do f
# Profile.print(IOContext(f, :displaysize => (24, 500)), mincount=100, maxdepth=100)
# end
# Profile.clear()
# report(mach)
# predict(mach, X)

using Symbolics
# NOTE(review): hard-coded absolute path to another user's home directory —
# this only runs on one machine; consider a path relative to the repo root.
include("/home/joshkamm/SymbolicRegression/SymbolicRegression.jl/test/test_params.jl")

# Custom unary operator: reciprocal.
_inv(x) = 1 / x
# Build SymbolicRegression Options on top of the shared test defaults,
# limiting `_inv` nesting via `constraints` (presumably max complexity 4
# for its argument — TODO confirm against Options docs).
options = Options(;
default_params...,
binary_operators=(+, *, ^, /),
unary_operators=(_inv,),
constraints=(_inv => 4,),
populations=4,
)
@extend_operators options
# Hand-built expression tree: roughly (3.0 * x1)^2 with constant -1.2 —
# exact operator indices depend on `options`' operator ordering.
tree = Node(1, (^)(Node(; val=3.0) * Node(1, Node("x1")), 2.0), Node(; val=-1.2))

# Convert the tree to a Symbolics expression, renaming x1 -> "energy".
node_to_symbolic(tree, options; variable_names=["energy"], index_functions=true)

# Random weighted dataset: 3 features x 32 samples.
dataset = Dataset(randn(3, 32), randn(Float32, 32); weights=randn(Float32, 32))

# Time each loss twice: the first call includes compilation, the second
# measures the steady-state cost.
@time mse_loss(tree, dataset, options)
@time mse_loss(tree, dataset, options)
@time my_custom_objective(tree, dataset, options)
@time my_custom_objective(tree, dataset, options)
Loading