diff --git a/Project.toml b/Project.toml
index 5caa471d..d07def0b 100644
--- a/Project.toml
+++ b/Project.toml
@@ -16,6 +16,7 @@ DynamicQuantities = "06fc5a27-2a28-4c7c-a15d-362465fb6821"
 LineSearches = "d3d80556-e9d4-5f37-9878-2ab0fcc64255"
 Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
 LossFunctions = "30fc2ffe-d236-52d8-8643-a9d8f7c094a7"
+MLJ = "add582a8-e3ab-11e8-2d5e-e98b27df1bc7"
 MLJModelInterface = "e80e1ace-859a-464e-9ed9-23947d8ae3ea"
 MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
 Optim = "429524aa-4258-5aef-a3af-852621145aeb"
@@ -28,12 +29,13 @@ Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
 SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
 StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
 StyledStrings = "f489334b-da3d-4c2e-b8f0-e476e12c162b"
+SymbolicUtils = "d1185830-fcd6-423d-90d6-eec64667417b"
+Symbolics = "0c5d862f-8b57-4792-8d23-62f2024744c7"
 TOML = "fa267f1f-6049-4f14-aa54-33bafae1ed76"
 
 [weakdeps]
 Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9"
 JSON3 = "0f8b85d8-7281-11e9-16c2-39a750bddbf1"
-SymbolicUtils = "d1185830-fcd6-423d-90d6-eec64667417b"
 
 [extensions]
 SymbolicRegressionEnzymeExt = "Enzyme"
@@ -55,6 +57,7 @@ JSON3 = "1"
 LineSearches = "7"
 Logging = "1"
 LossFunctions = "0.10, 0.11"
+MLJ = "0.20.7"
 MLJModelInterface = "~1.5, ~1.6, ~1.7, ~1.8, ~1.9, ~1.10, ~1.11"
 MacroTools = "0.4, 0.5"
 Optim = "~1.8, ~1.9, ~1.10"
@@ -68,5 +71,6 @@ SpecialFunctions = "0.10.1, 1, 2"
 StatsBase = "0.33, 0.34"
 StyledStrings = "1"
 SymbolicUtils = "0.19, ^1.0.5, 2, 3"
+Symbolics = "6.22.0"
 TOML = "<0.0.1, 1"
 julia = "1.10"
diff --git a/pixi.toml b/pixi.toml
new file mode 100644
index 00000000..4dcedd6c
--- /dev/null
+++ b/pixi.toml
@@ -0,0 +1,14 @@
+[project]
+channels = ["conda-forge"]
+name = "SymbolicRegression.jl"
+platforms = ["linux-64"]
+
+[tasks.test]
+cwd = "soumikd/scripts"
+cmd = "julia --project test_poly.jl"
+
+[tasks.test-parallel]
+cwd = "soumikd/scripts"
+cmd = "julia --project --threads 16 test_poly.jl"
+
+[dependencies]
diff --git a/soumikd/scripts/test_poly.jl b/soumikd/scripts/test_poly.jl
index bace2488..22ebe5e8 100644
--- a/soumikd/scripts/test_poly.jl
+++ b/soumikd/scripts/test_poly.jl
@@ -1,13 +1,107 @@
+using Profile
+using SymbolicUtils
 using SymbolicRegression
 using DynamicExpressions: string_tree
-include("/home/soumikd/symbolic_regression/SymbolicRegression.jl/src/Utils.jl")
-import .UtilsModule: eval_limit, eval_derivative
 using Symbolics
-import SymbolicRegression: SRRegressor
+import SymbolicRegression: SRRegressor, UtilsModule
+# import .UtilsModule: eval_limit, eval_derivative, fnFromString
 import MLJ: machine, fit!, predict, report
 
+function fnFromString(s) # build a callable from an expression string such as "x^2 + 1"
+    f = eval(Meta.parse("x -> " * s))
+    return x -> Base.invokelatest(f, x)
+end
+
+function eval_limit(f::Function, val::Float64) # f(val), with a symbolic limit fallback when evaluation is indeterminate
+    @syms x
+    var = "x"
+
+    # if (occursin(var, eq_str))
+    try
+        f_val = f(val)
+
+        if isnan(f_val) # was `f_val == NaN`, which is always false
+            fn_value_x = f(x)
+            return limit(fn_value_x, x, val)
+        else
+            return f_val
+        end
+    catch e
+        if e isa DomainError
+            return 10000 # an arbitrary large number
+        else
+            rethrow(e)
+        end
+    end
+    # else
+    #     return 10000 # an arbitrary large number
+    # end
+end
+
+function get_deriv_from_fn(f::Function) # symbolically differentiate f and compile the derivative
+    @syms x
+
+    # g = fnFromString(f)
+
+    deriv = expand_derivatives(Differential(x)(f(x))) # the former `eval(f)` was redundant: f is already a Function
+    return build_function(deriv, x; expression=Val{false})
+end
+
+function eval_derivative(f::Function, val::Float64) # returns (printed derivative, derivative value at val)
+    @syms x
+
+    # if (occursin(var, eq_str))
+
+    #     new_eq_str = replace(eq_str, var => "x")
+
+    #     if(eq_str == "x")
+    #         return NaN, 1
+    #     end
+
+    #     f = fnFromString(new_eq_str)
+
+    try
+        df = get_deriv_from_fn(f)
+
+        if (occursin("x", repr(df))) # NOTE: repr of the compiled function includes its argument name, so this is effectively always true
+            df_val = df(val)
+            df_x = df(x)
+            if isnan(df_val) # was `df_val == NaN`, which is always false
+                return repr(df_x), limit(df_x, x, val)
+            else
+                return repr(df_x), df_val
+            end
+        else
+            try
+                return NaN, Float64(df(val)) # derivative is constant in x; the former `parse(Float64, @show simplify(df))` cannot work on a compiled function
+            catch e
+                if e isa ArgumentError
+                    return NaN, 10000
+                else
+                    rethrow(e)
+                end
+            end
+        end
+    catch e
+        if e isa DomainError
+            return NaN, 10000
+        else
+            rethrow(e)
+        end
+    end
+
+    # else
+    #     return NaN, 10000 # an arbitrary large number
+    # end
+end
+
+function my_custom_profiling_objective(tree, dataset::Dataset{T,L}, options)::L where {T,L}
+    # @profile mse_loss(tree, dataset, options)
+    @profile my_custom_objective(tree, dataset, options)
+end
+
 function my_custom_objective(tree, dataset::Dataset{T,L}, options)::L where {T,L}
 
     prediction, flag = eval_tree_array(tree, dataset.X, options)
@@ -20,6 +114,9 @@ function my_custom_objective(tree, dataset::Dataset{T,L}, options)::L where {T,L}
 
     vrble = "x1"
 
+    eq_str = replace(eq_str, vrble => "x")
+    vrble = "x"
+
     prediction_loss = mse_loss(tree, dataset, options)
 
     lambda1 = 1000
@@ -29,30 +126,32 @@ function my_custom_objective(tree, dataset::Dataset{T,L}, options)::L where {T,L}
 
     if (occursin(vrble, eq_str))
 
-        lim1 = eval_limit(eq_str, vrble, 0.0)
+        f = fnFromString(eq_str)
+
+        lim1 = eval_limit(f, 0.0)
         lim_loss1 = abs(1 - lim1)
 
-        f2, lim2 = eval_derivative(eq_str, vrble, 0.0)
+        f2, lim2 = eval_derivative(f, 0.0)
         lim_loss2 = abs(1 - lim2)
 
-        if (f2 != NaN)
-            f3, lim3 = eval_derivative(repr(f2), vrble, 0.0)
-            lim_loss3 = abs(1 + lim3)
+        # if (f2 != NaN)
+        #     f3, lim3 = eval_derivative(repr(f2), vrble, 0.0)
+        #     lim_loss3 = abs(1 + lim3)
 
-            if (f3 != NaN)
-                f4, lim4 = eval_derivative(repr(f3), vrble, 0.0)
-                lim_loss4 = abs(2 + lim4)
+        #     if (f3 != NaN)
+        #         f4, lim4 = eval_derivative(repr(f3), vrble, 0.0)
+        #         lim_loss4 = abs(2 + lim4)
 
-                return prediction_loss + lambda1*lim_loss1 + lambda2*lim_loss2 + lambda3*lim_loss3 + lambda4*lim_loss4
+        #         return prediction_loss + lambda1*lim_loss1 + lambda2*lim_loss2 + lambda3*lim_loss3 + lambda4*lim_loss4
 
-            else
-                return prediction_loss + lambda1*lim_loss1 + lambda2*lim_loss2 + lambda3*lim_loss3
-            end
+        #     else
+        #         return prediction_loss + lambda1*lim_loss1 + lambda2*lim_loss2 + lambda3*lim_loss3
+        #     end
 
-        else
-            return prediction_loss + lambda1*lim_loss1 + lambda2*lim_loss2
-        end
-        # return prediction_loss + lambda1*lim_loss1 + lambda2*lim_loss2
+        # else
+        #     return prediction_loss + lambda1*lim_loss1 + lambda2*lim_loss2
+        # end
+        return prediction_loss + lambda1*lim_loss1 + lambda2*lim_loss2
 
     else
         return prediction_loss + 10000
     end
 end
@@ -77,19 +176,54 @@
 X = Array{Float64}(X)
 X = reshape(X, 11,1)
 f = -X.^3/3 - X.^2/2 + X .+ 1
-model = SRRegressor(
-    niterations= 100,
-    populations= 10,
-    ncycles_per_iteration= 10,
-    binary_operators=[+, *, /, -],
-    # unary_operators=[],
-    maxsize=20,
-    # procs=16,
-    parallelism=:multithreading,
-    loss_function=my_custom_objective,
+
+
+# model = SRRegressor(
+#     niterations= 1,
+#     populations= 2,
+#     # population_size= 3,
+#     ncycles_per_iteration= 1,
+#     # binary_operators=[+, *, /, -],
+#     binary_operators=[+, *, -],
+#     # unary_operators=[],
+#     maxsize=8,
+#     # procs=16,
+#     parallelism=:multithreading,
+#     loss_function=my_custom_objective,
+#     # loss_function=mse_loss,
+# )
+
+# mach = machine(model, X, f, scitype_check_level=0)
+# Profile.init(delay=0.1)
+# # using ProfileView
+# # ProfileView.view()
+# @profile fit!(mach)
+# open("profile_output.txt", "w") do f
+#     Profile.print(IOContext(f, :displaysize => (24, 500)), mincount=100, maxdepth=100)
+# end
+# Profile.clear()
+# report(mach)
+# predict(mach, X)
+
+using Symbolics
+include("/home/joshkamm/SymbolicRegression/SymbolicRegression.jl/test/test_params.jl")
+
+_inv(x) = 1 / x
+options = Options(;
+    default_params...,
+    binary_operators=(+, *, ^, /),
+    unary_operators=(_inv,),
+    constraints=(_inv => 4,),
+    populations=4,
 )
+@extend_operators options
+tree = Node(1, (^)(Node(; val=3.0) * Node(1, Node("x1")), 2.0), Node(; val=-1.2))
+
+node_to_symbolic(tree, options; variable_names=["energy"], index_functions=true)
+
+dataset = Dataset(randn(3, 32), randn(Float32, 32); weights=randn(Float32, 32))
 
-mach = machine(model, X, f, scitype_check_level=0)
-fit!(mach)
-report(mach)
-predict(mach, X)
\ No newline at end of file
+@time mse_loss(tree, dataset, options)
+@time mse_loss(tree, dataset, options)
+@time my_custom_objective(tree, dataset, options)
+@time my_custom_objective(tree, dataset, options)
diff --git a/src/Utils.jl b/src/Utils.jl
index e724cd59..2cc50494 100644
--- a/src/Utils.jl
+++ b/src/Utils.jl
@@ -286,110 +286,4 @@ function dump_buffer(buffer::AnnotatedIOBuffer)
     return AnnotatedString(dump_buffer(buffer.io), buffer.annotations)
 end
 
-function fnFromString(s)
-    f = eval(Meta.parse("x -> " * s))
-    return x -> Base.invokelatest(f, x)
-end
-
-function eval_limit(eq_str::String, var::String, val::Float64)
-
-    @syms x
-
-    if (occursin(var, eq_str))
-
-        new_eq_str = replace(eq_str, var => "x")
-
-        if (new_eq_str == "x")
-            return val
-        end
-
-        f = fnFromString(new_eq_str)
-
-        try
-            f_val = f(val)
-            fn_value_x = f(x)
-
-            if (f_val == NaN)
-                return limit(fn_value_x, x, val)
-            else
-                return f_val
-            end
-        catch e
-            if e isa DomainError
-                return 10000 # a random large number
-            else
-                rethrow(e)
-            end
-        end
-    else
-        return 10000 # a random large number
-    end
-end
-
-function get_deriv_from_fn_string(f::String)
-
-    @syms x
-
-    g = fnFromString(f)
-
-    deriv = repr(
-        expand_derivatives(
-            Differential(x)(eval(g)(x))))
-
-    return deriv
-end
-
-function eval_derivative(eq_str::String, var::String, val::Float64)
-
-    @syms x
-    @syms x1
-
-    if (occursin(var, eq_str))
-
-        new_eq_str = replace(eq_str, var => "x")
-
-        if(eq_str == "x")
-            return NaN, 1
-        end
-
-        f = fnFromString(new_eq_str)
-
-        try
-            deriv = get_deriv_from_fn_string(repr(f(x)))
-
-            if (occursin("x", deriv))
-                df = fnFromString(deriv)
-
-                df_val = df(val)
-                df_x = df(x)
-                df_x1 = df(x1)
-                if (df_val == NaN)
-                    return repr(df_x1), limit(df_x, x, val)
-                else
-                    return repr(df_x1), df_val
-                end
-            else
-                try
-                    return NaN, parse(Float64, simplify(deriv))
-                catch e
-                    if e isa ArgumentError
-                        return NaN, 10000
-                    else
-                        rethrow(e)
-                    end
-                end
-            end
-        catch e
-            if e isa DomainError
-                return NaN, 10000
-            else
-                rethrow(e)
-            end
-        end
-
-    else
-        return NaN, 10000 # a random large number
-    end
-end
-
 end
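A minimal standalone sketch of how the limit/derivative penalties in `my_custom_objective` compose, assuming the helper definitions from `soumikd/scripts/test_poly.jl` above are in scope and that the Symbolics 6.x pin supplies `limit`. The equation string below is a hypothetical stand-in for `string_tree(tree, options)`; the targets f(0) = 1 and f'(0) = 1 mirror the `lambda1` and `lambda2` penalty terms:

    using Symbolics

    eq_str = "x^2 + x + 1"             # hypothetical stand-in for string_tree(tree, options)
    f = fnFromString(eq_str)

    lim1 = eval_limit(f, 0.0)          # direct evaluation: f(0) == 1.0; falls back to limit() on NaN
    _, lim2 = eval_derivative(f, 0.0)  # Symbolics gives f'(x) = 2x + 1, so f'(0) == 1.0

    lim_loss1 = abs(1 - lim1)          # penalizes f(0) != 1
    lim_loss2 = abs(1 - lim2)          # penalizes f'(0) != 1
    @show lim_loss1 lim_loss2          # both 0.0 for this candidate equation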