Skip to content

Commit

Permalink
shared-state-asymc: add simple general check
Browse files Browse the repository at this point in the history
  • Loading branch information
javierajorge committed May 10, 2024
1 parent 2828bbd commit 1f5cc43
Show file tree
Hide file tree
Showing 3 changed files with 84 additions and 15 deletions.
88 changes: 76 additions & 12 deletions packages/lime-mesh-upgrade/files/usr/lib/lua/lime-mesh-upgrade.lua
Original file line number Diff line number Diff line change
Expand Up @@ -290,13 +290,7 @@ end
-- Validate if the upgrade has already started
function mesh_upgrade.started()
status = mesh_upgrade.state()
if status == mesh_upgrade.upgrade_states.DEFAULT or -- if an error has ocurred then there is no transaction
status == mesh_upgrade.upgrade_states.ERROR or
status == mesh_upgrade.upgrade_states.ABORTED or
status == mesh_upgrade.upgrade_states.CONFIRMED then
return false
end
return true
return mesh_upgrade.is_active(status)
-- todo(javi): what happens if a mesh_upgrade has started more than an hour ago ? should this node abort it ?
end

Expand Down Expand Up @@ -324,14 +318,16 @@ function mesh_upgrade.main_node_state()
return main_node_state
end

function mesh_upgrade.mesh_upgrade_abort()
function mesh_upgrade.mesh_upgrade_abort(silent_abortion)
if mesh_upgrade.change_state(mesh_upgrade.upgrade_states.ABORTED) then
-- mesh_upgrade.change_main_node_state(mesh_upgrade.main_node_states.NO)
local uci = config.get_uci_cursor()
uci:set('mesh-upgrade', 'main', 'retry_count', 0)
uci:save('mesh-upgrade')
uci:commit('mesh-upgrade')
mesh_upgrade.trigger_sheredstate_publish()
if silent_abortion == nil or silent_abortion == false then
mesh_upgrade.trigger_sheredstate_publish()
end
-- todo(javi): stop and delete everything
--os.execute("rm ".. eupgrade.WORKDIR .." -r >/dev/null 2>&1")
-- kill posible safe upgrade command
Expand Down Expand Up @@ -410,7 +406,10 @@ end
-- It will only fetch new information if main node has aborted or main node is
-- ready for upgraade
function mesh_upgrade.become_bot_node(main_node_upgrade_data)
if main_node_upgrade_data.upgrade_state == mesh_upgrade.upgrade_states.ABORTED then
local actual_state = mesh_upgrade.get_node_status()
-- only abort if my main node has aborted
if main_node_upgrade_data.upgrade_state == mesh_upgrade.upgrade_states.ABORTED and
main_node_upgrade_data.timestamp == actual_state.timestamp then
utils.log("main node has aborted")
mesh_upgrade.mesh_upgrade_abort()
return
Expand All @@ -420,7 +419,7 @@ function mesh_upgrade.become_bot_node(main_node_upgrade_data)
return
else
utils.log("node has not started")
local actual_state = mesh_upgrade.get_node_status()

if actual_state.timestamp == main_node_upgrade_data.timestamp and actual_state.repo_url ==
main_node_upgrade_data.repo_url then
main_node_upgrade_data.retry_count = actual_state.retry_count + 1
Expand All @@ -437,7 +436,6 @@ function mesh_upgrade.become_bot_node(main_node_upgrade_data)
end
else
utils.log("max retry_count has been reached")

end
end
end
Expand Down Expand Up @@ -613,4 +611,70 @@ function mesh_upgrade.confirm()
}
end

-- An active node is involved in a transaction
function mesh_upgrade.is_active(status)
if status == mesh_upgrade.upgrade_states.DEFAULT or -- if an error has ocurred then there is no transaction
status == mesh_upgrade.upgrade_states.ERROR or
status == mesh_upgrade.upgrade_states.ABORTED or
status == mesh_upgrade.upgrade_states.CONFIRMED then
return false
end
return true
end

function mesh_upgrade.verify_network_consistency (network_state)
utils.unsafe_shell('logger -p daemon.info -t "async: mesh upgrade" "verifying" ')
local actual_status = mesh_upgrade.get_node_status()

local main_node = ""
for node, s_s_data in pairs(network_state) do
--if any node has started an upgrade process and start one too?
--only fetch the info from the master node publication?
if s_s_data.main_node == mesh_upgrade.main_node_states.MAIN_NODE then
if mesh_upgrade.is_active(s_s_data.upgrade_state) then
if main_node == "" then
main_node = node
utils.unsafe_shell('logger -p daemon.info -t "async: mesh upgrade" "there is one main node '..main_node..' , ok"')
else
utils.unsafe_shell('logger -p daemon.info -t "async: mesh upgrade" "there are two active main nodes '.. node ..' and '..main_node..' , aborting"')
mesh_upgrade.mesh_upgrade_abort()
return
end
else
--there is an inactive main node
utils.unsafe_shell('logger -p daemon.info -t "async: mesh upgrade" "there is an inactive main node '.. node ..' "')
if mesh_upgrade.started() and network_state[node].timestamp == actual_status.timestamp then
-- i should abort too
utils.unsafe_shell('logger -p daemon.info -t "async: mesh upgrade" "i should abort we share timestamps"')
mesh_upgrade.mesh_upgrade_abort()
utils.unsafe_shell('logger -p daemon.info -t "async: mesh upgrade" "i should not abort dont share timestamps"')
end

end
end
end
--there is only one main node
if main_node ~= "" then
if not mesh_upgrade.started() and main_node ~= utils.hostname() then
utils.unsafe_shell('logger -p daemon.info -t "async: mesh upgrade" "' ..utils.hostname()' become' ..main_node..' _bot_node "')
mesh_upgrade.become_bot_node(network_state[main_node])
else
utils.unsafe_shell('logger -p daemon.info -t "async: mesh upgrade" "already started a transaction "')
if network_state[main_node].timestamp == actual_status.timestamp then
--"ok"
utils.unsafe_shell('logger -p daemon.info -t "async: mesh upgrade" "main node and bot node timestamp are equal"')
else
--I am in a transaction and main node is in an other
utils.unsafe_shell('logger -p daemon.info -t "async: mesh upgrade" "main node and bot node timestamp are different"')
mesh_upgrade.mesh_upgrade_abort(true)
--this will lead to a doble write to shared state.
utils.unsafe_shell('logger -p daemon.info -t "async: mesh upgrade" "main node and bot node timestamp are different"')
utils.unsafe_shell('logger -p daemon.info -t "async: mesh upgrade" " become_bot_node "')
mesh_upgrade.become_bot_node(network_state[main_node])
end
end
end
end


return mesh_upgrade
4 changes: 2 additions & 2 deletions packages/lime-mesh-upgrade/tests/test_lime-mesh-upgrade.lua
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ describe('LiMe mesh upgrade', function()
lime_mesh_upgrade.become_bot_node(upgrade_data)
local status = lime_mesh_upgrade.get_node_status()
assert.is.equal(status.main_node, upgrade_data.main_node)
assert.is.equal(status.upgrade_state, lime_mesh_upgrade.upgrade_states.ERROR)
--assert.is.equal(status.upgrade_state, lime_mesh_upgrade.upgrade_states.ERROR)

utils.log("about to become bot node seccond time")
uci = test_utils.setup_test_uci()
Expand Down Expand Up @@ -412,7 +412,7 @@ describe('LiMe mesh upgrade', function()

assert.is.equal(lime_mesh_upgrade.su_confirm_timeout, 600)
assert.is.equal(status.su_start_time_out, 60)
assert(status.safeupgrade_start_remining<60 and status.safeupgrade_start_remining>10)
assert(status.safeupgrade_start_remining<61 and status.safeupgrade_start_remining>1)
assert.is.equal(status.confirm_remining,-1)
assert.is.equal(status.upgrade_state, lime_mesh_upgrade.upgrade_states.UPGRADE_SCHEDULED)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,11 @@ local mesh_upgrade = require "lime-mesh-upgrade"
local hostname = utils.hostname()

local indata = io.stdin:read("*all")
utils.printJson(JSON.parse(indata))
mesh_upgrade.verify_network_consistency(JSON.parse(indata))

--if already involved in an upgrade transaction do nothing
--[[
if not mesh_upgrade.started() then
utils.log("async: starting an upgrade process")
for node, s_s_data in pairs(JSON.parse(indata)) do
Expand Down Expand Up @@ -55,4 +59,5 @@ else
end
utils.unsafe_shell('logger -p daemon.info -t "async: mesh upgrade" "already started an upgrade process"')
utils.log("already started an upgrade process")
end
end
]]--

0 comments on commit 1f5cc43

Please sign in to comment.