Skip to content

Commit

Permalink
fix(get_group0_members): catch exception for cql commands
Browse files Browse the repository at this point in the history
The run_cqlsh command could fail while getting group0 members,
and the raised exception would then fail the nemesis thread during
cluster health validation.

Catch the exception raised in get_group0_members and publish
an appropriate error event instead of letting it propagate.
  • Loading branch information
aleksbykov authored and soyacz committed Apr 18, 2023
1 parent 3c70a84 commit ab08651
Showing 1 changed file with 55 additions and 48 deletions.
103 changes: 55 additions & 48 deletions sdcm/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -3042,56 +3042,63 @@ def get_token_ring_members(self) -> list[dict[str, str]]:
def get_group0_members(self) -> list[dict[str, str]]:
self.log.debug("Get group0 members")
group0_members = []
result = self.run_cqlsh("select value from system.scylla_local where key = 'raft_group0_id'",
split=True, num_retry_on_failure=3)
# run_cqlsh return splitted ouput if data was found:
# [
# ""
# "value"
# "----------"
# "<value> "
# ""
# "Rows ..."
# ]
#
# 4th element is needed only
#
# And if was not found (raft disbled):
# [
# ""
# "value"
# "-------"
# ""
# "(0 rows)"
# ]
if not result or len(result) <= 3:
return []
raft_group0_id = result[3].strip()
if not raft_group0_id or "0 rows" in raft_group0_id:
return []
try:

result = self.run_cqlsh(
f"select server_id, can_vote from system.raft_state where group_id = {raft_group0_id} and disposition = 'CURRENT'",
split=True)
# run_cqlsh return splitted ouput if data was found:
# [
# ""
# "server_id | can_vote"
# "----------"
# "<value1> | True"
# "<value2> | False"
# ""
# "Rows ..."
# ]
#
# Start parsing from 4th line
result = self.run_cqlsh("select value from system.scylla_local where key = 'raft_group0_id'",
split=True, num_retry_on_failure=3)
# run_cqlsh return splitted ouput if data was found:
# [
# ""
# "value"
# "----------"
# "<value> "
# ""
# "Rows ..."
# ]
#
# 4th element is needed only
#
# And if was not found (raft disbled):
# [
# ""
# "value"
# "-------"
# ""
# "(0 rows)"
# ]
if not result or len(result) <= 3:
return []
raft_group0_id = result[3].strip()
if not raft_group0_id or "0 rows" in raft_group0_id:
return []

result = self.run_cqlsh(
f"select server_id, can_vote from system.raft_state where group_id = {raft_group0_id} and disposition = 'CURRENT'",
split=True)
# run_cqlsh return splitted ouput if data was found:
# [
# ""
# "server_id | can_vote"
# "----------"
# "<value1> | True"
# "<value2> | False"
# ""
# "Rows ..."
# ]
#
# Start parsing from 4th line

for line in result[3:]:
member = line.split("|")
if not member or len(member) != 2:
break
group0_members.append({"host_id": member[0].strip(),
"voter": member[1].strip() == "True"})
except Exception as exc: # pylint: disable=broad-except
err_msg = f"Get group0 members failed with error: {exc}"
self.log.error(err_msg)
InfoEvent(message=err_msg, severity=Severity.ERROR).publish()

for line in result[3:]:
member = line.split("|")
if not member or len(member) != 2:
break
group0_members.append({"host_id": member[0].strip(),
"voter": member[1].strip() == "True"})
self.log.debug("Group0 members: %s", group0_members)
return group0_members

Expand Down

0 comments on commit ab08651

Please sign in to comment.