Skip to content
This repository has been archived by the owner on Jun 7, 2024. It is now read-only.

Commit

Permalink
Merge pull request #973 from zalando/bug-1996
Browse files (browse the repository at this point in the history)
Fix subscription disconnection because of race conditions
  • Loading branch information
Kunal-Jha authored Nov 1, 2018
2 parents 67e95a4 + d865d31 commit 02bf998
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 27 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,11 @@
import org.zalando.nakadi.domain.EventTypePartition;
import org.zalando.nakadi.exceptions.runtime.NakadiBaseException;
import org.zalando.nakadi.exceptions.runtime.NakadiRuntimeException;
import org.zalando.nakadi.exceptions.runtime.UnableProcessException;
import org.zalando.nakadi.exceptions.runtime.OperationInterruptedException;
import org.zalando.nakadi.exceptions.runtime.OperationTimeoutException;
import org.zalando.nakadi.exceptions.runtime.RequestInProgressException;
import org.zalando.nakadi.exceptions.runtime.ServiceTemporarilyUnavailableException;
import org.zalando.nakadi.exceptions.runtime.UnableProcessException;
import org.zalando.nakadi.exceptions.runtime.ZookeeperException;
import org.zalando.nakadi.service.subscription.model.Session;
import org.zalando.nakadi.view.SubscriptionCursorWithoutToken;
Expand Down Expand Up @@ -46,17 +46,17 @@
import static org.echocat.jomon.runtime.concurrent.Retryer.executeWithRetry;

public abstract class AbstractZkSubscriptionClient implements ZkSubscriptionClient {
public static final int SECONDS_TO_WAIT_FOR_LOCK = 15;
protected static final String NODE_TOPOLOGY = "/topology";
private static final String STATE_INITIALIZED = "INITIALIZED";
private static final int COMMIT_CONFLICT_RETRY_TIMES = 5;
protected static final String NODE_TOPOLOGY = "/topology";
public static final int SECONDS_TO_WAIT_FOR_LOCK = 15;
private static final int MAX_ZK_RESPONSE_SECONDS = 5;

private final String subscriptionId;
private final CuratorFramework curatorFramework;
private InterProcessSemaphoreMutex lock;
private final String resetCursorPath;
private final Logger log;
private InterProcessSemaphoreMutex lock;

public AbstractZkSubscriptionClient(
final String subscriptionId,
Expand Down Expand Up @@ -206,6 +206,8 @@ protected <K, V> Map<K, V> loadDataAsync(final Collection<K> keys,
synchronized (result) {
result.put(key, value);
}
} else if (event.getResultCode() == KeeperException.Code.NONODE.intValue()) {
getLog().warn("Unable to get {} data from zk. Node not found ", zkKey);
} else {
getLog().error(
"Failed to get {} data from zk. status code: {}",
Expand All @@ -229,35 +231,29 @@ protected <K, V> Map<K, V> loadDataAsync(final Collection<K> keys,
Thread.currentThread().interrupt();
throw new ServiceTemporarilyUnavailableException("Failed to wait for zk response", ex);
}
if (result.size() != keys.size()) {
throw new ServiceTemporarilyUnavailableException("Failed to wait for keys " +
keys.stream()
.filter(v -> !result.containsKey(v))
.map(String::valueOf)
.collect(Collectors.joining(", "))
+ " to be in response", null);
}
return result;
}

/**
 * Lists the sessions stored under this subscription's "/sessions" node.
 *
 * NOTE(review): this span is a rendered diff hunk without +/- markers, so two variants of the
 * body are interleaved: a single-shot lookup (zkSessions ... loadDataAsync(...).values()) and a
 * bounded retry loop. Presumably the single-shot lines are the removed side and the loop is the
 * added side -- confirm against the commit before treating this text as compilable code.
 *
 * @return the sessions produced by deserializeSession for the children of "/sessions"
 * @throws SubscriptionNotInitializedException when a KeeperException.NoNodeException is caught
 * @throws NakadiRuntimeException wrapping any other exception caught around the lookup
 * @throws ServiceTemporarilyUnavailableException in the retry variant, when no attempt loaded
 *         data for every listed child
 */
@Override
public final Collection<Session> listSessions()
throws SubscriptionNotInitializedException, NakadiRuntimeException, ServiceTemporarilyUnavailableException {
getLog().info("fetching sessions information");
// Single-shot variant: list the children of "/sessions" exactly once.
final List<String> zkSessions;
try {
zkSessions = getCurator().getChildren().forPath(getSubscriptionPath("/sessions"));
} catch (final KeeperException.NoNodeException e) {
throw new SubscriptionNotInitializedException(getSubscriptionId());
} catch (Exception ex) {
throw new NakadiRuntimeException(ex);
// Retry variant: at most 5 attempts, each one re-listing the children and re-loading their data.
for (int i = 0; i < 5; i++) {
try {
final List<String> sessions = getCurator().getChildren().forPath(getSubscriptionPath("/sessions"));
final Map <String,Session> result = loadDataAsync(sessions,
key -> getSubscriptionPath("/sessions/" + key),
this::deserializeSession);
// Accept the attempt only if data was loaded for every listed child; otherwise try again.
// Presumably a mismatch means a session node vanished between listing and loading -- verify.
if (result.size() == sessions.size()) {
return result.values();
}
} catch (final KeeperException.NoNodeException e) {
throw new SubscriptionNotInitializedException(getSubscriptionId());
} catch (Exception ex) {
throw new NakadiRuntimeException(ex);
}
}

// Single-shot variant: load and deserialize every listed session in one pass.
return loadDataAsync(
zkSessions,
key -> getSubscriptionPath("/sessions/" + key),
this::deserializeSession
).values();
// Retry variant: reached only when none of the attempts returned a complete result.
throw new ServiceTemporarilyUnavailableException("Failed to get all keys from ZK", null);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import static com.google.common.base.Charsets.UTF_8;
Expand Down Expand Up @@ -170,8 +171,21 @@ protected String getOffsetPath(final EventTypePartition etp) {
public Map<EventTypePartition, SubscriptionCursorWithoutToken> getOffsets(
final Collection<EventTypePartition> keys)
throws NakadiRuntimeException, ServiceTemporarilyUnavailableException {
// For each key, reads the data at getOffsetPath(key) via loadDataAsync and builds a
// SubscriptionCursorWithoutToken from the key's event type, its partition and the node bytes
// decoded as UTF-8.
// NOTE(review): this span is a rendered diff hunk without +/- markers. The bare
// `return loadDataAsync(...)` and the `offSets` variant below are presumably the removed and
// added sides respectively -- confirm against the commit before treating this as compilable code.
return loadDataAsync(keys, this::getOffsetPath, (etp, value) ->
new SubscriptionCursorWithoutToken(etp.getEventType(), etp.getPartition(), new String(value, UTF_8)));
final Map<EventTypePartition, SubscriptionCursorWithoutToken> offSets = loadDataAsync(keys,
this::getOffsetPath, (etp, value) ->
new SubscriptionCursorWithoutToken(etp.getEventType(), etp.getPartition(),
new String(value, UTF_8)));

// Fail when any requested key is absent from the result; the message names the missing keys.
if (offSets.size() != keys.size()) {
throw new ServiceTemporarilyUnavailableException("Failed to get all the keys " +
keys.stream()
.filter(v -> !offSets.containsKey(v))
.map(String::valueOf)
.collect(Collectors.joining(", "))
+ " from ZK.", null);
}

return offSets;
}

@Override
Expand Down

0 comments on commit 02bf998

Please sign in to comment.