This repository has been archived by the owner on Jun 4, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 225
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Port of knative-extensions/eventing-kafka#182 (#1672)
Set subscription readiness based on consumer group status (#182) * WIP: Set subscription readiness based on consumer group status * Refactor KafkaWatcher to an edge-triggered, level driven design * Refactor the test to prevent a map race * goimports and gofmt comments * Add tests for the KafkaWatcher and reconnect for AdminClient * Improve consumer group watcher code aethetics and robustness * Make variable names more descriptive for consumer group watcher and adimn client * Forget watcher when channel is deleted * Terminate old watcher and free the cache when config is created * Set the KafkaChannel consolidated dispatcher reconciler to be read-only * Enable HA for KafkaChannel(consolidated) dispatcher * Run update-codegen.sh * Optimize consumer group watcher * Use sets.String for internal cache instead of string slice * Allow for a single callback per watcher * Synchronization on Terminate and Forget
- Loading branch information
Showing
9 changed files
with
662 additions
and
62 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,108 @@ | ||
/* | ||
Copyright 2020 The Knative Authors | ||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
http://www.apache.org/licenses/LICENSE-2.0 | ||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
*/ | ||
|
||
package kafka | ||
|
||
import ( | ||
"context" | ||
"fmt" | ||
"math" | ||
"sync" | ||
"time" | ||
|
||
"go.uber.org/zap" | ||
"k8s.io/apimachinery/pkg/util/sets" | ||
|
||
"github.com/Shopify/sarama" | ||
"knative.dev/pkg/logging" | ||
) | ||
|
||
var mutex sync.Mutex | ||
|
||
type ClusterAdminFactory func() (sarama.ClusterAdmin, error) | ||
|
||
type AdminClient interface { | ||
// ListConsumerGroups Lists the consumer groups | ||
ListConsumerGroups() ([]string, error) | ||
} | ||
|
||
// AdminClientManager manages a ClusterAdmin connection and recreates one when needed | ||
// it is made to overcome https://github.com/Shopify/sarama/issues/1162 | ||
type AdminClientManager struct { | ||
logger *zap.SugaredLogger | ||
adminFactory ClusterAdminFactory | ||
clusterAdmin sarama.ClusterAdmin | ||
} | ||
|
||
func NewAdminClient(ctx context.Context, caFactory ClusterAdminFactory) (AdminClient, error) { | ||
logger := logging.FromContext(ctx) | ||
logger.Info("Creating a new AdminClient") | ||
kafkaClusterAdmin, err := caFactory() | ||
if err != nil { | ||
logger.Errorw("error while creating ClusterAdmin", zap.Error(err)) | ||
return nil, err | ||
} | ||
return &AdminClientManager{ | ||
logger: logger, | ||
adminFactory: caFactory, | ||
clusterAdmin: kafkaClusterAdmin, | ||
}, nil | ||
} | ||
|
||
// ListConsumerGroups Returns a list of the consumer groups. | ||
// | ||
// In the occasion of errors, there will be a retry with an exponential backoff. | ||
// Due to a known issue in Sarama ClusterAdmin https://github.com/Shopify/sarama/issues/1162, | ||
// a new ClusterAdmin will be created with every retry until the call succeeds or | ||
// the timeout is reached. | ||
func (c *AdminClientManager) ListConsumerGroups() ([]string, error) { | ||
c.logger.Info("Attempting to list consumer group") | ||
mutex.Lock() | ||
defer mutex.Unlock() | ||
r := 0 | ||
// This gives us around ~13min of exponential backoff | ||
max := 13 | ||
cgsMap, err := c.clusterAdmin.ListConsumerGroups() | ||
for err != nil && r <= max { | ||
// There's on error, let's retry and presume a new ClusterAdmin can fix it | ||
|
||
// Calculate incremental delay following this https://docs.aws.amazon.com/general/latest/gr/api-retries.html | ||
t := int(math.Pow(2, float64(r)) * 100) | ||
d := time.Duration(t) * time.Millisecond | ||
c.logger.Errorw("listing consumer group failed. Refreshing the ClusterAdmin and retrying.", | ||
zap.Error(err), | ||
zap.Duration("retry after", d), | ||
zap.Int("Retry attempt", r), | ||
zap.Int("Max retries", max), | ||
) | ||
time.Sleep(d) | ||
|
||
// let's reconnect and try again | ||
c.clusterAdmin, err = c.adminFactory() | ||
r += 1 | ||
if err != nil { | ||
// skip this attempt | ||
continue | ||
} | ||
cgsMap, err = c.clusterAdmin.ListConsumerGroups() | ||
} | ||
|
||
if r > max { | ||
return nil, fmt.Errorf("failed to refresh the culster admin and retry: %v", err) | ||
} | ||
|
||
return sets.StringKeySet(cgsMap).List(), nil | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
/* | ||
Copyright 2020 The Knative Authors | ||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
http://www.apache.org/licenses/LICENSE-2.0 | ||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
*/ | ||
|
||
package kafka | ||
|
||
import ( | ||
"fmt" | ||
"sync" | ||
"testing" | ||
"time" | ||
|
||
"github.com/Shopify/sarama" | ||
|
||
pkgtesting "knative.dev/pkg/logging/testing" | ||
) | ||
|
||
const testCG = "cg1" | ||
|
||
var m sync.RWMutex | ||
|
||
type FakeClusterAdmin struct { | ||
sarama.ClusterAdmin | ||
faulty bool | ||
} | ||
|
||
func (f *FakeClusterAdmin) ListConsumerGroups() (map[string]string, error) { | ||
cgs := map[string]string{ | ||
testCG: "cg", | ||
} | ||
m.RLock() | ||
defer m.RUnlock() | ||
if f.faulty { | ||
return nil, fmt.Errorf("Error") | ||
} | ||
return cgs, nil | ||
} | ||
|
||
func TestAdminClient(t *testing.T) { | ||
var wg sync.WaitGroup | ||
wg.Add(10) | ||
ctx := pkgtesting.TestContextWithLogger(t) | ||
f := &FakeClusterAdmin{} | ||
ac, err := NewAdminClient(ctx, func() (sarama.ClusterAdmin, error) { | ||
return f, nil | ||
}) | ||
if err != nil { | ||
t.Error("failed to obtain new client", err) | ||
} | ||
for i := 0; i < 10; i += 1 { | ||
go func() { | ||
doList(t, ac) | ||
check := make(chan struct{}) | ||
go func() { | ||
m.Lock() | ||
f.faulty = true | ||
m.Unlock() | ||
check <- struct{}{} | ||
time.Sleep(2 * time.Second) | ||
m.Lock() | ||
f.faulty = false | ||
m.Unlock() | ||
check <- struct{}{} | ||
}() | ||
<-check | ||
doList(t, ac) | ||
<-check | ||
wg.Done() | ||
}() | ||
} | ||
wg.Wait() | ||
} | ||
|
||
func doList(t *testing.T, ac AdminClient) { | ||
cgs, _ := ac.ListConsumerGroups() | ||
if len(cgs) != 1 { | ||
t.Fatalf("list consumer group: got %d, want %d", len(cgs), 1) | ||
} | ||
if cgs[0] != testCG { | ||
t.Fatalf("consumer group: got %s, want %s", cgs[0], testCG) | ||
} | ||
} |
Oops, something went wrong.