Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Nftables implementation #1881

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions Documentation/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ The value of the config is a JSON dictionary with the following keys:
* `EnableIPv6` (bool): Enables ipv6 support
Defaults to `false`

* `EnableNFTables` (bool): (EXPERIMENTAL) If set to true, flannel uses nftables instead of iptables to masquerade the traffic.
Defaults to `false`

* `SubnetLen` (integer): The size of the subnet allocated to each host.
Defaults to 24 (i.e. /24) unless `Network` was configured to be smaller than a /22 in which case it is two less than the network.

Expand Down Expand Up @@ -128,3 +131,22 @@ FLANNEL_IPMASQ=true
## IPv6 only
To use an IPv6-only environment use the same configuration of the Dual-stack section to enable IPv6 and add "EnableIPv4": false in the net-conf.json of the kube-flannel-cfg ConfigMap. In case of IPv6-only setup, please use the docker.io IPv6-only endpoint as described in the following link: https://www.docker.com/blog/beta-ipv6-support-on-docker-hub-registry/
## nftables mode
To enable `nftables` mode in flannel, set `EnableNFTables` to `true` in the flannel configuration.
Note: to test flannel together with kube-proxy running in its own `nftables` mode, set up the cluster with kubeadm using the following configuration:
```yaml
apiVersion: kubeadm.k8s.io/v1beta3
kind: ClusterConfiguration
kubernetesVersion: v1.29.0
controllerManager:
  extraArgs:
    feature-gates: NFTablesProxyMode=true
---
apiVersion: kubeproxy.config.k8s.io/v1alpha1
kind: KubeProxyConfiguration
mode: "nftables"
featureGates:
  NFTablesProxyMode: true
```
1 change: 1 addition & 0 deletions Documentation/kube-flannel.yml
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ data:
net-conf.json: |
{
"Network": "10.244.0.0/16",
"EnableNFTables": false,
"Backend": {
"Type": "vxlan"
}
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ QEMU_VERSION=v3.0.0
BASH_UNIT_VERSION=v2.3.0

# Default tag and architecture. Can be overridden
TAG?=$(shell git describe --tags --always)
TAG?=$(shell git describe --tags --dirty --always)
thomasferrandiz marked this conversation as resolved.
Show resolved Hide resolved
ARCH?=amd64
# Only enable CGO (and build the UDP backend) on AMD64
ifeq ($(ARCH),amd64)
Expand Down
1 change: 1 addition & 0 deletions e2e/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ RUN set -x \
curl \
tar gzip\
iptables \
nftables \
iproute2 \
iputils \
&& if [ "${ARCH?required}" != "amd64" ]; then \
Expand Down
123 changes: 120 additions & 3 deletions e2e/run-e2e-tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,12 @@ EOF

write-flannel-conf(){
local backend=$1
local enable_nftables=$2
cp ../Documentation/kube-flannel.yml ./kube-flannel.yml
yq -i 'select(.kind == "DaemonSet").spec.template.spec.containers[0].image |= strenv(FLANNEL_IMAGE)' ./kube-flannel.yml
yq -i 'select(.kind == "DaemonSet").spec.template.spec.initContainers[1].image |= strenv(FLANNEL_IMAGE)' ./kube-flannel.yml

export flannel_conf="{ \"Network\": \"$FLANNEL_NET\", \"Backend\": { \"Type\": \"${backend}\" } }"
export flannel_conf="{ \"Network\": \"$FLANNEL_NET\", \"Backend\": { \"Type\": \"${backend}\" }, \"EnableNFTables\": ${enable_nftables} }"

yq -i 'select(.metadata.name == "kube-flannel-cfg").data."net-conf.json" |= strenv(flannel_conf)' ./kube-flannel.yml

Expand All @@ -55,10 +56,11 @@ write-flannel-conf(){
# This is not used at the moment since GitHub runners don't support dual-stack networking
write-flannel-conf-dual-stack(){
local backend=$1
local enable_nftables=$2
cp ../Documentation/kube-flannel.yml ./kube-flannel.yml
yq -i 'select(.kind == "DaemonSet").spec.template.spec.containers[0].image |= strenv(FLANNEL_IMAGE)' ./kube-flannel.yml

export flannel_conf="{ \"EnableIPv6\": true, \"Network\": \"$FLANNEL_NET\", \"IPv6Network\":\"${FLANNEL_IP6NET}\", \"Backend\": { \"Type\": \"${backend}\" } }"
export flannel_conf="{ \"EnableIPv6\": true, \"Network\": \"$FLANNEL_NET\", \"IPv6Network\":\"${FLANNEL_IP6NET}\", \"Backend\": { \"Type\": \"${backend}\" }, \"EnableNFTables\": ${enable_nftables} }"

yq -i 'select(.metadata.name == "kube-flannel-cfg").data."net-conf.json" |= strenv(flannel_conf)' ./kube-flannel.yml
}
Expand All @@ -67,6 +69,10 @@ install-flannel() {
kubectl --kubeconfig="${HOME}/.kube/config" apply -f ./kube-flannel.yml
}

# Remove from the cluster every object declared in ./kube-flannel.yml
# (the same manifest that install-flannel applies).
delete-flannel() {
    local kubeconfig="${HOME}/.kube/config"
    kubectl --kubeconfig="${kubeconfig}" delete -f ./kube-flannel.yml
}

get_pod_ip() {
local pod_name=$1
kubectl --kubeconfig="${HOME}/.kube/config" get pod ${pod_name} --template '{{.status.podIP}}'
Expand Down Expand Up @@ -125,8 +131,9 @@ perf() {

prepare_test() {
local backend=$1
local enable_nftables=${2:-false}
# install flannel version to test
write-flannel-conf ${backend}
write-flannel-conf ${backend} ${enable_nftables}

install-flannel
# wait for nodes to be ready
Expand All @@ -150,32 +157,50 @@ test_vxlan() {
prepare_test vxlan
pings
check_iptables
delete-flannel
check_iptables_removed
}

# End-to-end test of the vxlan backend with EnableNFTables set to true:
# deploy flannel, run the ping checks, verify the expected nftables rules,
# then delete flannel and verify that its nftables chains no longer list
# anything.
test_vxlan_nft() {
# second argument is forwarded as the EnableNFTables value of net-conf.json
prepare_test vxlan true
pings
check_nftables
delete-flannel
check_nftables_removed
}

# End-to-end test of the wireguard backend in the default (iptables) mode:
# deploy flannel, run the ping checks, verify the expected iptables rules,
# then delete flannel and verify that the rules were cleaned up.
test_wireguard() {
# no second argument: prepare_test defaults enable_nftables to false
prepare_test wireguard
pings
check_iptables
delete-flannel
check_iptables_removed
}

# End-to-end test of the host-gw backend in the default (iptables) mode:
# deploy flannel, run the ping checks, verify the expected iptables rules,
# then delete flannel and verify that the rules were cleaned up.
test_host-gw() {
# no second argument: prepare_test defaults enable_nftables to false
prepare_test host-gw
pings
check_iptables
delete-flannel
check_iptables_removed
}

# The udp test is only defined when ARCH is amd64, so it is not available
# on any other architecture.
# NOTE(review): presumably because the UDP backend is only built on amd64 — confirm against the Makefile.
if [[ ${ARCH} == "amd64" ]]; then
# End-to-end test of the udp backend in the default (iptables) mode:
# deploy flannel, run the ping checks, verify the expected iptables rules,
# then delete flannel and verify that the rules were cleaned up.
test_udp() {
prepare_test udp
pings
check_iptables
delete-flannel
check_iptables_removed
}
fi

# End-to-end test of the ipip backend in the default (iptables) mode:
# deploy flannel, run the ping checks, verify the expected iptables rules,
# then delete flannel and verify that the rules were cleaned up.
test_ipip() {
# no second argument: prepare_test defaults enable_nftables to false
prepare_test ipip
pings
check_iptables
delete-flannel
check_iptables_removed
}

test_perf_vxlan() {
Expand Down Expand Up @@ -260,3 +285,95 @@ $(docker exec --privileged local-worker /usr/sbin/iptables -t filter -S FLANNEL-
"$(docker exec --privileged local-leader /usr/sbin/iptables -t filter -S FORWARD)
$(docker exec --privileged local-leader /usr/sbin/iptables -t filter -S FLANNEL-FWD)" "Host 2 has not expected forward rules"
}

# Assert that deleting flannel cleaned up its iptables rules on both nodes
# (containers local-worker and local-leader).
#
# Expected state once flannel has been deleted:
#   - nat table: POSTROUTING contains no rule mentioning FLANNEL, and the
#     FLANNEL-POSTRTG chain lists nothing but its own "-N" declaration;
#   - filter table: FORWARD contains only the KUBE-* jumps listed below, and
#     the FLANNEL-FWD chain lists nothing but its own "-N" declaration.
#
# Every iptables call uses "-w 5" so that it waits up to 5 seconds for the
# xtables lock instead of failing immediately when another process holds it.
check_iptables_removed() {
    read -r -d '' POSTROUTING_RULES_WORKER << EOM
-N FLANNEL-POSTRTG
EOM
    read -r -d '' POSTROUTING_RULES_LEADER << EOM
-N FLANNEL-POSTRTG
EOM
    read -r -d '' FORWARD_RULES << EOM
-P FORWARD ACCEPT
-A FORWARD -m conntrack --ctstate NEW -m comment --comment "kubernetes load balancer firewall" -j KUBE-PROXY-FIREWALL
-A FORWARD -m comment --comment "kubernetes forwarding rules" -j KUBE-FORWARD
-A FORWARD -m conntrack --ctstate NEW -m comment --comment "kubernetes service portals" -j KUBE-SERVICES
-A FORWARD -m conntrack --ctstate NEW -m comment --comment "kubernetes externally-visible service portals" -j KUBE-EXTERNAL-SERVICES
-N FLANNEL-FWD
EOM
    # masquerade rules: the grep output (expected to be empty) is concatenated
    # with the listing of the flannel chain itself
    assert_equals "$POSTROUTING_RULES_WORKER" \
        "$(docker exec --privileged local-worker /usr/sbin/iptables -t nat -S POSTROUTING -w 5 | grep FLANNEL)$(docker exec --privileged local-worker /usr/sbin/iptables -t nat -S FLANNEL-POSTRTG -w 5)" "Host 1 has not expected postrouting rules"
    assert_equals "$POSTROUTING_RULES_LEADER" \
        "$(docker exec --privileged local-leader /usr/sbin/iptables -t nat -S POSTROUTING -w 5 | grep FLANNEL)$(docker exec --privileged local-leader /usr/sbin/iptables -t nat -S FLANNEL-POSTRTG -w 5)" "Host 2 has not expected postrouting rules"
    # forward rules: FORWARD listing followed, on a new line, by the flannel chain listing
    assert_equals "$FORWARD_RULES" \
        "$(docker exec --privileged local-worker /usr/sbin/iptables -t filter -S FORWARD -w 5)
$(docker exec --privileged local-worker /usr/sbin/iptables -t filter -S FLANNEL-FWD -w 5)" "Host 1 has not expected forward rules"
    assert_equals "$FORWARD_RULES" \
        "$(docker exec --privileged local-leader /usr/sbin/iptables -t filter -S FORWARD -w 5)
$(docker exec --privileged local-leader /usr/sbin/iptables -t filter -S FLANNEL-FWD -w 5)" "Host 2 has not expected forward rules"
}

###nftables
# Assert that flannel, running in nftables mode, installed the expected
# masquerade (chain "postrtg") and forward (chain "forward") rules in the
# "flannel-ipv4" table on both nodes (containers local-worker and
# local-leader).
#
# The per-node expectations differ only by the node's pod CIDR, obtained
# from get_pod_cidr.
# NOTE(review): 10.42.0.0/16 is hard-coded as the flannel network here —
# presumably it matches FLANNEL_NET; confirm.
check_nftables() {
    local worker_podcidr=$(get_pod_cidr local-worker)
    local leader_podcidr=$(get_pod_cidr local-leader)
    # -r keeps backslashes literal, consistent with the other heredoc reads
    read -r -d '' POSTROUTING_RULES_WORKER << EOM
table ip flannel-ipv4 {
chain postrtg {
type nat hook postrouting priority srcnat; policy accept;
meta mark 0x00004000 return
ip saddr ${worker_podcidr} ip daddr 10.42.0.0/16 return
ip saddr 10.42.0.0/16 ip daddr ${worker_podcidr} return
ip saddr != ${worker_podcidr} ip daddr 10.42.0.0/16 return
ip saddr 10.42.0.0/16 ip daddr != 224.0.0.0/4 masquerade fully-random
ip saddr != 10.42.0.0/16 ip daddr 10.42.0.0/16 masquerade fully-random
}
}
EOM
    read -r -d '' POSTROUTING_RULES_LEADER << EOM
table ip flannel-ipv4 {
chain postrtg {
type nat hook postrouting priority srcnat; policy accept;
meta mark 0x00004000 return
ip saddr ${leader_podcidr} ip daddr 10.42.0.0/16 return
ip saddr 10.42.0.0/16 ip daddr ${leader_podcidr} return
ip saddr != ${leader_podcidr} ip daddr 10.42.0.0/16 return
ip saddr 10.42.0.0/16 ip daddr != 224.0.0.0/4 masquerade fully-random
ip saddr != 10.42.0.0/16 ip daddr 10.42.0.0/16 masquerade fully-random
}
}
EOM
    read -r -d '' FORWARD_RULES << EOM
table ip flannel-ipv4 {
chain forward {
type filter hook forward priority filter; policy accept;
ip saddr 10.42.0.0/16 accept
ip daddr 10.42.0.0/16 accept
}
}
EOM
    # check masquerade & forward rules
    assert_equals "$POSTROUTING_RULES_WORKER" \
        "$(docker exec --privileged local-worker /usr/sbin/nft list chain flannel-ipv4 postrtg)" "Node worker does not have expected postrouting rules"
    assert_equals "$POSTROUTING_RULES_LEADER" \
        "$(docker exec --privileged local-leader /usr/sbin/nft list chain flannel-ipv4 postrtg)" "Node leader does not have expected postrouting rules"
    assert_equals "$FORWARD_RULES" \
        "$(docker exec --privileged local-worker /usr/sbin/nft list chain flannel-ipv4 forward)" "Node worker does not have expected forward rules"
    assert_equals "$FORWARD_RULES" \
        "$(docker exec --privileged local-leader /usr/sbin/nft list chain flannel-ipv4 forward)" "Node leader does not have expected forward rules"
}

# Assert that, once flannel has been deleted, listing its nftables chains
# prints nothing on stdout on either node. The checks run in the order:
# postrtg on worker, postrtg on leader, forward on worker, forward on leader.
check_nftables_removed() {
    local chain node label
    for chain in postrtg forward; do
        # human-readable chain name used in the failure message
        case "${chain}" in
            postrtg) label="postrouting" ;;
            forward) label="forward" ;;
        esac
        for node in worker leader; do
            assert_equals "" \
                "$(docker exec --privileged "local-${node}" /usr/sbin/nft list chain flannel-ipv4 "${chain}")" \
                "Node ${node} has unexpected ${label} rules"
        done
    done
}
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ require (
github.com/avast/retry-go/v4 v4.5.1
github.com/tencentcloud/tencentcloud-sdk-go/tencentcloud/common v1.0.872
github.com/tencentcloud/tencentcloud-sdk-go/tencentcloud/vpc v1.0.872
sigs.k8s.io/knftables v0.0.14
)

require (
Expand Down
4 changes: 4 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,8 @@ github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/lithammer/dedent v1.1.0 h1:VNzHMVCBNG1j0fh3OrsFRkVUwStdDArbgBWoPAffktY=
github.com/lithammer/dedent v1.1.0/go.mod h1:jrXYCQtgg0nJiN+StA2KgR7w6CiQNv9Fd/Z9BP0jIOc=
github.com/mailru/easyjson v0.0.0-20190614124828-94de47d64c63/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc=
github.com/mailru/easyjson v0.0.0-20190626092158-b2ccc519800e/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc=
github.com/mailru/easyjson v0.7.6 h1:8yTIVnZgCoiM1TgqoeTl+LfU5Jg6/xL3QhGQnimLYnA=
Expand Down Expand Up @@ -781,6 +783,8 @@ rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0=
rsc.io/sampler v1.3.0/go.mod h1:T1hPZKmBbMNahiBKFy5HrXp6adAjACjK9JXDnKaTXpA=
sigs.k8s.io/json v0.0.0-20220713155537-f223a00ba0e2 h1:iXTIw73aPyC+oRdyqqvVJuloN1p0AC/kzH07hu3NE+k=
sigs.k8s.io/json v0.0.0-20220713155537-f223a00ba0e2/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0=
sigs.k8s.io/knftables v0.0.14 h1:VzKQoDMCGBOH8c85sGrWSXSPCS0XrIpEfOlcCLBXiC0=
sigs.k8s.io/knftables v0.0.14/go.mod h1:f/5ZLKYEUPUhVjUCg6l80ACdL7CIIyeL0DxfgojGRTk=
sigs.k8s.io/structured-merge-diff/v4 v4.2.3 h1:PRbqxJClWWYMNV1dhaG4NsibJbArud9kFxnAMREiWFE=
sigs.k8s.io/structured-merge-diff/v4 v4.2.3/go.mod h1:qjx8mGObPmV2aSZepjQjbmb2ihdVs8cGKBraizNC69E=
sigs.k8s.io/yaml v1.3.0 h1:a2VclLzOGrwOHDiV8EfBGhvjHvP46CtW5j6POvhYGGo=
Expand Down
2 changes: 1 addition & 1 deletion images/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ RUN export GOOS=$(xx-info os) &&\

FROM alpine:20240315
RUN apk update && apk upgrade
RUN apk add --no-cache iproute2 ca-certificates iptables strongswan iptables-legacy && update-ca-certificates
RUN apk add --no-cache iproute2 ca-certificates nftables iptables strongswan iptables-legacy && update-ca-certificates
RUN apk add wireguard-tools --no-cache --repository http://dl-cdn.alpinelinux.org/alpine/edge/community
COPY --from=build /build/dist/flanneld /opt/bin/flanneld
COPY dist/mk-docker-opts.sh /opt/bin/
Expand Down
27 changes: 22 additions & 5 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ import (
"github.com/flannel-io/flannel/pkg/subnet/kube"
"github.com/flannel-io/flannel/pkg/trafficmngr"
"github.com/flannel-io/flannel/pkg/trafficmngr/iptables"
"github.com/flannel-io/flannel/pkg/trafficmngr/nftables"
"github.com/flannel-io/flannel/pkg/version"
"golang.org/x/net/context"
log "k8s.io/klog/v2"
Expand Down Expand Up @@ -336,7 +337,15 @@ func main() {
}

//Create TrafficManager and instantiate it based on whether we use iptables or nftables
trafficMngr := newTrafficManager()
trafficMngr := newTrafficManager(config.EnableNFTables)
err = trafficMngr.Init(ctx, &wg)
thomasferrandiz marked this conversation as resolved.
Show resolved Hide resolved
if err != nil {
log.Error(err)
cancel()
wg.Wait()
os.Exit(1)
}

flannelIPv4Net := ip.IP4Net{}
flannelIpv6Net := ip.IP6Net{}
if config.EnableIPv4 {
Expand Down Expand Up @@ -365,7 +374,8 @@ func main() {
prevIPv6Networks := ReadIP6CIDRsFromSubnetFile(opts.subnetFile, "FLANNEL_IPV6_NETWORK")
prevIPv6Subnet := ReadIP6CIDRFromSubnetFile(opts.subnetFile, "FLANNEL_IPV6_SUBNET")

err = trafficMngr.SetupAndEnsureMasqRules(flannelIPv4Net, prevSubnet,
err = trafficMngr.SetupAndEnsureMasqRules(ctx,
flannelIPv4Net, prevSubnet,
prevNetworks,
flannelIpv6Net, prevIPv6Subnet,
prevIPv6Networks,
Expand All @@ -383,7 +393,7 @@ func main() {
// In Docker 1.12 and earlier, the default FORWARD chain policy was ACCEPT.
// In Docker 1.13 and later, Docker sets the default policy of the FORWARD chain to DROP.
if opts.iptablesForwardRules {
trafficMngr.SetupAndEnsureForwardRules(
trafficMngr.SetupAndEnsureForwardRules(ctx,
flannelIPv4Net,
flannelIpv6Net,
opts.iptablesResyncSeconds)
Expand Down Expand Up @@ -569,6 +579,13 @@ func ReadIP6CIDRsFromSubnetFile(path string, CIDRKey string) []ip.IP6Net {
return prevCIDRs
}

func newTrafficManager() trafficmngr.TrafficManager {
return iptables.IPTablesManager{}
// newTrafficManager returns the TrafficManager implementation to use:
// the nftables-based manager when useNftables is true, the iptables-based
// manager otherwise. The selected mode is logged at info level. The returned
// manager is not initialised; the caller is expected to call Init on it.
func newTrafficManager(useNftables bool) trafficmngr.TrafficManager {
	if useNftables {
		log.Info("Starting flannel in nftables mode")
		return &nftables.NFTablesManager{}
	}
	log.Info("Starting flannel in iptables mode")
	return &iptables.IPTablesManager{}
}
29 changes: 15 additions & 14 deletions pkg/subnet/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,20 +27,21 @@ import (
)

type Config struct {
EnableIPv4 bool
EnableIPv6 bool
Network ip.IP4Net
IPv6Network ip.IP6Net
Networks []ip.IP4Net
IPv6Networks []ip.IP6Net
SubnetMin ip.IP4
SubnetMax ip.IP4
IPv6SubnetMin *ip.IP6
IPv6SubnetMax *ip.IP6
SubnetLen uint
IPv6SubnetLen uint
BackendType string `json:"-"`
Backend json.RawMessage `json:",omitempty"`
EnableIPv4 bool
EnableIPv6 bool
EnableNFTables bool
Network ip.IP4Net
IPv6Network ip.IP6Net
Networks []ip.IP4Net
IPv6Networks []ip.IP6Net
SubnetMin ip.IP4
SubnetMax ip.IP4
IPv6SubnetMin *ip.IP6
IPv6SubnetMax *ip.IP6
SubnetLen uint
IPv6SubnetLen uint
BackendType string `json:"-"`
Backend json.RawMessage `json:",omitempty"`
}

func parseBackendType(be json.RawMessage) (string, error) {
Expand Down
Loading
Loading