diff --git a/.github/workflows/build_push_concheck.yaml b/.github/workflows/build_push_concheck.yaml index c07ac86f..9978102b 100644 --- a/.github/workflows/build_push_concheck.yaml +++ b/.github/workflows/build_push_concheck.yaml @@ -4,12 +4,12 @@ on: push: branches: - main - - v1.2.1 + - v1.2.2 paths: - connection-check/** env: - IMAGE_VERSION: '1.2.1' + IMAGE_VERSION: '1.2.2' IMAGE_REGISTRY: ghcr.io/${{ github.repository_owner }} jobs: diff --git a/.github/workflows/build_push_controller.yaml b/.github/workflows/build_push_controller.yaml index 21feb6a7..b3b53d72 100644 --- a/.github/workflows/build_push_controller.yaml +++ b/.github/workflows/build_push_controller.yaml @@ -4,7 +4,7 @@ on: push: branches: - main - - v1.2.1 + - v1.2.2 paths: - controllers/** - compute/** @@ -17,7 +17,7 @@ on: - ./Makefile env: - VERSION: '1.2.1' + VERSION: '1.2.2' IMAGE_REGISTRY: ghcr.io/${{ github.repository_owner }} DAEMON_REGISTRY: ghcr.io/${{ github.repository_owner }} diff --git a/.github/workflows/build_push_daemon.yaml b/.github/workflows/build_push_daemon.yaml index adc0ca11..9e1f9458 100644 --- a/.github/workflows/build_push_daemon.yaml +++ b/.github/workflows/build_push_daemon.yaml @@ -4,14 +4,14 @@ on: push: branches: - main - - v1.2.1 + - v1.2.2 paths: - daemon/** - cni/** - Makefile env: - IMAGE_VERSION: '1.2.1' + IMAGE_VERSION: '1.2.2' DAEMON_REGISTRY: ghcr.io/${{ github.repository_owner }} jobs: diff --git a/Makefile b/Makefile index 48c13066..bd86a36b 100644 --- a/Makefile +++ b/Makefile @@ -17,7 +17,7 @@ endif # - use the VERSION as arg of the bundle target (e.g make bundle VERSION=0.0.2) # - use environment variables to overwrite this value (e.g export VERSION=0.0.2) # VERSION ?= 0.0.1 -VERSION ?= 1.2.1 +VERSION ?= 1.2.2 export CHANNELS = "alpha" # CHANNELS define the bundle channels used in the bundle. diff --git a/README.md b/README.md index 5733b0f1..9e9409b4 100644 --- a/README.md +++ b/README.md @@ -119,7 +119,7 @@ Recommended to deploy in the same default namespace for [health check service](. ``` ##### by bundle with operator-sdk ```bash - operator-sdk run bundle ghcr.io/foundation-model-stack/multi-nic-cni-bundle:v1.2.1 -n multi-nic-cni-operator + operator-sdk run bundle ghcr.io/foundation-model-stack/multi-nic-cni-bundle:v1.2.2 -n multi-nic-cni-operator ``` #### Deploy MultiNicNetwork resource 1. Prepare `network.yaml` as shown in the [example](#multinicnetwork) diff --git a/config/manager/kustomization.yaml b/config/manager/kustomization.yaml index 481d6cb6..3f9630b9 100644 --- a/config/manager/kustomization.yaml +++ b/config/manager/kustomization.yaml @@ -13,4 +13,4 @@ kind: Kustomization images: - name: controller newName: ghcr.io/foundation-model-stack/multi-nic-cni-controller - newTag: v1.2.1 + newTag: v1.2.2 diff --git a/config/samples/config.yaml b/config/samples/config.yaml index 5625faa2..699d18f8 100644 --- a/config/samples/config.yaml +++ b/config/samples/config.yaml @@ -11,7 +11,7 @@ spec: value: "11000" - name: RT_TABLE_PATH value: /opt/rt_tables - image: ghcr.io/foundation-model-stack/multi-nic-cni-daemon:v1.2.1 + image: ghcr.io/foundation-model-stack/multi-nic-cni-daemon:v1.2.2 imagePullPolicy: Always mounts: - hostpath: /var/lib/cni/bin diff --git a/config/samples/kustomization.yaml b/config/samples/kustomization.yaml index 89c79434..4a933746 100644 --- a/config/samples/kustomization.yaml +++ b/config/samples/kustomization.yaml @@ -15,4 +15,4 @@ kind: Kustomization images: - name: multi-nic-cni-daemon newName: ghcr.io/foundation-model-stack/multi-nic-cni-daemon - newTag: v1.2.1 + newTag: v1.2.2 diff --git a/connection-check/concheck.yaml b/connection-check/concheck.yaml index 07f35691..979fd3e3 100644 --- a/connection-check/concheck.yaml +++ b/connection-check/concheck.yaml @@ -71,7 +71,7 @@ spec: serviceAccountName: multi-nic-concheck-account containers: - name: concheck - image: ghcr.io/foundation-model-stack/multi-nic-cni-concheck:v1.2.1 + image: ghcr.io/foundation-model-stack/multi-nic-cni-concheck:v1.2.2 imagePullPolicy: Always securityContext: privileged: true diff --git a/controllers/vars/vars.go b/controllers/vars/vars.go index 9cf2ea18..6ae6a701 100644 --- a/controllers/vars/vars.go +++ b/controllers/vars/vars.go @@ -39,7 +39,7 @@ const ( DefaultOperatorNamespace = "multi-nic-cni-operator" DefaultCNIType = "multi-nic" DefaultIPAMType = "multi-nic-ipam" - DefaultDaemonImage = "ghcr.io/foundation-model-stack/multi-nic-cni-daemon:v1.2.1" + DefaultDaemonImage = "ghcr.io/foundation-model-stack/multi-nic-cni-daemon:v1.2.2" DefaultJoinPath = "/join" DefaultInterfacePath = "/interface" DefaultAddRoutePath = "/addl3" diff --git a/daemon/Makefile b/daemon/Makefile index 85733a8c..1de05681 100644 --- a/daemon/Makefile +++ b/daemon/Makefile @@ -6,7 +6,7 @@ export DAEMON_REGISTRY ?= ghcr.io/foundation-model-stack # DAEMON_IMG defines the image:tag used for daemon IMAGE_TAG_BASE = $(DAEMON_REGISTRY)/multi-nic-cni -IMAGE_VERSION ?= 1.2.1 +IMAGE_VERSION ?= 1.2.2 DAEMON_IMG ?= $(IMAGE_TAG_BASE)-daemon:v$(IMAGE_VERSION) diff --git a/daemon/src/iface/iface.go b/daemon/src/iface/iface.go index c4861262..f49a543a 100644 --- a/daemon/src/iface/iface.go +++ b/daemon/src/iface/iface.go @@ -164,3 +164,28 @@ func GetInterfaces() []backend.InterfaceInfoType { } return interfaces } + +func getNetAddressFromDevice(devName string) (string, error) { + devLink, err := netlink.LinkByName(devName) + if err != nil { + log.Printf("cannot find link %s: %v", devName, err) + return "", err + } + addrs, err := netlink.AddrList(devLink, netlink.FAMILY_V4) + if err != nil || len(addrs) == 0 { + log.Printf("cannot list address on %s: %v", devName, err) + return "", err + } + addr := addrs[0].IPNet + if addr == nil { + log.Printf("no address set on %s", devName) + return "", err + } + if devLink.Attrs().Flags&net.FlagUp == 0 { + // interface down + log.Printf("%s down", devName) + return "", err + } + netAddress := getNetAddress(addr) + return netAddress, nil +} diff --git a/daemon/src/iface/pci.go b/daemon/src/iface/pci.go index 295dae78..d2fe4c69 100644 --- a/daemon/src/iface/pci.go +++ b/daemon/src/iface/pci.go @@ -26,6 +26,8 @@ const ( var CheckPointfile string = "/var/lib/kubelet/device-plugins/kubelet_internal_checkpoint" +var deviceMapCache = InitSafeCache() + // modify from https://github.com/k8snetworkplumbingwg/multus-cni/blob/9b45d4b211728aa0db44a1624aac8e61843390cf/pkg/checkpoint/checkpoint.go#L72 // DeviceIDs can map[string]string or []string type PodDevicesEntry struct { @@ -121,10 +123,32 @@ func GetDeviceMap(resourceMap map[string][]string, resourceName string) map[stri if deviceIDs, exist := resourceMap[resourceName]; exist { for _, deviceID := range deviceIDs { - masterName, err := GetPfName(deviceID) - if err == nil { - if netAddress, exist := nameNetMap[masterName]; exist { + var masterName string + deviceNameInterface := deviceMapCache.GetCache(deviceID) + if deviceNameInterface != nil { + masterName = deviceNameInterface.(string) + } else { + var err error + masterName, err = GetPfName(deviceID) + if err != nil { + log.Printf("cannot get physical device %s: %v\n", deviceID, err) + } else { + log.Printf("set deviceMapCache %s=%s\n", deviceID, masterName) + deviceMapCache.SetCache(deviceID, masterName) + } + } + if netAddress, exist := nameNetMap[masterName]; exist { + deviceMap[netAddress] = deviceID + } else { + netAddress, err := getNetAddressFromDevice(masterName) + if err != nil { + log.Printf("cannot get network address of device %s: %v\n", masterName, err) + } else { deviceMap[netAddress] = deviceID + // found new device, update interfaces + GetInterfaces() + nameNetMap = GetNameNetMap() + log.Printf("updated nameNetMap map: %v\n", nameNetMap) } } } diff --git a/daemon/src/selector/selector.go b/daemon/src/selector/selector.go index d66903a8..e5016044 100644 --- a/daemon/src/selector/selector.go +++ b/daemon/src/selector/selector.go @@ -71,8 +71,6 @@ func getDefaultResponse(req NICSelectRequest, masterNameMap map[string]string, n } func Select(req NICSelectRequest) NICSelectResponse { - masterNameMap := iface.GetInterfaceNameMap() - nameNetMap := iface.GetNameNetMap() deviceMap := make(map[string]string) resourceMap := make(map[string][]string) @@ -93,6 +91,8 @@ func Select(req NICSelectRequest) NICSelectResponse { log.Printf("Cannot get pod: %v\n", err) } + masterNameMap := iface.GetInterfaceNameMap() + nameNetMap := iface.GetNameNetMap() netSpec, err := MultinicnetHandler.Get(req.NetAttachDefName, req.PodNamespace) if err != nil { return getDefaultResponse(req, masterNameMap, nameNetMap, deviceMap, resourceMap)