From 9de563c0b1d2f655e037993e1b9594f5070cd584 Mon Sep 17 00:00:00 2001 From: Lukas Sismis Date: Sun, 2 Jun 2024 13:17:28 +0200 Subject: [PATCH 1/5] wip: affiinity --- configure.ac | 2 +- src/util-affinity.c | 104 ++++++++++++++++++++++++++++++++++++++++++++ src/util-affinity.h | 2 + 3 files changed, 107 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index daad418208bc..6b1f6b8707f7 100644 --- a/configure.ac +++ b/configure.ac @@ -1413,7 +1413,7 @@ exit 1 fi CFLAGS="${CFLAGS} `pkg-config --cflags libdpdk`" - LIBS="${LIBS} -Wl,-R,`pkg-config --libs-only-L libdpdk | cut -c 3-` -lnuma `pkg-config --libs libdpdk`" + LIBS="${LIBS} -Wl,-R,`pkg-config --libs-only-L libdpdk | cut -c 3-` -lnuma `pkg-config --libs libdpdk` -lhwloc" if test ! -z "$(ldconfig -p | grep librte_net_bond)"; then AC_DEFINE([HAVE_DPDK_BOND],[1],(DPDK Bond PMD support enabled)) diff --git a/src/util-affinity.c b/src/util-affinity.c index 06256db5b8cd..736d26639dfa 100644 --- a/src/util-affinity.c +++ b/src/util-affinity.c @@ -307,6 +307,110 @@ uint16_t AffinityGetNextCPU(ThreadsAffinityType *taf) return ncpu; } +static hwloc_topology_t topology; + +// Function to get the NUMA node of a network device using hwloc +int get_numa_node_for_net_device(hwloc_topology_t topology, const char *net_device) { + hwloc_obj_t obj = NULL; + char sysfs_path[256]; + char pci_address[256]; + FILE *fp; + + // Get the PCI address of the network device + snprintf(sysfs_path, sizeof(sysfs_path), "/sys/class/net/%s/device/uevent", net_device); + fp = fopen(sysfs_path, "r"); + if (!fp) { + perror("fopen"); + return -1; + } + + while (fgets(pci_address, sizeof(pci_address), fp)) { + if (strncmp(pci_address, "PCI_SLOT_NAME=", 14) == 0) { + strcpy(pci_address, pci_address + 14); + pci_address[strcspn(pci_address, "\n")] = '\0'; // Remove newline character + break; + } + } + fclose(fp); + + if (strlen(pci_address) == 0) { + fprintf(stderr, "Failed to get PCI address for device %s\n", net_device); + return 
-1; + } + + // Iterate through PCI devices to find the matching one + while ((obj = hwloc_get_next_pcidev(topology, obj)) != NULL) { + char pci_name[128]; + hwloc_obj_snprintf(pci_name, sizeof(pci_name), topology, obj, "#", 0); + + if (strcmp(pci_name, pci_address) == 0) { + // Find the NUMA node associated with this PCI device + hwloc_obj_t parent = obj; + while (parent && parent->type != HWLOC_OBJ_NUMANODE) { + parent = parent->parent; + } + + if (parent) { + return parent->logical_index; + } else { + fprintf(stderr, "No NUMA node found for device %s\n", net_device); + return -1; + } + } + } + + fprintf(stderr, "No matching PCI device found for %s\n", net_device); + return -1; +} + +uint16_t AffinityGetNextCPUFromNUMANode(ThreadsAffinityType *taf, int numa_node) { + uint16_t ncpu = 0; +#if !defined __CYGWIN__ && !defined OS_WIN32 && !defined __OpenBSD__ && !defined sun + int iter = 0; + SCMutexLock(&taf->taf_mutex); + ncpu = taf->lcpu; + + // Check for CPUs within the preferred NUMA node first + while (!CPU_ISSET(ncpu, &taf->cpu_set) || hwloc_get_obj_by_os_index(topology, HWLOC_OBJ_PU, ncpu)->nodeset->first != numa_node) { + ncpu++; + if (ncpu >= UtilCpuGetNumProcessorsOnline()) { + ncpu = 0; + iter++; + } + if (iter >= 2) { + break; + } + } + + if (iter == 2) { + // Fallback to any available CPU if no CPU found within the preferred NUMA node + ncpu = taf->lcpu; + while (!CPU_ISSET(ncpu, &taf->cpu_set) && iter < 2) { + ncpu++; + if (ncpu >= UtilCpuGetNumProcessorsOnline()) { + ncpu = 0; + iter++; + } + } + if (iter == 2) { + SCLogError("cpu_set does not contain " + "available cpus, cpu affinity conf is invalid"); + } + } + + taf->lcpu = ncpu + 1; + if (taf->lcpu >= UtilCpuGetNumProcessorsOnline()) + taf->lcpu = 0; + SCMutexUnlock(&taf->taf_mutex); + SCLogDebug("Setting affinity on CPU %d", ncpu); +#endif /* OS_WIN32 and __OpenBSD__ */ + return ncpu; +} + +/** + * \brief Return the total number of CPUs in a given affinity + * \retval the number of affined CPUs + 
*/ uint16_t UtilAffinityGetAffinedCPUNum(ThreadsAffinityType *taf) { uint16_t ncpu = 0; diff --git a/src/util-affinity.h b/src/util-affinity.h index 2fa4509ffa2c..9ea75fa3713f 100644 --- a/src/util-affinity.h +++ b/src/util-affinity.h @@ -27,6 +27,8 @@ #include "conf.h" #include "threads.h" +#include + #if defined OS_FREEBSD #include #include From 0facffbaa4ff9fa52aa3bd9a7b1fbe7439c9d381 Mon Sep 17 00:00:00 2001 From: Lukas Sismis Date: Sat, 8 Jun 2024 12:07:01 +0200 Subject: [PATCH 2/5] fixup! wip: affiinity --- src/util-affinity.c | 299 +++++++++++++++++++++++++++++++------------- 1 file changed, 211 insertions(+), 88 deletions(-) diff --git a/src/util-affinity.c b/src/util-affinity.c index 736d26639dfa..6bc0c6fae8a1 100644 --- a/src/util-affinity.c +++ b/src/util-affinity.c @@ -276,128 +276,206 @@ void AffinitySetupLoadFromConfig(void) #endif /* OS_WIN32 and __OpenBSD__ */ } -/** - * \brief Return next cpu to use for a given thread family - * \retval the cpu to used given by its id - */ -uint16_t AffinityGetNextCPU(ThreadsAffinityType *taf) -{ - uint16_t ncpu = 0; -#if !defined __CYGWIN__ && !defined OS_WIN32 && !defined __OpenBSD__ && !defined sun - int iter = 0; - SCMutexLock(&taf->taf_mutex); - ncpu = taf->lcpu; - while (!CPU_ISSET(ncpu, &taf->cpu_set) && iter < 2) { - ncpu++; - if (ncpu >= UtilCpuGetNumProcessorsOnline()) { - ncpu = 0; - iter++; - } +static hwloc_topology_t topology = NULL; + +int DeviceGetNumaID(hwloc_topology_t topology, hwloc_obj_t obj) { + hwloc_obj_t numa_node = NULL; + while ((numa_node = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_NUMANODE, numa_node)) != NULL) { + SCLogNotice("another numa"); } - if (iter == 2) { - SCLogError("cpu_set does not contain " - "available cpus, cpu affinity conf is invalid"); + + hwloc_obj_t parent = obj->parent; + while (parent) { + printf("Object type: %s\n", hwloc_obj_type_string(parent->type)); + if (parent->type == HWLOC_OBJ_NUMANODE) { + return parent->logical_index; + } + parent = parent->parent; 
} - taf->lcpu = ncpu + 1; - if (taf->lcpu >= UtilCpuGetNumProcessorsOnline()) - taf->lcpu = 0; - SCMutexUnlock(&taf->taf_mutex); - SCLogDebug("Setting affinity on CPU %d", ncpu); -#endif /* OS_WIN32 and __OpenBSD__ */ - return ncpu; -} -static hwloc_topology_t topology; + return -1; +} -// Function to get the NUMA node of a network device using hwloc -int get_numa_node_for_net_device(hwloc_topology_t topology, const char *net_device) { - hwloc_obj_t obj = NULL; - char sysfs_path[256]; - char pci_address[256]; - FILE *fp; - - // Get the PCI address of the network device - snprintf(sysfs_path, sizeof(sysfs_path), "/sys/class/net/%s/device/uevent", net_device); - fp = fopen(sysfs_path, "r"); - if (!fp) { - perror("fopen"); - return -1; +// Static function to find the NUMA node of a given hwloc object +static hwloc_obj_t find_numa_node(hwloc_topology_t topology, hwloc_obj_t obj) { + if (!obj) { + fprintf(stderr, "Invalid hwloc object.\n"); + return NULL; } - while (fgets(pci_address, sizeof(pci_address), fp)) { - if (strncmp(pci_address, "PCI_SLOT_NAME=", 14) == 0) { - strcpy(pci_address, pci_address + 14); - pci_address[strcspn(pci_address, "\n")] = '\0'; // Remove newline character + hwloc_obj_t parent = obj->parent; + while (parent) { + printf("Object type: %s\n", hwloc_obj_type_string(parent->type)); + if (parent->type == HWLOC_PACKAGE || parent->type == HWLOC_NUMANODE) { break; } + parent = parent->parent; } - fclose(fp); - if (strlen(pci_address) == 0) { - fprintf(stderr, "Failed to get PCI address for device %s\n", net_device); - return -1; + if (parent == NULL) { + fprintf(stderr, "No parent found for the given object.\n"); + return NULL; } - // Iterate through PCI devices to find the matching one - while ((obj = hwloc_get_next_pcidev(topology, obj)) != NULL) { - char pci_name[128]; - hwloc_obj_snprintf(pci_name, sizeof(pci_name), topology, obj, "#", 0); + // Iterate over all NUMA nodes and check if they intersect with the given object + hwloc_obj_t numa_node = 
NULL; + while ((numa_node = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_NUMANODE, numa_node)) != NULL) { + if (hwloc_bitmap_intersects(parent->cpuset, numa_node->cpuset)) { + return numa_node; + } + } + + return NULL; +} + +hwloc_obj_t find_pcie_address(hwloc_topology_t topology, const char *interface_name) { + hwloc_obj_t obj = NULL; - if (strcmp(pci_name, pci_address) == 0) { - // Find the NUMA node associated with this PCI device - hwloc_obj_t parent = obj; - while (parent && parent->type != HWLOC_OBJ_NUMANODE) { + while ((obj = hwloc_get_next_osdev(topology, obj)) != NULL) { + if (obj->attr->osdev.type == HWLOC_OBJ_OSDEV_NETWORK && strcmp(obj->name, interface_name) == 0) { + hwloc_obj_t parent = obj->parent; + while (parent) { + if (parent->type == HWLOC_OBJ_PCI_DEVICE) { + return parent; + } parent = parent->parent; } + } + } + return NULL; +} - if (parent) { - return parent->logical_index; - } else { - fprintf(stderr, "No NUMA node found for device %s\n", net_device); - return -1; - } +// Static function to deparse PCIe interface string name to individual components +static void deparse_pcie_address(const char *pcie_address, unsigned int *domain, unsigned int *bus, unsigned int *device, unsigned int *function) { + *domain = 0; // Default domain to 0 if not provided + + // Handle both full and short PCIe address formats + if (sscanf(pcie_address, "%x:%x:%x.%x", domain, bus, device, function) != 4) { + if (sscanf(pcie_address, "%x:%x.%x", bus, device, function) != 3) { + fprintf(stderr, "Error parsing PCIe address: %s\n", pcie_address); + exit(EXIT_FAILURE); } } +} - fprintf(stderr, "No matching PCI device found for %s\n", net_device); - return -1; +// Function to convert PCIe address to hwloc object +hwloc_obj_t get_hwloc_object_from_pcie_address(hwloc_topology_t topology, const char *pcie_address) { + hwloc_obj_t obj = NULL; + unsigned int domain, bus, device, function; + deparse_pcie_address(pcie_address, &domain, &bus, &device, &function); + while ((obj 
= hwloc_get_next_pcidev(topology, obj)) != NULL) { + if (obj->attr->pcidev.domain == domain && obj->attr->pcidev.bus == bus && obj->attr->pcidev.dev == device && obj->attr->pcidev.func == function) { + return obj; + } + } + return NULL; +} + +// Function to print hwloc object attributes +void print_hwloc_object(hwloc_obj_t obj) { + if (!obj) { + printf("No object found for the given PCIe address.\n"); + return; + } + + printf("Object type: %s\n", hwloc_obj_type_string(obj->type)); + printf("Logical index: %u\n", obj->logical_index); + printf("Depth: %u\n", obj->depth); + printf("Attributes:\n"); + if (obj->type == HWLOC_OBJ_PCI_DEVICE) { + printf(" Domain: %04x\n", obj->attr->pcidev.domain); + printf(" Bus: %02x\n", obj->attr->pcidev.bus); + printf(" Device: %02x\n", obj->attr->pcidev.dev); + printf(" Function: %01x\n", obj->attr->pcidev.func); + printf(" Class ID: %04x\n", obj->attr->pcidev.class_id); + printf(" Vendor ID: %04x\n", obj->attr->pcidev.vendor_id); + printf(" Device ID: %04x\n", obj->attr->pcidev.device_id); + printf(" Subvendor ID: %04x\n", obj->attr->pcidev.subvendor_id); + printf(" Subdevice ID: %04x\n", obj->attr->pcidev.subdevice_id); + printf(" Revision: %02x\n", obj->attr->pcidev.revision); + printf(" Link speed: %f GB/s\n", obj->attr->pcidev.linkspeed); + } else { + printf(" No PCI device attributes available.\n"); + } } -uint16_t AffinityGetNextCPUFromNUMANode(ThreadsAffinityType *taf, int numa_node) { + +/** + * \brief Return next cpu to use for a given thread family + * \retval the cpu to used given by its id + */ +uint16_t AffinityGetNextCPU(ThreadsAffinityType *taf) +{ + if (topology == NULL) { + if (hwloc_topology_init(&topology) == -1) { + FatalError("Failed to initialize topology"); + } + + // hwloc_topology_get_flags + + int ret = hwloc_topology_set_flags(topology, HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM); + ret = hwloc_topology_set_io_types_filter(topology, HWLOC_TYPE_FILTER_KEEP_ALL); + if (ret == -1) { + FatalError("Failed to set topology 
flags"); + hwloc_topology_destroy(topology); + } + + if (hwloc_topology_load(topology) == -1) { + FatalError("Failed to load topology"); + hwloc_topology_destroy(topology); + } + } + + // hwloc_topology_export_xml(topology, "/tmp/hwloc_topology.xml", HWLOC_TOPOLOGY_EXPORT_XML_FLAG_V1); + + + hwloc_obj_t obj1 = NULL; + // while ((obj1 = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_PCI_DEVICE, obj1)) != NULL) { + // // print out all attributes + // if (obj1->name != NULL && strncmp(obj1->name, "ens1f0", MIN(strlen(obj1->name), strlen("ens1f0"))) == 0) { + // SCLogNotice("Found PCI device: %s\n", obj1->name); + // SCLogNotice("Domain: %d\n", obj1->attr->pcidev.domain); + // SCLogNotice("Bus: %d\n", obj1->attr->pcidev.bus); + // SCLogNotice("Dev: %d\n", obj1->attr->pcidev.dev); + // } + // // infos = hwloc_obj_get_info_by_name(obj1, "PCIBusID"); + // } + + obj1 = get_hwloc_object_from_pcie_address(topology, "0000:3b:00.0"); + print_hwloc_object(obj1); + SCLogNotice("PCI device not found, went over all devices"); + + obj1 = find_pcie_address(topology, "ens1f0"); + if (obj1 != NULL) { + static char pcie_address[32]; + snprintf(pcie_address, sizeof(pcie_address), "%04x:%02x:%02x.%x", obj1->attr->pcidev.domain, obj1->attr->pcidev.bus, obj1->attr->pcidev.dev, obj1->attr->pcidev.func); + SCLogNotice("PCIe addr of ens1f0 is %s with NUMA id %d or %p", pcie_address, DeviceGetNumaID(topology, obj1), find_numa_node(topology, obj1)); + } + + + + // if (topology != NULL) { + // int numa = get_numa_node_for_net_device(topology, "ens1f0"); + // FatalError("NUMA node for ens1f0: %d\n", numa); + // } + // hwloc_topology_destroy(topology); + uint16_t ncpu = 0; #if !defined __CYGWIN__ && !defined OS_WIN32 && !defined __OpenBSD__ && !defined sun int iter = 0; SCMutexLock(&taf->taf_mutex); ncpu = taf->lcpu; - - // Check for CPUs within the preferred NUMA node first - while (!CPU_ISSET(ncpu, &taf->cpu_set) || hwloc_get_obj_by_os_index(topology, HWLOC_OBJ_PU, ncpu)->nodeset->first != 
numa_node) { + while (!CPU_ISSET(ncpu, &taf->cpu_set) && iter < 2) { ncpu++; if (ncpu >= UtilCpuGetNumProcessorsOnline()) { ncpu = 0; iter++; } - if (iter >= 2) { - break; - } } - if (iter == 2) { - // Fallback to any available CPU if no CPU found within the preferred NUMA node - ncpu = taf->lcpu; - while (!CPU_ISSET(ncpu, &taf->cpu_set) && iter < 2) { - ncpu++; - if (ncpu >= UtilCpuGetNumProcessorsOnline()) { - ncpu = 0; - iter++; - } - } - if (iter == 2) { - SCLogError("cpu_set does not contain " - "available cpus, cpu affinity conf is invalid"); - } + SCLogError("cpu_set does not contain " + "available cpus, cpu affinity conf is invalid"); } - taf->lcpu = ncpu + 1; if (taf->lcpu >= UtilCpuGetNumProcessorsOnline()) taf->lcpu = 0; @@ -407,6 +485,51 @@ uint16_t AffinityGetNextCPUFromNUMANode(ThreadsAffinityType *taf, int numa_node) return ncpu; } + +// uint16_t AffinityGetNextCPUFromNUMANode(ThreadsAffinityType *taf, int numa_node) { +// uint16_t ncpu = 0; +// #if !defined __CYGWIN__ && !defined OS_WIN32 && !defined __OpenBSD__ && !defined sun +// int iter = 0; +// SCMutexLock(&taf->taf_mutex); +// ncpu = taf->lcpu; + +// // Check for CPUs within the preferred NUMA node first +// while (!CPU_ISSET(ncpu, &taf->cpu_set) || hwloc_get_obj_by_os_index(topology, HWLOC_OBJ_PU, ncpu)->nodeset->first != numa_node) { +// ncpu++; +// if (ncpu >= UtilCpuGetNumProcessorsOnline()) { +// ncpu = 0; +// iter++; +// } +// if (iter >= 2) { +// break; +// } +// } + +// if (iter == 2) { +// // Fallback to any available CPU if no CPU found within the preferred NUMA node +// ncpu = taf->lcpu; +// while (!CPU_ISSET(ncpu, &taf->cpu_set) && iter < 2) { +// ncpu++; +// if (ncpu >= UtilCpuGetNumProcessorsOnline()) { +// ncpu = 0; +// iter++; +// } +// } +// if (iter == 2) { +// SCLogError("cpu_set does not contain " +// "available cpus, cpu affinity conf is invalid"); +// } +// } + +// taf->lcpu = ncpu + 1; +// if (taf->lcpu >= UtilCpuGetNumProcessorsOnline()) +// taf->lcpu = 0; +// 
SCMutexUnlock(&taf->taf_mutex); +// SCLogDebug("Setting affinity on CPU %d", ncpu); +// #endif /* OS_WIN32 and __OpenBSD__ */ +// return ncpu; +// } + /** * \brief Return the total number of CPUs in a given affinity * \retval the number of affined CPUs From f157b600c24d17b2858da864b8b5823ecbf2589b Mon Sep 17 00:00:00 2001 From: Lukas Sismis Date: Wed, 17 Jul 2024 23:15:36 +0200 Subject: [PATCH 3/5] fixup! fixup! wip: affiinity --- src/util-affinity.c | 87 ++++++++++++++++++++++++++++++++++++++------- 1 file changed, 75 insertions(+), 12 deletions(-) diff --git a/src/util-affinity.c b/src/util-affinity.c index 6bc0c6fae8a1..107a06c1567b 100644 --- a/src/util-affinity.c +++ b/src/util-affinity.c @@ -278,19 +278,36 @@ void AffinitySetupLoadFromConfig(void) static hwloc_topology_t topology = NULL; +// int DeviceGetNumaID(hwloc_topology_t topology, hwloc_obj_t obj) { +// hwloc_obj_t numa_node = NULL; +// while ((numa_node = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_NUMANODE, numa_node)) != NULL) { +// SCLogNotice("another numa"); +// } + +// hwloc_obj_t parent = obj->parent; +// while (parent) { +// printf("Object type: %s\n", hwloc_obj_type_string(parent->type)); +// if (parent->type == HWLOC_OBJ_NUMANODE) { +// return parent->logical_index; +// } +// parent = parent->parent; +// } + +// return -1; +// } int DeviceGetNumaID(hwloc_topology_t topology, hwloc_obj_t obj) { - hwloc_obj_t numa_node = NULL; - while ((numa_node = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_NUMANODE, numa_node)) != NULL) { - SCLogNotice("another numa"); + hwloc_obj_t non_io_ancestor = hwloc_get_non_io_ancestor_obj(topology, obj); + if (non_io_ancestor == NULL) { + fprintf(stderr, "Failed to find non-IO ancestor object.\n"); + return -1; } - hwloc_obj_t parent = obj->parent; - while (parent) { - printf("Object type: %s\n", hwloc_obj_type_string(parent->type)); - if (parent->type == HWLOC_OBJ_NUMANODE) { - return parent->logical_index; + // Iterate over NUMA nodes and check their nodeset 
+ hwloc_obj_t numa_node = NULL; + while ((numa_node = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_NUMANODE, numa_node)) != NULL) { + if (hwloc_bitmap_isset(non_io_ancestor->nodeset, numa_node->os_index)) { + return numa_node->logical_index; } - parent = parent->parent; } return -1; @@ -306,7 +323,7 @@ static hwloc_obj_t find_numa_node(hwloc_topology_t topology, hwloc_obj_t obj) { hwloc_obj_t parent = obj->parent; while (parent) { printf("Object type: %s\n", hwloc_obj_type_string(parent->type)); - if (parent->type == HWLOC_PACKAGE || parent->type == HWLOC_NUMANODE) { + if (parent->type == HWLOC_OBJ_PACKAGE || parent->type == HWLOC_OBJ_NUMANODE) { break; } parent = parent->parent; @@ -441,17 +458,63 @@ uint16_t AffinityGetNextCPU(ThreadsAffinityType *taf) // // infos = hwloc_obj_get_info_by_name(obj1, "PCIBusID"); // } - obj1 = get_hwloc_object_from_pcie_address(topology, "0000:3b:00.0"); + obj1 = get_hwloc_object_from_pcie_address(topology, "0000:17:00.0"); print_hwloc_object(obj1); SCLogNotice("PCI device not found, went over all devices"); - obj1 = find_pcie_address(topology, "ens1f0"); + obj1 = find_pcie_address(topology, "ens1f1"); if (obj1 != NULL) { static char pcie_address[32]; snprintf(pcie_address, sizeof(pcie_address), "%04x:%02x:%02x.%x", obj1->attr->pcidev.domain, obj1->attr->pcidev.bus, obj1->attr->pcidev.dev, obj1->attr->pcidev.func); SCLogNotice("PCIe addr of ens1f0 is %s with NUMA id %d or %p", pcie_address, DeviceGetNumaID(topology, obj1), find_numa_node(topology, obj1)); } + // hwloc_obj_t ancestor = hwloc_get_non_io_ancestor_obj(topology, obj1); + // if (ancestor && ancestor->type == HWLOC_OBJ_NUMANODE) { + // printf("NUMA node: %d\n", ancestor->logical_index); + // } else { + // // Traverse further up to ensure finding the NUMA node + // while (ancestor && ancestor->type != HWLOC_OBJ_NUMANODE) { + // ancestor = ancestor->parent; + // } + // if (ancestor && ancestor->type == HWLOC_OBJ_NUMANODE) { + // printf("NUMA node: %d\n", 
ancestor->logical_index); + // } else { + // printf("No NUMA node found for the given PCI device.\n"); + // } } + + hwloc_obj_t ancestor = obj1; + while (ancestor) { + if (ancestor->type == HWLOC_OBJ_NUMANODE) { + printf("NUMA node: %d\n", ancestor->logical_index); + break; + } + ancestor = ancestor->parent; + } + + // Step 6: Handle the case where no NUMA node is found + if (!ancestor || ancestor->type != HWLOC_OBJ_NUMANODE) { + printf("No NUMA node found for the given PCI device.\n"); + } + + int core_id = 3; // Example core ID + int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE); + hwloc_obj_t numa_node = NULL; + + while ((numa_node = hwloc_get_next_obj_by_depth(topology, depth, numa_node)) != NULL) { + hwloc_cpuset_t cpuset = hwloc_bitmap_alloc(); + hwloc_bitmap_copy(cpuset, numa_node->cpuset); + + if (hwloc_bitmap_isset(cpuset, core_id)) { + printf("Core %d belongs to NUMA node %d\n", core_id, numa_node->logical_index); + hwloc_bitmap_free(cpuset); + break; + } + hwloc_bitmap_free(cpuset); + } + + FatalError("ok enough"); + // if (topology != NULL) { From b81138661ebf605e0b4ef467993053dc22fcb6b1 Mon Sep 17 00:00:00 2001 From: Lukas Sismis Date: Wed, 17 Jul 2024 23:57:04 +0200 Subject: [PATCH 4/5] fixup! fixup! fixup! 
wip: affiinity --- src/util-affinity.c | 71 ++++++++++++++++----------------------------- 1 file changed, 25 insertions(+), 46 deletions(-) diff --git a/src/util-affinity.c b/src/util-affinity.c index 107a06c1567b..65e65f80c948 100644 --- a/src/util-affinity.c +++ b/src/util-affinity.c @@ -313,6 +313,27 @@ int DeviceGetNumaID(hwloc_topology_t topology, hwloc_obj_t obj) { return -1; } +void get_numa_nodes_from_pcie(hwloc_topology_t topology, hwloc_obj_t pcie_obj) { + hwloc_obj_t nodes[16]; // Assuming a maximum of 16 NUMA nodes + unsigned num_nodes = 16; + struct hwloc_location location; + + location.type = HWLOC_LOCATION_TYPE_OBJECT; + location.location.object = pcie_obj; + + int result = hwloc_get_local_numanode_objs(topology, &location, &num_nodes, nodes, 0); + if (result == 0 && num_nodes > 0) { + printf("NUMA nodes for PCIe device:\n"); + for (unsigned i = 0; i < num_nodes; i++) { + printf("NUMA node %d\n", nodes[i]->logical_index); + } + } else { + printf("No NUMA node found for PCIe device.\n"); + } +} + + + // Static function to find the NUMA node of a given hwloc object static hwloc_obj_t find_numa_node(hwloc_topology_t topology, hwloc_obj_t obj) { if (!obj) { @@ -443,24 +464,8 @@ uint16_t AffinityGetNextCPU(ThreadsAffinityType *taf) } } - // hwloc_topology_export_xml(topology, "/tmp/hwloc_topology.xml", HWLOC_TOPOLOGY_EXPORT_XML_FLAG_V1); - - - hwloc_obj_t obj1 = NULL; - // while ((obj1 = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_PCI_DEVICE, obj1)) != NULL) { - // // print out all attributes - // if (obj1->name != NULL && strncmp(obj1->name, "ens1f0", MIN(strlen(obj1->name), strlen("ens1f0"))) == 0) { - // SCLogNotice("Found PCI device: %s\n", obj1->name); - // SCLogNotice("Domain: %d\n", obj1->attr->pcidev.domain); - // SCLogNotice("Bus: %d\n", obj1->attr->pcidev.bus); - // SCLogNotice("Dev: %d\n", obj1->attr->pcidev.dev); - // } - // // infos = hwloc_obj_get_info_by_name(obj1, "PCIBusID"); - // } - - obj1 = 
get_hwloc_object_from_pcie_address(topology, "0000:17:00.0"); - print_hwloc_object(obj1); - SCLogNotice("PCI device not found, went over all devices"); + hwloc_obj_t obj1 = get_hwloc_object_from_pcie_address(topology, "0000:17:00.0"); + // print_hwloc_object(obj1); obj1 = find_pcie_address(topology, "ens1f1"); if (obj1 != NULL) { @@ -468,35 +473,9 @@ uint16_t AffinityGetNextCPU(ThreadsAffinityType *taf) snprintf(pcie_address, sizeof(pcie_address), "%04x:%02x:%02x.%x", obj1->attr->pcidev.domain, obj1->attr->pcidev.bus, obj1->attr->pcidev.dev, obj1->attr->pcidev.func); SCLogNotice("PCIe addr of ens1f0 is %s with NUMA id %d or %p", pcie_address, DeviceGetNumaID(topology, obj1), find_numa_node(topology, obj1)); } - - // hwloc_obj_t ancestor = hwloc_get_non_io_ancestor_obj(topology, obj1); - // if (ancestor && ancestor->type == HWLOC_OBJ_NUMANODE) { - // printf("NUMA node: %d\n", ancestor->logical_index); - // } else { - // // Traverse further up to ensure finding the NUMA node - // while (ancestor && ancestor->type != HWLOC_OBJ_NUMANODE) { - // ancestor = ancestor->parent; - // } - // if (ancestor && ancestor->type == HWLOC_OBJ_NUMANODE) { - // printf("NUMA node: %d\n", ancestor->logical_index); - // } else { - // printf("No NUMA node found for the given PCI device.\n"); - // } } - - hwloc_obj_t ancestor = obj1; - while (ancestor) { - if (ancestor->type == HWLOC_OBJ_NUMANODE) { - printf("NUMA node: %d\n", ancestor->logical_index); - break; - } - ancestor = ancestor->parent; - } - - // Step 6: Handle the case where no NUMA node is found - if (!ancestor || ancestor->type != HWLOC_OBJ_NUMANODE) { - printf("No NUMA node found for the given PCI device.\n"); - } + get_numa_nodes_from_pcie(topology, obj1); + int core_id = 3; // Example core ID int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE); hwloc_obj_t numa_node = NULL; From 4ce79aea1690173ac9b30b2e4ef4dabb5aea83f4 Mon Sep 17 00:00:00 2001 From: Lukas Sismis Date: Thu, 18 Jul 2024 10:32:06 +0200 Subject: 
[PATCH 5/5] fixup! fixup! fixup! fixup! wip: affiinity --- src/threadvars.h | 2 + src/tm-threads.c | 2 +- src/util-affinity.c | 157 ++++++++++++++++++++++++++------------------ src/util-affinity.h | 3 +- src/util-runmodes.c | 7 ++ 5 files changed, 106 insertions(+), 65 deletions(-) diff --git a/src/threadvars.h b/src/threadvars.h index cebcdb4e3ac1..d56a7a4f9f25 100644 --- a/src/threadvars.h +++ b/src/threadvars.h @@ -135,6 +135,8 @@ typedef struct ThreadVars_ { struct FlowQueue_ *flow_queue; bool break_loop; + char *iface_name; // set if the TV is TVT_PPT + } ThreadVars; /** Thread setup flags: */ diff --git a/src/tm-threads.c b/src/tm-threads.c index 557d994140c7..17c1f41c0a47 100644 --- a/src/tm-threads.c +++ b/src/tm-threads.c @@ -860,7 +860,7 @@ TmEcode TmThreadSetupOptions(ThreadVars *tv) if (tv->thread_setup_flags & THREAD_SET_AFFTYPE) { ThreadsAffinityType *taf = &thread_affinity[tv->cpu_affinity]; if (taf->mode_flag == EXCLUSIVE_AFFINITY) { - uint16_t cpu = AffinityGetNextCPU(taf); + uint16_t cpu = AffinityGetNextCPU(tv, taf); SetCPUAffinity(cpu); /* If CPU is in a set overwrite the default thread prio */ if (CPU_ISSET(cpu, &taf->lowprio_cpu)) { diff --git a/src/util-affinity.c b/src/util-affinity.c index 65e65f80c948..6a97dc03019c 100644 --- a/src/util-affinity.c +++ b/src/util-affinity.c @@ -278,24 +278,7 @@ void AffinitySetupLoadFromConfig(void) static hwloc_topology_t topology = NULL; -// int DeviceGetNumaID(hwloc_topology_t topology, hwloc_obj_t obj) { -// hwloc_obj_t numa_node = NULL; -// while ((numa_node = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_NUMANODE, numa_node)) != NULL) { -// SCLogNotice("another numa"); -// } - -// hwloc_obj_t parent = obj->parent; -// while (parent) { -// printf("Object type: %s\n", hwloc_obj_type_string(parent->type)); -// if (parent->type == HWLOC_OBJ_NUMANODE) { -// return parent->logical_index; -// } -// parent = parent->parent; -// } - -// return -1; -// } -int DeviceGetNumaID(hwloc_topology_t topology, 
hwloc_obj_t obj) { +int HwLocDeviceNumaGet(hwloc_topology_t topology, hwloc_obj_t obj) { hwloc_obj_t non_io_ancestor = hwloc_get_non_io_ancestor_obj(topology, obj); if (non_io_ancestor == NULL) { fprintf(stderr, "Failed to find non-IO ancestor object.\n"); @@ -313,6 +296,7 @@ int DeviceGetNumaID(hwloc_topology_t topology, hwloc_obj_t obj) { return -1; } +// can only be used from hwloc version 2.5 and up void get_numa_nodes_from_pcie(hwloc_topology_t topology, hwloc_obj_t pcie_obj) { hwloc_obj_t nodes[16]; // Assuming a maximum of 16 NUMA nodes unsigned num_nodes = 16; @@ -332,8 +316,6 @@ void get_numa_nodes_from_pcie(hwloc_topology_t topology, hwloc_obj_t pcie_obj) { } } - - // Static function to find the NUMA node of a given hwloc object static hwloc_obj_t find_numa_node(hwloc_topology_t topology, hwloc_obj_t obj) { if (!obj) { @@ -366,7 +348,7 @@ static hwloc_obj_t find_numa_node(hwloc_topology_t topology, hwloc_obj_t obj) { return NULL; } -hwloc_obj_t find_pcie_address(hwloc_topology_t topology, const char *interface_name) { +static hwloc_obj_t HwLocDeviceGetByKernelName(hwloc_topology_t topology, const char *interface_name) { hwloc_obj_t obj = NULL; while ((obj = hwloc_get_next_osdev(topology, obj)) != NULL) { @@ -397,7 +379,7 @@ static void deparse_pcie_address(const char *pcie_address, unsigned int *domain, } // Function to convert PCIe address to hwloc object -hwloc_obj_t get_hwloc_object_from_pcie_address(hwloc_topology_t topology, const char *pcie_address) { +static hwloc_obj_t HwLocDeviceGetByPcie(hwloc_topology_t topology, const char *pcie_address) { hwloc_obj_t obj = NULL; unsigned int domain, bus, device, function; deparse_pcie_address(pcie_address, &domain, &bus, &device, &function); @@ -437,46 +419,9 @@ void print_hwloc_object(hwloc_obj_t obj) { } } - -/** - * \brief Return next cpu to use for a given thread family - * \retval the cpu to used given by its id - */ -uint16_t AffinityGetNextCPU(ThreadsAffinityType *taf) +static bool 
CPUIsFromNuma(uint16_t ncpu, uint16_t numa) { - if (topology == NULL) { - if (hwloc_topology_init(&topology) == -1) { - FatalError("Failed to initialize topology"); - } - - // hwloc_topology_get_flags - - int ret = hwloc_topology_set_flags(topology, HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM); - ret = hwloc_topology_set_io_types_filter(topology, HWLOC_TYPE_FILTER_KEEP_ALL); - if (ret == -1) { - FatalError("Failed to set topology flags"); - hwloc_topology_destroy(topology); - } - - if (hwloc_topology_load(topology) == -1) { - FatalError("Failed to load topology"); - hwloc_topology_destroy(topology); - } - } - - hwloc_obj_t obj1 = get_hwloc_object_from_pcie_address(topology, "0000:17:00.0"); - // print_hwloc_object(obj1); - - obj1 = find_pcie_address(topology, "ens1f1"); - if (obj1 != NULL) { - static char pcie_address[32]; - snprintf(pcie_address, sizeof(pcie_address), "%04x:%02x:%02x.%x", obj1->attr->pcidev.domain, obj1->attr->pcidev.bus, obj1->attr->pcidev.dev, obj1->attr->pcidev.func); - SCLogNotice("PCIe addr of ens1f0 is %s with NUMA id %d or %p", pcie_address, DeviceGetNumaID(topology, obj1), find_numa_node(topology, obj1)); - } - - get_numa_nodes_from_pcie(topology, obj1); - - int core_id = 3; // Example core ID + int core_id = ncpu; int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE); hwloc_obj_t numa_node = NULL; @@ -492,10 +437,76 @@ uint16_t AffinityGetNextCPU(ThreadsAffinityType *taf) hwloc_bitmap_free(cpuset); } - FatalError("ok enough"); - + if (numa == numa_node->logical_index) + return true; + return false; +} + +/** + * \brief Return next cpu to use for a given thread family + * \retval the cpu to use given by its id + */ +uint16_t AffinityGetNextCPU(ThreadVars *tv, ThreadsAffinityType *taf) +{ + // todo: instead of adding iface to the threadvars + // add a preferred NUMA node - that can be filled in beforehand and it is more universal + int iface_numa = -1; + + // threading.cpu-assignment: + // - legacy - assign as usual + // - auto - use hwloc to
determine NUMA locality of the NIC and try to assign a core from this NUMA node. + // If it fails then use the other NUMA node. + // Using this approach e.g. on bonded devices/aliased and any other will not work + // Warn/Notify a user when device's NUMA node cannot be determined. + // Mention in the docs that NUMA locality supports PCIe addresses and Kernel interfaces + // - manual - in workers CPU set either: + // - Specify in one line ([ "eth0@1,2,3,4,7-9", "eth1@10,11" ]) + // - Specify threading in a list: + // - worker-cpu-set: + // - interface: eth0 + // cpu: [ 1,2,3,4 ] + // mode: "exclusive" + // prio: + // high: [ 3 ] + // default: "medium" + + if (tv->type == TVT_PPT && tv->iface_name) { + if (topology == NULL) { + if (hwloc_topology_init(&topology) == -1) { + FatalError("Failed to initialize topology"); + } + int ret = hwloc_topology_set_flags(topology, HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM); + ret = hwloc_topology_set_io_types_filter(topology, HWLOC_TYPE_FILTER_KEEP_ALL); + if (ret == -1) { + FatalError("Failed to set topology flags"); + hwloc_topology_destroy(topology); + } + if (hwloc_topology_load(topology) == -1) { + FatalError("Failed to load topology"); + hwloc_topology_destroy(topology); + } + } + + // try kernel interface first + hwloc_obj_t obj1 = HwLocDeviceGetByKernelName(topology, tv->iface_name); + if (obj1 == NULL) { + // if unsuccessful try PCIe search + obj1 = HwLocDeviceGetByPcie(topology, tv->iface_name); + } + + if (obj1 != NULL) { + static char pcie_address[32]; + snprintf(pcie_address, sizeof(pcie_address), "%04x:%02x:%02x.%x", obj1->attr->pcidev.domain, obj1->attr->pcidev.bus, obj1->attr->pcidev.dev, obj1->attr->pcidev.func); + SCLogNotice("PCIe addr of ens1f0 is %s with NUMA id %d or %p", pcie_address, HwLocDeviceNumaGet(topology, obj1), find_numa_node(topology, obj1)); + } + + iface_numa = HwLocDeviceNumaGet(topology, obj1); + // can be combined with newer api in get_numa_nodes_from_pcie(topology, obj1); + + } + // if (topology !=
NULL) { // int numa = get_numa_node_for_net_device(topology, "ens1f0"); // FatalError("NUMA node for ens1f0: %d\n", numa); @@ -507,6 +518,26 @@ uint16_t AffinityGetNextCPU(ThreadsAffinityType *taf) int iter = 0; SCMutexLock(&taf->taf_mutex); ncpu = taf->lcpu; + + // not ideal because if you have one interface and threads 1,2,3,4 + // then 1,3 are double assigned + + // probably divide configured CPU sets into NUMA nodes and operate on that independently + // e.g. for NICs on NUMA 1 primarily use cores from NUMA 1, + // when exhausted start using cores from NUMA 0. + // when exhausted use cores from other NUMAs(?) + // when exhausted reset counters on NUMAs and use the cores again + + if (iface_numa != -1) { + while ((!CPU_ISSET(ncpu, &taf->cpu_set) || !CPUIsFromNuma(ncpu, iface_numa))) { + ncpu++; + if (ncpu >= UtilCpuGetNumProcessorsOnline()) { + ncpu = 0; + break; + } + } + } + while (!CPU_ISSET(ncpu, &taf->cpu_set) && iter < 2) { ncpu++; if (ncpu >= UtilCpuGetNumProcessorsOnline()) { diff --git a/src/util-affinity.h b/src/util-affinity.h index 9ea75fa3713f..15bb56206ada 100644 --- a/src/util-affinity.h +++ b/src/util-affinity.h @@ -26,6 +26,7 @@ #include "suricata-common.h" #include "conf.h" #include "threads.h" +#include "threadvars.h" #include @@ -88,7 +89,7 @@ extern ThreadsAffinityType thread_affinity[MAX_CPU_SET]; void AffinitySetupLoadFromConfig(void); ThreadsAffinityType * GetAffinityTypeFromName(const char *name); -uint16_t AffinityGetNextCPU(ThreadsAffinityType *taf); +uint16_t AffinityGetNextCPU(ThreadVars *tv, ThreadsAffinityType *taf); uint16_t UtilAffinityGetAffinedCPUNum(ThreadsAffinityType *taf); #ifdef HAVE_DPDK uint16_t UtilAffinityCpusOverlap(ThreadsAffinityType *taf1, ThreadsAffinityType *taf2); diff --git a/src/util-runmodes.c b/src/util-runmodes.c index f78e857abfc6..a6b4231e33dc 100644 --- a/src/util-runmodes.c +++ b/src/util-runmodes.c @@ -310,6 +310,13 @@ static int RunModeSetLiveCaptureWorkersForDevice(ConfigIfaceThreadsCountFunc Mod
TmThreadSetCPU(tv, WORKER_CPU_SET); + if (tv->type == TVT_PPT) { + tv->iface_name = strdup(live_dev); + SCLogNotice("Duplicated livedev %s to %s", live_dev, tv->iface_name); + } else { + tv->iface_name = NULL; + } + if (TmThreadSpawn(tv) != TM_ECODE_OK) { FatalError("TmThreadSpawn failed"); }