From 738087639e1a8577532efff07e37f0f00a5e9a71 Mon Sep 17 00:00:00 2001 From: Corey Goodfred <53098963+cgoodfred@users.noreply.github.com> Date: Thu, 6 Oct 2022 13:06:07 -0400 Subject: [PATCH] [COST-3007] add node role queries to get nodes and fill in node role on topology (#3912) * add node role queries to get nodes and fill in node role * tweak to where condition and yaml updates for local dev * filter node labels table to reduce amount of data joined --- dev/scripts/nise_ymls/ocp/ocp_on_premise.yml | 10 +++++ .../nise_ymls/ocp_on_aws/ocp_static_data.yml | 10 +++++ .../ocp_on_azure/ocp_static_data.yml | 11 ++++- .../nise_ymls/ocp_on_gcp/ocp_static_data.yml | 10 +++++ koku/masu/database/ocp_report_db_accessor.py | 42 +++++++++++++++---- .../database/test_ocp_report_db_accessor.py | 42 ++++++++++++++++++- 6 files changed, 113 insertions(+), 12 deletions(-) diff --git a/dev/scripts/nise_ymls/ocp/ocp_on_premise.yml b/dev/scripts/nise_ymls/ocp/ocp_on_premise.yml index b0b45913c7..2819f8f255 100644 --- a/dev/scripts/nise_ymls/ocp/ocp_on_premise.yml +++ b/dev/scripts/nise_ymls/ocp/ocp_on_premise.yml @@ -104,6 +104,7 @@ generators: capacity_gig: 20 - node: node_name: compute_3 + node_labels: label_nodeclass:compute|label_node_role_kubernetes_io:infra cpu_cores: 4 memory_gig: 16 namespaces: @@ -321,6 +322,15 @@ generators: cpu_limit: 1 mem_limit_gig: 4 pod_seconds: 3600 + openshift-kube-apiserver: + pods: + - pod: + pod_name: pod_apiserver + cpu_request: 1 + mem_request_gig: 2 + cpu_limit: 1 + mem_limit_gig: 4 + pod_seconds: 3600 - node: node_name: master_2 cpu_cores: 4 diff --git a/dev/scripts/nise_ymls/ocp_on_aws/ocp_static_data.yml b/dev/scripts/nise_ymls/ocp_on_aws/ocp_static_data.yml index 07a8f8d53e..bbf3afce50 100644 --- a/dev/scripts/nise_ymls/ocp_on_aws/ocp_static_data.yml +++ b/dev/scripts/nise_ymls/ocp_on_aws/ocp_static_data.yml @@ -110,6 +110,7 @@ generators: capacity_gig: 20 - node: node_name: aws_compute3 + node_labels: label_nodeclass:compute|label_node_role_kubernetes_io:infra cpu_cores: 4 memory_gig: 16 resource_id: 55555557 @@ -171,3 +172,12 @@ generators: mem_limit_gig: 4 pod_seconds: 3600 labels: label_environment:dev|label_app:master|label_version:master + openshift-kube-apiserver: + pods: + - pod: + pod_name: pod_apiserver + cpu_request: 1 + mem_request_gig: 2 + cpu_limit: 1 + mem_limit_gig: 4 + pod_seconds: 3600 diff --git a/dev/scripts/nise_ymls/ocp_on_azure/ocp_static_data.yml b/dev/scripts/nise_ymls/ocp_on_azure/ocp_static_data.yml index f354d92fdc..27e96d6a27 100644 --- a/dev/scripts/nise_ymls/ocp_on_azure/ocp_static_data.yml +++ b/dev/scripts/nise_ymls/ocp_on_azure/ocp_static_data.yml @@ -121,7 +121,7 @@ generators: capacity_gig: 20 - node: node_name: azure_compute3 - node_labels: label_nodeclass:compute + node_labels: label_nodeclass:compute|label_node_role_kubernetes_io:infra cpu_cores: 4 memory_gig: 16 resource_id: 99999997 @@ -210,3 +210,12 @@ generators: mem_limit_gig: 4 pod_seconds: 3600 labels: label_environment:Jupiter|label_app:Sombrero|label_version:Sombrero|label_qa:approved + openshift-kube-apiserver: + pods: + - pod: + pod_name: pod_apiserver + cpu_request: 1 + mem_request_gig: 2 + cpu_limit: 1 + mem_limit_gig: 4 + pod_seconds: 3600 diff --git a/dev/scripts/nise_ymls/ocp_on_gcp/ocp_static_data.yml b/dev/scripts/nise_ymls/ocp_on_gcp/ocp_static_data.yml index 03d8206a4a..59c16973bb 100644 --- a/dev/scripts/nise_ymls/ocp_on_gcp/ocp_static_data.yml +++ b/dev/scripts/nise_ymls/ocp_on_gcp/ocp_static_data.yml @@ -108,6 +108,7 @@ generators: capacity_gig: 20 - node: node_name: gcp_compute3 + node_labels: label_nodeclass:compute|label_node_role_kubernetes_io:infra cpu_cores: 4 memory_gig: 16 namespaces: @@ -167,3 +168,12 @@ generators: mem_limit_gig: 4 pod_seconds: 3600 labels: label_environment:ruby|label_app:summer|label_version:yellow + openshift-kube-apiserver: + pods: + - pod: + pod_name: pod_apiserver + cpu_request: 1 + mem_request_gig: 2 + cpu_limit: 1 + mem_limit_gig: 4 + pod_seconds: 3600 diff --git a/koku/masu/database/ocp_report_db_accessor.py b/koku/masu/database/ocp_report_db_accessor.py index 07021b4418..cc54b5f26c 100644 --- a/koku/masu/database/ocp_report_db_accessor.py +++ b/koku/masu/database/ocp_report_db_accessor.py @@ -2298,13 +2298,25 @@ def populate_cluster_table(self, provider, cluster_id, cluster_alias): return cluster def populate_node_table(self, cluster, nodes): - """Get or create an entry in the OCP cluster table.""" + """Get or create an entry in the OCP node table.""" LOG.info("Populating reporting_ocp_nodes table.") with schema_context(self.schema): for node in nodes: - OCPNode.objects.get_or_create( + tmp_node = OCPNode.objects.filter( node=node[0], resource_id=node[1], node_capacity_cpu_cores=node[2], cluster=cluster - ) + ).first() + if not tmp_node: + OCPNode.objects.create( + node=node[0], + resource_id=node[1], + node_capacity_cpu_cores=node[2], + node_role=node[3], + cluster=cluster, + ) + # if the node entry already exists but does not have a role assigned, update the node role + elif not tmp_node.node_role: + tmp_node.node_role = node[3] + tmp_node.save() def populate_pvc_table(self, cluster, pvcs): """Get or create an entry in the OCP cluster table.""" @@ -2323,18 +2335,30 @@ def populate_project_table(self, cluster, projects): def get_nodes_presto(self, source_uuid, start_date, end_date): """Get the nodes from an OpenShift cluster.""" sql = f""" - SELECT node, - resource_id, - max(node_capacity_cpu_cores) as node_capacity_cpu_cores + SELECT ocp.node, + ocp.resource_id, + max(ocp.node_capacity_cpu_cores) as node_capacity_cpu_cores, + CASE + WHEN contains(array_agg(DISTINCT ocp.namespace), 'openshift-kube-apiserver') THEN 'master' + WHEN any_match(array_agg(DISTINCT nl.node_labels), element -> element like '%"node_role_kubernetes_io": "infra"%') THEN 'infra' + ELSE 'worker' + END as node_role FROM hive.{self.schema}.openshift_pod_usage_line_items_daily as ocp + LEFT JOIN hive.{self.schema}.openshift_node_labels_line_items_daily as nl + ON ocp.node = nl.node WHERE ocp.source = '{source_uuid}' AND ocp.year = '{start_date.strftime("%Y")}' AND ocp.month = '{start_date.strftime("%m")}' AND ocp.interval_start >= TIMESTAMP '{start_date}' AND ocp.interval_start < date_add('day', 1, TIMESTAMP '{end_date}') - GROUP BY node, - resource_id - """ + AND nl.source = '{source_uuid}' + AND nl.year = '{start_date.strftime("%Y")}' + AND nl.month = '{start_date.strftime("%m")}' + AND nl.interval_start >= TIMESTAMP '{start_date}' + AND nl.interval_start < date_add('day', 1, TIMESTAMP '{end_date}') + GROUP BY ocp.node, + ocp.resource_id + """ # noqa: E501 nodes = self._execute_presto_raw_sql_query(self.schema, sql, log_ref="get_nodes_presto") diff --git a/koku/masu/test/database/test_ocp_report_db_accessor.py b/koku/masu/test/database/test_ocp_report_db_accessor.py index 3611a7ed9a..0578b50d1c 100644 --- a/koku/masu/test/database/test_ocp_report_db_accessor.py +++ b/koku/masu/test/database/test_ocp_report_db_accessor.py @@ -2721,7 +2721,8 @@ def test_populate_openshift_cluster_information_tables(self, mock_get_nodes, moc volumes = ["vol_1", "vol_2"] pvcs = ["pvc_1", "pvc_2"] projects = ["project_1", "project_2"] - mock_get_nodes.return_value = zip(nodes, resource_ids, capacity) + roles = ["master", "worker"] + mock_get_nodes.return_value = zip(nodes, resource_ids, capacity, roles) mock_get_pvcs.return_value = zip(volumes, pvcs) mock_get_projects.return_value = projects cluster_id = uuid.uuid4() @@ -2743,6 +2744,7 @@ def test_populate_openshift_cluster_information_tables(self, mock_get_nodes, moc self.assertIsNotNone(db_node.resource_id) self.assertIsNotNone(db_node.node_capacity_cpu_cores) self.assertIsNotNone(db_node.cluster_id) + self.assertIsNotNone(db_node.node_role) for pvc in pvcs: self.assertIsNotNone(OCPPVC.objects.filter(persistent_volume_claim=pvc).first()) for project in projects: @@ -2759,7 +2761,8 @@ def test_get_openshift_topology_for_provider(self, mock_get_nodes, mock_get_pvcs volumes = ["vol_1", "vol_2"] pvcs = ["pvc_1", "pvc_2"] projects = ["project_1", "project_2"] - mock_get_nodes.return_value = zip(nodes, resource_ids, capacity) + roles = ["master", "worker"] + mock_get_nodes.return_value = zip(nodes, resource_ids, capacity, roles) mock_get_pvcs.return_value = zip(volumes, pvcs) mock_get_projects.return_value = projects cluster_id = str(uuid.uuid4()) @@ -2791,6 +2794,41 @@ def test_get_openshift_topology_for_provider(self, mock_get_nodes, mock_get_pvcs for project in projects: self.assertIn(project.project, topology.get("projects")) + def test_populate_node_table_update_role(self): + """Test that populating the node table for an entry that previously existed fills the node role correctly.""" + node_info = ["node_role_test_node", "node_role_test_id", 1, "worker"] + cluster_id = str(uuid.uuid4()) + cluster_alias = "node_role_test" + cluster = self.accessor.populate_cluster_table(self.aws_provider, cluster_id, cluster_alias) + with schema_context(self.schema): + node = OCPNode.objects.create( + node=node_info[0], resource_id=node_info[1], node_capacity_cpu_cores=node_info[2], cluster=cluster + ) + self.assertIsNone(node.node_role) + self.accessor.populate_node_table(cluster, [node_info]) + node = OCPNode.objects.get( + node=node_info[0], resource_id=node_info[1], node_capacity_cpu_cores=node_info[2], cluster=cluster + ) + self.assertEqual(node.node_role, node_info[3]) + + def test_populate_node_table_second_time_no_change(self): + """Test that populating the node table for an entry a second time does not duplicate entries.""" + node_info = ["node_role_test_node", "node_role_test_id", 1, "worker"] + cluster_id = str(uuid.uuid4()) + cluster_alias = "node_role_test" + cluster = self.accessor.populate_cluster_table(self.aws_provider, cluster_id, cluster_alias) + with schema_context(self.schema): + self.accessor.populate_node_table(cluster, [node_info]) + node_count = OCPNode.objects.filter( + node=node_info[0], resource_id=node_info[1], node_capacity_cpu_cores=node_info[2], cluster=cluster + ).count() + self.assertEqual(node_count, 1) + self.accessor.populate_node_table(cluster, [node_info]) + node_count = OCPNode.objects.filter( + node=node_info[0], resource_id=node_info[1], node_capacity_cpu_cores=node_info[2], cluster=cluster + ).count() + self.assertEqual(node_count, 1) + def test_delete_infrastructure_raw_cost_from_daily_summary(self): """Test that infra raw cost is deleted.""" dh = DateHelper()