Skip to content

Commit

Permalink
fixing parquet serialization
Browse files Browse the repository at this point in the history
  • Loading branch information
alexei-led committed May 14, 2023
1 parent dacc27f commit 221cfc9
Show file tree
Hide file tree
Showing 4 changed files with 137 additions and 55 deletions.
8 changes: 5 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,6 @@ Create Amazon Glue schema for storing events:
--data-format 'AVRO' \
--compatibility 'BACKWARD' \
--schema-definition 'file://./schema/schema.json'

```

Create Amazon Glue table for storing events:
Expand Down Expand Up @@ -145,12 +144,15 @@ cat <<EOF > schema/iam-role-policy.json
"Action": [
"glue:GetTable",
"glue:GetTableVersion",
"glue:GetTableVersions"
"glue:GetTableVersions",
"glue:GetSchema",
"glue:GetSchemaVersion"
],
"Resource": [
"arn:aws:glue:$AWS_REGION:$AWS_ACCOUNT:database/eks-lens",
"arn:aws:glue:$AWS_REGION:$AWS_ACCOUNT:catalog",
"arn:aws:glue:$AWS_REGION:$AWS_ACCOUNT:table/eks-lens/events"
"arn:aws:glue:$AWS_REGION:$AWS_ACCOUNT:table/eks-lens/events",
"arn:aws:glue:$AWS_REGION:$AWS_ACCOUNT:schema/default-registry/eks-lens"
]
},
{
Expand Down
31 changes: 27 additions & 4 deletions internal/usage/record.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package usage

import (
"fmt"
"strings"
"time"

Expand Down Expand Up @@ -90,16 +91,38 @@ func NodeInfoFromNode(cluster string, node *v1.Node) NodeInfo {
id = id[strings.LastIndex(id, "/")+1:]
}

// get nodegroup from node label; when the label is missing or empty, the node is assumed to be Fargate and the nodegroup is set to "fargate"
nodegroup := node.GetLabels()["eks.amazonaws.com/nodegroup"]
if nodegroup == "" {
nodegroup = "fargate"
}

// get region from node label
region := node.GetLabels()["topology.kubernetes.io/region"]

// get zone from node label
zone := node.GetLabels()["topology.kubernetes.io/zone"]

// get instance type from node label
instanceType := node.GetLabels()["beta.kubernetes.io/instance-type"]
if instanceType == "" {
instanceType = node.GetLabels()["node.kubernetes.io/instance-type"]
// if still empty, assume fargate and build the instance type as "fargate-<N>vCPU-<M>GB", where <M> is the memory capacity in decimal gigabytes (resource.Giga, i.e. 10^9 bytes), rounded up
if instanceType == "" {
instanceType = fmt.Sprintf("fargate-%dvCPU-%dGB", node.Status.Capacity.Cpu().Value(), node.Status.Capacity.Memory().ScaledValue(resource.Giga))
}
}

result := NodeInfo{
ID: id,
Name: node.GetName(),
Cluster: cluster,
Nodegroup: node.GetLabels()["eks.amazonaws.com/nodegroup"],
InstanceType: node.GetLabels()["node.kubernetes.io/instance-type"],
Nodegroup: nodegroup,
InstanceType: instanceType,
ComputeType: computeType,
CapacityType: capacityType,
Region: node.GetLabels()["topology.kubernetes.io/region"],
Zone: node.GetLabels()["topology.kubernetes.io/zone"],
Region: region,
Zone: zone,
Arch: node.Status.NodeInfo.Architecture,
OS: node.Status.NodeInfo.OperatingSystem,
OSImage: node.Status.NodeInfo.OSImage,
Expand Down
145 changes: 97 additions & 48 deletions schema/schema.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
{
"type": "record",
"name": "PodInfo",
"namespace": "com.example",
"fields": [
{
"name": "name",
Expand Down Expand Up @@ -96,39 +95,27 @@
},
{
"name": "gpu",
"type": [
"null",
"int"
],
"default": null
"type": "int",
"default": 0
},
{
"name": "memory",
"type": "int"
"type": "long"
},
{
"name": "pods",
"type": [
"null",
"int"
],
"default": null
"type": "int",
"default": 0
},
{
"name": "storage",
"type": [
"null",
"int"
],
"default": null
"type": "long",
"default": 0
},
{
"name": "storageEphemeral",
"type": [
"null",
"int"
],
"default": null
"type": "long",
"default": 0
}
]
}
Expand All @@ -145,39 +132,27 @@
},
{
"name": "gpu",
"type": [
"null",
"int"
],
"default": null
"type": "int",
"default": 0
},
{
"name": "memory",
"type": "int"
"type": "long"
},
{
"name": "pods",
"type": [
"null",
"int"
],
"default": null
"type": "int",
"default": 0
},
{
"name": "storage",
"type": [
"null",
"int"
],
"default": null
"type": "long",
"default": 0
},
{
"name": "storageEphemeral",
"type": [
"null",
"int"
],
"default": null
"type": "long",
"default": 0
}
]
}
Expand All @@ -194,11 +169,8 @@
},
{
"name": "qosClass",
"type": [
"null",
"string"
],
"default": null
"type": "string",
"default": "BestEffort"
},
{
"name": "startTime",
Expand All @@ -220,6 +192,83 @@
"type": "string",
"logicalType": "timestamp-millis"
}
},
{
"name": "resources",
"type": {
"type": "record",
"name": "Resources",
"fields": [
{
"name": "limits",
"type": {
"type": "record",
"name": "Limits",
"fields": [
{
"name": "cpu",
"type": "int",
"default": 0
},
{
"name": "gpu",
"type": "int",
"default": 0
},
{
"name": "memory",
"type": "long",
"default": 0
},
{
"name": "storage",
"type": "long",
"default": 0
},
{
"name": "storageEphemeral",
"type": "long",
"default": 0
}
]
}
},
{
"name": "requests",
"type": {
"type": "record",
"name": "Requests",
"fields": [
{
"name": "cpu",
"type": "int",
"default": 0
},
{
"name": "gpu",
"type": "int",
"default": 0
},
{
"name": "memory",
"type": "long",
"default": 0
},
{
"name": "storage",
"type": "long",
"default": 0
},
{
"name": "storageEphemeral",
"type": "long",
"default": 0
}
]
}
}
]
}
}
]
}
8 changes: 8 additions & 0 deletions schema/table.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,14 @@
"SchemaName": "eks-lens"
},
"SchemaVersionNumber": 1
},
"InputFormat": "org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat",
"OutputFormat": "org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat",
"SerdeInfo": {
"SerializationLibrary": "org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe",
"Parameters": {
"serialization.format": "1"
}
}
}
}

0 comments on commit 221cfc9

Please sign in to comment.