Skip to content

Commit

Permalink
add telemetry EDA notebook
Browse files Browse the repository at this point in the history
  • Loading branch information
isabelizimm committed Jun 10, 2021
1 parent 6bdeca4 commit 505b1c9
Show file tree
Hide file tree
Showing 2 changed files with 1,313 additions and 0 deletions.
210 changes: 210 additions & 0 deletions notebooks/data_sources/telemetry/step0_EDA.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,210 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "44680b12-bb14-460b-9acc-f4250b17fb7b",
"metadata": {},
"source": [
"## EDA\n",
"\n",
"Data available [here](https://kzn-swift.massopen.cloud/swift/v1/devicehealth/)."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "0447735e-ffa8-4480-ac98-974d042815aa",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import gc\n",
"import wget \n",
"from zipfile import ZipFile\n",
"import json\n",
"\n",
"import numpy as np\n",
"import scipy as sp\n",
"import pandas as pd\n",
"\n",
"from matplotlib import pyplot as plt\n",
"import seaborn as sns"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "ed3303bb-488c-4b31-ba41-ec0482ed0480",
"metadata": {},
"outputs": [],
"source": [
"month = \"device_health_metrics_2020-01.zip\""
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "f798f764-f5b7-4688-8f5f-8dce98ae970a",
"metadata": {},
"outputs": [],
"source": [
"url = f'https://kzn-swift.massopen.cloud/swift/v1/devicehealth/{month}'"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "58940092-2918-485f-8b7e-ed7c12fd5394",
"metadata": {},
"outputs": [],
"source": [
"zipped_data = wget.download(url)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "5ebcd91e-1866-448e-9ee5-12c4c4bc5020",
"metadata": {},
"outputs": [],
"source": [
"# Figure out unzipping\n",
"\n",
"#with ZipFile('../../device_health_metrics_2020-01.zip', 'r') as myzip:\n",
"# myzip.extract('device_health_metrics_2020-01.csv')"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "95215ead-1e86-49df-b87d-f9c32ca8b03d",
"metadata": {},
"outputs": [],
"source": [
"health_data = pd.read_csv(\"device_health_metrics_2020-01.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "8fd6a4f0-d9ee-40d3-8999-956f6049c52d",
"metadata": {},
"outputs": [],
"source": [
"health_data['report'] = health_data['report'].apply(lambda x: json.loads(x))"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "ab605f57-9be8-4263-b983-61b760a7ae0d",
"metadata": {},
"outputs": [],
"source": [
"unrolled_health_data = pd.json_normalize(health_data['report'])"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "41cad13a-5c6a-4b31-ac1a-66f3b3137477",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 NaN\n",
"1 NaN\n",
"2 NaN\n",
"3 NaN\n",
"4 NaN\n",
" ... \n",
"1097 [{'id': 5, 'raw': {'value': 0, 'string': '0'},...\n",
"1098 [{'id': 5, 'raw': {'value': 0, 'string': '0'},...\n",
"1099 [{'id': 1, 'raw': {'value': 526, 'string': '52...\n",
"1100 [{'id': 1, 'raw': {'value': 7943, 'string': '7...\n",
"1101 [{'id': 1, 'raw': {'value': 16, 'string': '16'...\n",
"Name: ata_smart_attributes.table, Length: 1102, dtype: object"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# figure out how to unroll this column\n",
"unrolled_health_data['ata_smart_attributes.table']"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "03bdc22a-dcf2-457d-91f1-b936a5c9d865",
"metadata": {},
"outputs": [],
"source": [
"# show variety of vendors iz"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "598f0183-6739-4cb0-a4c5-d2ad76b8ce62",
"metadata": {},
"outputs": [],
"source": [
"# show different types of disks iz"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3b98a4c4-e6c4-4ed7-b505-88c17ec96661",
"metadata": {},
"outputs": [],
"source": [
"# failures vs not failures kc"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3614582a-c95f-41c0-93cd-cec9e70694fb",
"metadata": {},
"outputs": [],
"source": [
"# how many disks smartctl ran kc"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "13a5cb51-6748-4d2b-bdd1-42641d60748e",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Loading

0 comments on commit 505b1c9

Please sign in to comment.