-
Notifications
You must be signed in to change notification settings - Fork 0
/
getting_started_python.py
50 lines (39 loc) · 1.62 KB
/
getting_started_python.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# -*- coding: utf-8 -*-
"""
Created on Tue May 19 13:29:23 2015
@author: Thomas
"""
import csv
import numpy as np
# Column positions used by this script.  In train.csv column 1 holds the
# 0/1 survival flag and column 4 the sex; in test.csv column 0 holds the
# passenger id and column 3 the sex.
TRAIN_SURVIVED_COL = 1
TRAIN_SEX_COL = 4
TEST_ID_COL = 0
TEST_SEX_COL = 3


def read_csv_rows(path):
    """Read a CSV file and return ``(header, rows)``.

    ``header`` is the first row as a list of strings.  ``rows`` is a 2-D
    numpy array of strings holding every remaining row; when the file has
    no data rows it has shape ``(0, len(header))`` so that column indexing
    still works.

    Raises StopIteration if the file is completely empty (no header row).
    """
    # Text mode with newline='' is what the csv module expects on Python 3;
    # the ``with`` block guarantees the file is closed even on error.
    with open(path, newline='') as csv_file:
        reader = csv.reader(csv_file)
        header = next(reader)
        rows = list(reader)
    if not rows:
        return header, np.empty((0, len(header)), dtype=str)
    return header, np.array(rows)


def survival_rate(survived):
    """Return the fraction of ones in a 0/1 array, or NaN if it is empty."""
    count = np.size(survived)
    if count == 0:
        # Avoid a 0/0 division (and its RuntimeWarning) for an empty group.
        return float('nan')
    return np.sum(survived) / count


def write_gender_predictions(test_path, prediction_path):
    """Write a gender-based prediction file for the rows of *test_path*.

    The output CSV starts with the header ``PassengerId,Survived`` followed
    by one row per input row: the passenger id and '1' when the sex column
    is 'female', otherwise '0'.
    """
    with open(test_path, newline='') as test_file:
        with open(prediction_path, 'w', newline='') as prediction_file:
            reader = csv.reader(test_file)
            next(reader)  # skip the header row of the test file
            writer = csv.writer(prediction_file)
            writer.writerow(["PassengerId", "Survived"])
            for row in reader:
                prediction = '1' if row[TEST_SEX_COL] == 'female' else '0'
                writer.writerow([row[TEST_ID_COL], prediction])


if __name__ == "__main__":
    # Import data in 'data' as numpy array
    header, data = read_csv_rows('train.csv')

    # Find number of passengers and survivors
    # (builtin float replaces the removed np.float alias)
    survived = data[:, TRAIN_SURVIVED_COL].astype(float)
    number_passengers = np.size(survived)
    number_survived = np.sum(survived)
    proportion_survivors = survival_rate(survived)

    # Get only women and only men data
    women_only_stats = data[:, TRAIN_SEX_COL] == 'female'
    men_only_stats = data[:, TRAIN_SEX_COL] != 'female'

    # Find out survival percentage men/women
    women_onboard = survived[women_only_stats]
    men_onboard = survived[men_only_stats]
    proportion_women_survived = survival_rate(women_onboard)
    proportion_men_survived = survival_rate(men_onboard)

    # Predict survival for every female passenger in test.csv
    write_gender_predictions('test.csv', 'genderbasedmodel.csv')