-
Notifications
You must be signed in to change notification settings - Fork 54
/
csv2libsvm.py
60 lines (45 loc) · 1.39 KB
/
csv2libsvm.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
#!/usr/bin/env python
"""
Convert CSV file to libsvm format. Works only with numeric variables.
"""
import sys
import csv
import argparse
def construct_line( label, line ):
    """
    Build one libsvm-formatted line from a label and a row of feature values.

    label -- the label value (normally a string read from the CSV); any label
             numerically equal to zero is written as "0"
    line  -- sequence of feature values in column order

    Features are numbered from 1 by their position in `line`. Empty,
    whitespace-only and zero-valued features are left out of the result.
    Surrounding whitespace is stripped from the label and from every value, so
    a CSV written as "1, 2.5" does not produce a malformed token like "2: 2.5".

    Returns the line as a string terminated by a newline.
    Raises ValueError if the label or a non-empty value cannot be parsed as a
    float.
    """
    # str() keeps non-string inputs (e.g. already-parsed numbers) working.
    label = str( label ).strip()
    if float( label ) == 0.0:
        label = "0"

    tokens = [ label ]
    for index, item in enumerate( line, 1 ):
        value = str( item ).strip()
        if value == '' or float( value ) == 0.0:
            continue
        tokens.append( "%s:%s" % ( index, value ) )

    return " ".join( tokens ) + "\n"
# ---
# Command line interface: two positional paths plus optional switches for the
# label column position and for skipping a header row.
parser = argparse.ArgumentParser()

parser.add_argument(
    "input_file",
    help = "path to the CSV input file",
)
parser.add_argument(
    "output_file",
    help = "path to the output file",
)
parser.add_argument(
    "-l", "--label-index",
    type = int,
    default = 0,
    help = "zero based index for the label column. If there are no labels in the file, use -1.",
)
parser.add_argument(
    "-s", "--skip-headers",
    action = 'store_true',
    help = "Use this switch if there are headers in the input file.",
)

args = parser.parse_args()
#
# Convert the input row by row. The csv module expects different file modes on
# Python 2 and Python 3; in both cases the output is opened so that line
# endings are written exactly as produced.
if sys.version_info[0] >= 3:
    open_kwargs = { 'newline': '' }
    output_mode = 'w'
else:
    open_kwargs = {}
    output_mode = 'wb'

# The context managers close both files even if the conversion fails midway.
with open( args.input_file, **open_kwargs ) as i, \
        open( args.output_file, output_mode, **open_kwargs ) as o:
    reader = csv.reader( i )

    if args.skip_headers:
        # The default keeps an empty input file from raising StopIteration.
        next( reader, None )

    for line in reader:
        try:
            if args.label_index == -1:
                # No label column in the file: every row gets the label "1".
                label = "1"
            else:
                # Removes the label from the row, leaving only the features.
                # Raises IndexError when the row is too short (e.g. blank).
                label = line.pop( args.label_index )
            new_line = construct_line( label, line )
        except ( ValueError, IndexError ):
            print( "Problem with the following line, skipping..." )
            print( line )
            continue

        # Written outside the try block so that a failing write is not
        # mistaken for a malformed input row.
        o.write( new_line )