-
Notifications
You must be signed in to change notification settings - Fork 95
/
frequency.py
86 lines (45 loc) · 1.24 KB
/
frequency.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import sys
import json
from pprint import pprint
def hw():
    """Print the greeting ``Hello, world!`` to standard output."""
    # Single-argument call form: prints the same text under Python 2
    # (parenthesised expression) and Python 3 (function call).
    print('Hello, world!')
def lines(fp):
    """Print the number of lines remaining in the file-like object *fp*.

    Args:
        fp: An open file-like object providing ``readlines()``. It is
            consumed from its current position to end-of-file.
    """
    # print() converts the integer itself, so no explicit str() is needed;
    # the call form works under both Python 2 and Python 3.
    print(len(fp.readlines()))
def pri(y):
    """Print *y* to standard output followed by a newline.

    Args:
        y: Any object; it is rendered the way ``print`` renders it.
    """
    # Single-argument call form behaves identically on Python 2 and 3.
    print(y)
def _ascii_terms(text):
    """Return the whitespace-separated terms of *text*, non-ASCII dropped."""
    # Round-trip through ASCII so the terms are text (not bytes) on both
    # Python 2 and Python 3. split() with no argument splits on any run of
    # whitespace, so it never yields empty terms or terms that contain
    # tabs/newlines.
    return text.encode('ascii', 'ignore').decode('ascii').split()


def _count_terms(json_lines):
    """Tally occurrences of each term in the "text" field of JSON lines."""
    counts = {}
    for raw in json_lines:
        if not raw.strip():
            # Blank lines are not valid JSON documents; skip them.
            continue
        record = json.loads(raw)
        if "text" not in record:
            continue
        for term in _ascii_terms(record["text"]):
            counts[term] = counts.get(term, 0) + 1
    return counts


def main():
    """Print the relative frequency of every term in a JSON-lines file.

    The file named by ``sys.argv[1]`` is read one JSON document per line.
    For each document that has a ``"text"`` entry, the text is reduced to
    ASCII and split on whitespace. One line per distinct term is then
    written to standard output as ``<term>\\t<frequency>``, where frequency
    is the term's occurrence count divided by the total number of terms
    seen. Nothing is printed when no terms are found.

    Raises:
        IndexError: If no file path was given on the command line.
        ValueError: If a non-blank line is not valid JSON.
    """
    # Binary mode keeps decoding independent of the locale: json.loads
    # accepts UTF-8 bytes on Python 3.6+ and byte strings on Python 2.
    with open(sys.argv[1], "rb") as source:
        counts = _count_terms(source)

    # The grand total is the sum of the per-term tallies, so a single pass
    # over the file is enough.
    total = sum(counts.values())
    for term, count in counts.items():
        print(term + "\t" + str(float(count) / total))
# Script entry point: run main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()