-
Notifications
You must be signed in to change notification settings - Fork 0
/
recommend.py
200 lines (164 loc) · 4.83 KB
/
recommend.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
from __future__ import print_function
import urllib
import json
import time, os
import re
import re
import string
import numpy as np
import scipy
import os
from os import listdir
from os.path import isfile, join
from numpy import array,newaxis
import random
from keras.models import model_from_json
import pickle
import operator
import sys
batch_size = 128
no_sol = 50
# print("Enter the problem id : ")
prob_name=str(sys.argv[1])
mypath='./'
if not os.path.exists("./data"+prob_name):
#processing the problem id
r = re.compile("([0-9]+)([a-zA-Z]+)")
m = r.match(prob_name)
problem = m.group(1)
section = m.group(2)
#configuring url for getting the submission ids
SUBMISSION_URL1 = 'http://codeforces.com/problemset/status/{prob}/problem/{sec}/page/{no}'
SOURCE_CODE_BEGIN1 = 'var viewableSubmissionIds = ['
#initialising
submission=[]
i=1
#parsing the submission status(html)
while True:
submission_info = urllib.urlopen(SUBMISSION_URL1.format(prob=problem,sec=section,no=i)).read()
start_pos = submission_info.find(SOURCE_CODE_BEGIN1) + len(SOURCE_CODE_BEGIN1)
end_pos = submission_info.find("];", start_pos)
result = submission_info[start_pos:end_pos].replace("\"","").replace("\r","").replace("\n","").replace(" ","")
arr=result.split(",")
i=i+1
if arr[0] in submission:
break
submission.extend(arr)
if i > 4:
break
print("collected submission ids")
print(len(submission))
print(submission)
#phase 2 : retrieving the code for each submission id
MAGIC_START_POINT = 17000
#configuring the url for getting code
SOURCE_CODE_BEGIN2 = '<pre class="prettyprint lang-cpp program-source" style="padding: 0.5em;">'
SUBMISSION_URL2 = 'http://codeforces.com/contest/{prob}/submission/{sub_no}'
#initialising symbols to be replaced
replacer = {'"': '\"', '>': '>', '<': '<', '&': '&', "'": "'","'":"'"}
keys = replacer.keys()
#parsing symbols
def parse(source_code):
for key in keys:
source_code = source_code.replace(key, replacer[key])
return source_code
#creating a directory for the problem
print(prob_name)
tt="data"+prob_name
print(tt)
os.makedirs(tt)
ctr = 0;
#parsing the html code and creating a file for each submission
for j in submission:
print(j)
submission_info = urllib.urlopen(SUBMISSION_URL2.format(prob=problem,sub_no=j)).read()
start_pos = submission_info.find(SOURCE_CODE_BEGIN2)
if start_pos == -1 :
continue
start_pos += len(SOURCE_CODE_BEGIN2)
end_pos = submission_info.find("</pre>", start_pos)
result = parse(submission_info[start_pos:end_pos]).replace('\r', '')
file=open("data"+prob_name + '/'+j,"w")
file.write(result)
file.close()
ctr+=1
if ctr > no_sol :
break
print("Successfully scrapped solutions: ",ctr)
#Pre-processing Begins
os.system("python removeSpacesRecommend.py "+prob_name)
os.system("python renameVariablesRecommend.py "+prob_name)
os.system("python filterRecommend.py "+prob_name)
os.system("python mappingRecommend.py "+prob_name)
print("Pre-Processing Done")
#Vectorization
test_data=[]
x=[]
mypath='./Final'+prob_name+'/'
dirs=os.listdir(mypath)
onlyfiles = os.listdir(mypath)
for j in onlyfiles:
f=open(mypath+j,'r')
chars=f.readlines()
f.close()
temp=chars[0].split(" ")
del temp[-1]
temp=map(int,temp)
#mm=len(temp)
x.append(temp)
test_data=array(x)
test_data=test_data[:,:,newaxis]
X_test = test_data[:]
print(X_test.shape)
print("Vectorization Done")
X_test = X_test.astype('float32')
X_test /= 95
print(X_test.shape[0], 'predict samples')
#Loading Model
json_file = open('model.json','r')
loaded_model_json = json_file.read()
json_file.close
model = model_from_json(loaded_model_json)
model.load_weights("model.h5")
model.compile(loss='categorical_crossentropy', optimizer='adadelta', metrics=['accuracy'])
print("Loaded model...")
#Predictions
classes = model.predict_classes(X_test, batch_size=batch_size, verbose=1)
print(classes)
probs = model.predict_proba(X_test, batch_size=batch_size, verbose=1)
print(probs)
mean_prob = probs.mean(axis=0)
unique, counts = np.unique(classes, return_counts=True)
v = np.argmax(counts)
print(np.asarray((unique, counts)))
print(v)
print("PREDICTED CLASS FOR THE PROBLEM IS : ",unique[v])
print (prob_name)
print(mean_prob)
# os.system("rm -rf Clean-B"+prob_name)
os.system("rm -rf Filtered"+prob_name)
os.system("rm -rf Final"+prob_name)
os.system("rm -rf Renamed"+prob_name)
# os.system("rm -rf data"+prob_name)
with open("tags_prob.txt", "rb") as myFile:
prob_repo = pickle.load(myFile)
dict={}
for i in prob_repo:
ans=0
print (prob_repo[i])
if i != "test":
for j in range(4):
ans+=np.square(mean_prob[j]-prob_repo[i][j])
print (ans)
dict[i]=np.sqrt(ans)
print (" ")
c=0
print("Recommended Problems :")
res={}
for key, value in sorted(dict.iteritems(),key=lambda (k,v):(v,k)):
res[c]=key
print ("%s" %(key))
c+=1
if c == 3 :
break
# print(res.values())