-
Notifications
You must be signed in to change notification settings - Fork 6
/
osm2.py
executable file
·183 lines (152 loc) · 6.86 KB
/
osm2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
#!/usr/bin/env python3
# Select Lines from a CSV File According to the SuperMemo 2 (SM-2) Algorithm
# Written in 2012 by 伴上段
#
# To the extent possible under law, the author(s) have dedicated all copyright
# and related and neighboring rights to this software to the public domain
# worldwide. This software is distributed without any warranty.
#
# You should have received a copy of the CC0 Public Domain Dedication along
# with this software. If not, see
# <http://creativecommons.org/publicdomain/zero/1.0/>.
# The core SM-2 algorithm is in TLine.Respond().
from argparse import *
from csv import *
from datetime import *
from itertools import *
from math import *
from os.path import *
from random import *
from sys import *
# Build the command-line parser.
# RawDescriptionHelpFormatter is used so that argparse prints the description
# below with its line breaks as written instead of re-wrapping it.
# The description text is shown to the user by --help, so it is runtime output
# and must be edited with care.
parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter, description=""" Select CSV-formatted lines from standard input and the specified log file
according to the SuperMemo 2 (SM-2) algorithm.
This program is useful for scheduling reviews of flashcards stored within
CSV files.
about SM-2:
SM-2 is the scheduling algorithm used in the flashcard program SuperMemo 2.
It's a spaced repetition scheduling (SRS) algorithm, which means it tries
to optimally schedule flashcards over increasing intervals to
improve retention.
You can read about SM-2 online; see
<http://www.supermemo.com/english/ol/sm2.htm> for an overview.
formatting:
This program treats the first field of each nonempty line from standard input
as that line's unique ID. (If multiple lines have the same ID, then the last
line wins.) It uses the ID to match the line with records scanned from the
log file. Each log file line must have the following format:
<ID> <field-separator> <timestamp> <field-separator> <quality-response>
where <ID> is the unique ID of the line associated with the record,
<field-separator> is the CSV field separator, <timestamp> is the record's
timestamp (you can modify its format via the -f option), and
<quality-response> is an integer in the range [0,5] representing the
"quality of review response" described in SM-2 (see above).
output:
This program prints due lines in no particular order in CSV format. The
program appends four additional fields to each line:
1. the line's "interval number," which indicates how many times the user
reviewed the line since it was created or since the last time it had
a quality of review response less than three;
2. the line's "interval," which is how many days will be added to the line's
due date if it receives a quality of review response of three or greater
the next time it's reviewed;
3. the line's "easiness factor," which is never less than 1.3; and
4. the line's due date.
All but (4) are described in the SM-2 link mentioned above.""")
# Command-line options.
# The numeric defaults are rendered into the help text with argparse's
# %(default)s placeholder so the documented value can never drift away from
# the real one (the help used to claim 30 and 10 while the defaults were
# 10 and 4).
parser.add_argument(
    "-n", "--num-lines", type=int, default=10, dest="num",
    help="the maximum number of lines with log records to select (default: %(default)s)")
parser.add_argument(
    "-e", "--num-new-lines", type=int, default=4, dest="new",
    help="the maximum number of lines without log records to select (default: %(default)s)")
parser.add_argument(
    "-s", "--field-sep", default="\t",
    help="the CSV field separator (default: \\t)")
# Help strings are %-formatted by argparse, hence the doubled percent signs.
parser.add_argument(
    "-f", "--date-format", default="%Y年%m月%d日",
    help="the format of dates/timestamps in the log file and output (uses date/strftime flags, default: %%Y年%%m月%%d日)")
parser.add_argument(
    "-a", "--show-all", default=False, action="store_true",
    help="dump all lines to standard output regardless of whether they're due")
parser.add_argument(
    "logfile",
    help="a CSV-formatted file containing records for the deck's lines")
args = parser.parse_args()
class TLine(object):
    """One input line (flashcard) together with its SM-2 scheduling state.

    Attributes:
        fields: the CSV fields of the line; the first one is its ID.
        duedate: datetime at which the line is next due for review.
        interval: number of days that will be added to the due date on the
            next response of quality three or better.
        intervalnum: interval number; reset to 1 whenever a response of
            quality below three is recorded.
        ef: easiness factor, never allowed to fall below 1.3.
    """

    __slots__ = ("fields", "duedate", "interval", "intervalnum", "ef")

    def __init__(self, fields, dateandtime):
        """Create a never-reviewed line that is due at ``dateandtime``."""
        self.fields = fields
        self.duedate = dateandtime
        self.interval = 1
        self.intervalnum = 1
        self.ef = 2.5
        super().__init__()

    def Respond(self, q, now):
        """Apply one review response to the scheduling state.

        Args:
            q: quality of the review response, an integer in [0, 5].
            now: datetime at which the review took place.
        """
        if q < 3:
            # Failed recall: start the repetitions over; the easiness factor
            # is left untouched.
            self.intervalnum = 1
            self.interval = 1
            self.duedate = now + timedelta(days=self.interval)
            return

        self.intervalnum += 1
        # The interval stored by the previous response schedules this review.
        self.duedate = now + timedelta(days=self.interval)
        # SM-2: I(2) = 6 and I(n) = I(n-1) * EF afterwards.  The previous
        # code added EF instead of multiplying by it, which made intervals
        # grow linearly instead of geometrically.
        if self.intervalnum == 2:
            self.interval = 6
        else:
            self.interval = ceil(self.interval * self.ef)
        # SM-2 easiness-factor update, clamped at the 1.3 minimum.
        penalty = (5 - q) * (0.08 + 0.02 * (5 - q))
        self.ef = max(self.ef + 0.1 - penalty, 1.3)
# Check arguments for illegal values.
# Every check runs before exiting so that all problems are reported at once.
# Only the *contents* of sys are imported (from sys import *), so the stream
# must be referenced as plain `stderr`; `sys.stderr` raised NameError here.
ret = 0
if args.num < 0:
    stderr.write("negative number of old lines\n")
    ret = 1
if args.new < 0:
    stderr.write("negative number of new lines\n")
    ret = 1
if not exists(args.logfile):
    stderr.write(args.logfile + " does not exist.\n")
    ret = 1
if ret != 0:
    exit(ret)
# Process the lines from stdin.
# Each nonempty CSV row becomes a TLine keyed by its first field; when an ID
# repeats, the later row replaces the earlier one.  Every line starts out
# with `zerodate` as its due date.
zerodate = datetime.min
now = datetime.now()
lines = {
    record[0]: TLine(record, zerodate)
    for record in reader(stdin, delimiter=args.field_sep)
    if record
}
# Process the log file.
def _log_error(lineno, message):
    """Report a malformed log record on stderr and exit with status 3."""
    stderr.write(args.logfile + ":" + str(lineno) + ": " + message + "\n")
    exit(3)


# newline="" hands line-ending handling to the csv module, as its
# documentation requires for files passed to csv.reader.
with open(args.logfile, "r", newline="") as logf:
    # Records are counted from 1 so that the positions printed in error
    # messages match what a text editor shows (they used to be 0-based).
    for lineno, fields in enumerate(reader(logf, delimiter=args.field_sep), start=1):
        if len(fields) != 3:
            _log_error(lineno, "invalid number of fields: " + str(len(fields)))
        entry = lines.get(fields[0])
        if entry is None:
            # The record belongs to a line that was not read from stdin.
            continue
        try:
            logdate = datetime.strptime(fields[1], args.date_format)
        except ValueError as e:
            _log_error(lineno, "invalid date format: " + str(e))
        try:
            q = int(fields[2])
        except ValueError:
            q = None
        # A non-integer and an out-of-range value share the same diagnostic.
        if q is None or not 0 <= q <= 5:
            _log_error(lineno, "invalid quality response: " + fields[2])
        entry.Respond(q, logdate)
# CSV writer for standard output, using the same separator as the input.
csvout = writer(stdout, delimiter=args.field_sep)
if args.show_all:
    # -a/--show-all: print every line with its four scheduling fields
    # appended, whether or not it is due, then stop.
    for item in lines.values():
        stats = (
            item.intervalnum,
            item.interval,
            item.ef,
            item.duedate.strftime(args.date_format),
        )
        csvout.writerow(tuple(item.fields) + stats)
    exit(0)
class TRandomSelector(object):
    """Keep a uniform random sample of at most ``capacity`` added objects.

    The first ``capacity`` objects are stored directly; each later object
    replaces a randomly chosen stored one with probability
    ``capacity / counter`` (reservoir sampling).

    Attributes:
        capacity: maximum number of objects retained.
        sample: the objects currently retained.
        counter: number of objects offered so far.
    """

    def __init__(self, capacity):
        """Create an empty selector that retains at most ``capacity`` objects."""
        self.capacity = int(capacity)
        self.sample = []
        self.counter = 0

    def __iter__(self):
        """Iterate over the retained objects."""
        yield from self.sample

    def Add(self, o):
        """Offer ``o`` to the sample; it may or may not be retained."""
        if self.capacity <= 0:
            # Nothing can ever be retained, so there is nothing to do.
            return
        self.counter += 1
        if self.counter <= self.capacity:
            self.sample.append(o)
            return
        # randint() includes both endpoints, so the upper bound must be
        # counter - 1 to draw uniformly from `counter` slots.  Using
        # `counter` itself kept late arrivals with probability
        # capacity / (counter + 1) and biased the sample towards early ones.
        tag = randint(0, self.counter - 1)
        if tag < self.capacity:
            self.sample[tag] = o
# Sample the due lines: those still carrying the `zerodate` due date go to
# the "new" selector (capped by -e), all other due lines go to the "old"
# selector (capped by -n).
new_chooser = TRandomSelector(args.new)
old_chooser = TRandomSelector(args.num)
for line in lines.values():
    if line.duedate > now:
        continue  # not due yet
    if line.duedate is zerodate:
        new_chooser.Add(line)
    else:
        old_chooser.Add(line)

# Print the selected new lines first, then the selected old ones, each with
# its four scheduling fields appended.
for line in chain(new_chooser, old_chooser):
    stats = (
        line.intervalnum,
        line.interval,
        line.ef,
        line.duedate.strftime(args.date_format),
    )
    csvout.writerow(tuple(line.fields) + stats)