-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1 from infolab-csail/migrate-from-wikipediabase
Migrate lispify from wikipediabase
- Loading branch information
Showing
9 changed files
with
549 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
[run] | ||
omit = *virtualenv*, *pypy*, *setup.py, */tests/*, */scripts/* |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
language: python | ||
python: | ||
- "2.7" | ||
install: | ||
- pip install codecov pep8 | ||
- python setup.py install | ||
script: | ||
- coverage run setup.py test | ||
- pep8 lispify tests --ignore=E501 | ||
after_success: | ||
- codecov |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,349 @@ | ||
""" | ||
Lispify converts Python objects into Lisp-like encoded strings that are | ||
interpretable in Common Lisp. | ||
Use the 'lispify' function to encode a Python object. | ||
To create a new way of interpreting data subclass the LispType class. | ||
Prefix your class with an underscore (_) if it is an intermediate representation | ||
so that the lispify method ignores it. | ||
""" | ||
|
||
from numbers import Number | ||
import warnings | ||
|
||
from .util import subclasses, camel_case_to_lisp_name | ||
|
||
|
||
# For fully deterministic lisp types use this priority. | ||
MIN_PRIORITY = 0 | ||
MAX_PRIORITY = 15 | ||
|
||
|
||
def mid_priority(): | ||
""" | ||
Just count. Later classes override previous ones. | ||
""" | ||
|
||
for i in range(MIN_PRIORITY + 1, MAX_PRIORITY): | ||
yield i | ||
|
||
while True: | ||
warnings.warn("Increase MAX_PRIORITY") | ||
yield i | ||
|
||
MID_PRIORITY = mid_priority() | ||
|
||
|
||
class LispType(object): | ||
|
||
""" | ||
A LispType is a Lisp-like encoded string that is interpretable in | ||
Common Lisp. Its `val` attribute is a Python object. | ||
Note that not only answers but also questions are lispified. | ||
To use this subclass it and provide any of the following: | ||
- val_str: string representation | ||
- should_parse | ||
- parse_val | ||
""" | ||
|
||
priority = 0 | ||
literal = False | ||
|
||
def __init__(self, val): | ||
""" | ||
Lispify a piece of data. Throws ValueError on failure. | ||
""" | ||
|
||
self.val = val | ||
|
||
if self.should_parse(): | ||
self.val = self.parse_val(val) | ||
else: | ||
raise ValueError("failed to lispify %s" % str(self.val)) | ||
|
||
def should_parse(self): | ||
""" | ||
If this returns False, LispType is invalid whatever the value. | ||
""" | ||
|
||
return True | ||
|
||
def parse_val(self, val): | ||
""" | ||
Override this to have special value manipulation. | ||
""" | ||
|
||
return val | ||
|
||
def __repr__(self): | ||
r = u"<%s object %s>" % (self.__class__.__name__, | ||
self.__str__()) | ||
return r.encode('utf-8') | ||
|
||
def __str__(self): | ||
return unicode(self).encode('utf-8') | ||
|
||
def __unicode__(self): | ||
return u"%s" % self.val_str() | ||
|
||
def val_str(self): | ||
return unicode(self.val) | ||
|
||
def __nonzero__(self): | ||
return True | ||
|
||
def __eq__(self, other): | ||
# compare LispType objects based on their string representation | ||
if isinstance(other, self.__class__): | ||
return self.__str__() == other.__str__() | ||
elif isinstance(other, basestring): | ||
return self.__str__() == other | ||
|
||
return False | ||
|
||
def __hash__(self): | ||
return hash(self.__str__()) | ||
|
||
|
||
class LispString(LispType): | ||
priority = next(MID_PRIORITY) | ||
|
||
def should_parse(self): | ||
return isinstance(self.val, basestring) | ||
|
||
def val_str(self): | ||
v = self.val.replace('"', '\\"') # escape double quotes | ||
v = u'"{0}"'.format(v) | ||
return v | ||
|
||
|
||
class LispList(LispType): | ||
|
||
""" | ||
This is coordinates and other things like that | ||
""" | ||
|
||
priority = next(MID_PRIORITY) | ||
literal = True | ||
|
||
def should_parse(self): | ||
return hasattr(self.val, '__iter__') | ||
|
||
def erepr(self, v): | ||
if isinstance(v, LispType): | ||
return unicode(v) | ||
|
||
try: | ||
return unicode(lispify(v)) | ||
except NotImplementedError: | ||
return repr(v) | ||
|
||
def val_str(self): | ||
return u"(%s)" % (" ".join([self.erepr(v) for v in self.val])) | ||
|
||
def __contains__(self, val): | ||
return (self.erepr(val) in map(self.erepr, self.val)) | ||
|
||
|
||
class LispDict(LispType): | ||
|
||
""" | ||
Get a lispy dictionary of non-None items. | ||
""" | ||
|
||
priority = next(MID_PRIORITY) | ||
|
||
def should_parse(self): | ||
return isinstance(self.val, dict) | ||
|
||
def val_str(self): | ||
pairs = sorted(self.val.items()) | ||
return u'(%s)' % self._plist(pairs) | ||
|
||
def _plist(self, pairs): | ||
""" | ||
A lispy plist without the parens. | ||
""" | ||
|
||
return " ".join(list(self._paren_content_iter(pairs))) | ||
|
||
def _kv_pair(self, k, v): | ||
if k is None: | ||
return u"%s" % v | ||
|
||
if isinstance(k, basestring): | ||
return u":%s %s" % (k, v) | ||
|
||
def _paren_content_iter(self, pairs): | ||
for k, v in pairs: | ||
if v is not None: | ||
# XXX: NastyHack(TM). Replace the nonbreaking space | ||
# with a space. | ||
yield self._kv_pair(k, lispify(v)) | ||
|
||
|
||
class LispDate(LispDict): | ||
|
||
""" | ||
Special LispDict that indicates a date. | ||
Note: YOU are responsible for parsing the date into the format that | ||
you want (e.g. yyyymmdd) using overlay framework or whatever. | ||
""" | ||
|
||
priority = next(MID_PRIORITY) | ||
|
||
# dictionary keys that indicate date format should be treated differently | ||
keywords = ["yyyymmdd"] | ||
|
||
def should_parse(self): | ||
if isinstance(self.val, dict): | ||
if any([(kw in self.val) for kw in self.keywords]): | ||
return True | ||
|
||
def _paren_content_iter(self, pairs): | ||
for k, v in pairs: | ||
if v is not None: | ||
if k in self.keywords: | ||
yield self._kv_pair(k, v) | ||
else: | ||
yield self._kv_pair(k, lispify(v)) | ||
|
||
|
||
class LispError(LispType): | ||
|
||
""" | ||
An error with a symbol. The expected value should be an exception | ||
with the class name as desired symbol name and the following attributes: | ||
:message a message for internal debugging (used for Python exceptions) | ||
:reply a reply message that is okay to show users | ||
(e.g. death-date for a person that is still alive) | ||
""" | ||
|
||
priority = MAX_PRIORITY | ||
|
||
def should_parse(self): | ||
return isinstance(self.val, Exception) | ||
|
||
def val_str(self): | ||
symbol = camel_case_to_lisp_name(type(self.val).__name__) | ||
kw = self.val.__dict__ | ||
if self.val.message: | ||
kw.update({'message': self.val.message}) | ||
|
||
if kw: | ||
return u"(:error {symbol} {keys})".format( | ||
symbol=symbol, | ||
keys=self._plist(kw.items()) | ||
) | ||
else: | ||
return u"(:error %s)" % symbol | ||
|
||
def _plist(self, pairs): | ||
return " ".join(list(self._paren_content_iter(pairs))) | ||
|
||
def _kv_pair(self, k, v): | ||
if k is None: | ||
return u"%s" % v | ||
|
||
if isinstance(k, basestring): | ||
return u":%s %s" % (k, v) | ||
|
||
def _paren_content_iter(self, pairs): | ||
for k, v in pairs: | ||
if v is not None: | ||
yield self._kv_pair(k, lispify(v)) | ||
|
||
def __nonzero__(self): | ||
""" | ||
Errors should count as zeros so that you can check for an answer with | ||
if potential_error_or_none: | ||
# do stuff | ||
""" | ||
|
||
return False | ||
|
||
|
||
class _LispLiteral(LispType): | ||
|
||
""" | ||
Lisp literals. These are not. | ||
""" | ||
priority = MAX_PRIORITY | ||
literal = True | ||
|
||
|
||
class LispKeyword(_LispLiteral): | ||
|
||
""" | ||
Just a keyword. No content. | ||
""" | ||
|
||
def should_parse(self): | ||
return isinstance(self.val, basestring) and self.val.startswith(':') \ | ||
and ' ' not in self.val | ||
|
||
def val_str(self): | ||
return self.val | ||
|
||
|
||
class LispBool(_LispLiteral): | ||
|
||
""" | ||
Lispify a boolean value | ||
""" | ||
|
||
def should_parse(self): | ||
return isinstance(self.val, bool) | ||
|
||
def val_str(self): | ||
return u't' if self.val else u'nil' | ||
|
||
|
||
class LispNone(_LispLiteral): | ||
|
||
""" | ||
Lispified none object | ||
""" | ||
|
||
def should_parse(self): | ||
return self.val is None | ||
|
||
def val_str(self): | ||
return u'nil' | ||
|
||
|
||
class LispNumber(_LispLiteral): | ||
|
||
def should_parse(self): | ||
return isinstance(self.val, Number) | ||
|
||
def val_str(self): | ||
return str(self.val) | ||
|
||
|
||
LISP_TYPES = subclasses(LispType, instantiate=False) | ||
|
||
|
||
def lispify(obj): | ||
""" | ||
Return a Lisp-like encoded string. | ||
""" | ||
|
||
if isinstance(obj, LispType): | ||
return obj | ||
|
||
for T in LISP_TYPES: | ||
try: | ||
return T(obj) | ||
except ValueError: | ||
pass | ||
|
||
raise NotImplementedError( | ||
"Implement LispType for val: %s or provide fallback error." % obj) | ||
|
||
__all__ = ['lispify'] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
from itertools import chain | ||
import re | ||
|
||
|
||
def subclasses(cls, instantiate=True, **kw): | ||
""" | ||
A list of instances of subclasses of cls. Instantiate wit kw and | ||
return just the classes with instantiate=False. | ||
""" | ||
|
||
lcls = cls.__subclasses__() | ||
rcls = lcls + list(chain.from_iterable([c.__subclasses__() for c in lcls])) | ||
for c in rcls: | ||
assert isinstance(c.priority, int), \ | ||
"type(%s . priority) = %s != int" % (repr(c), type(c.priority.name)) | ||
|
||
clss = sorted(rcls, key=lambda c: c.priority, reverse=True) | ||
|
||
if not instantiate: | ||
return [C for C in clss | ||
if not C.__name__.startswith("_")] | ||
|
||
return [C(**kw) for C in clss | ||
if not C.__name__.startswith("_")] | ||
|
||
|
||
def camel_case_to_lisp_name(name): | ||
""" | ||
Convert a python CamelCase class name to lisp hyphenated class name. | ||
""" | ||
words = map(lambda s: s.lower(), re.findall('[A-Z][a-z]*', name)) | ||
lisp_name = '-'.join(words) | ||
return lisp_name |
Oops, something went wrong.