diff --git a/README.md b/README.md index 0feadfb..a26e765 100644 --- a/README.md +++ b/README.md @@ -12,27 +12,29 @@ python3 -m pip install multisort None ### Performance -Average over 10 iterations with 500 rows. +Average over 10 iterations with 1000 rows. Test | Secs ---|--- -cmp_func|0.0054 -pandas|0.0061 -reversor|0.0149 -msorted|0.0179 +superfast|0.0005 +multisort|0.0035 +pandas|0.0079 +cmp_func|0.0138 +reversor|0.037 -As you can see, if the `cmp_func` is by far the fastest methodology as long as the number of cells in the table are 500 rows for 5 columns. However for larger data sets, `pandas` is the performance winner and scales extremely well. In such large dataset cases, where performance is key, `pandas` should be the first choice. +Hands down the fastest is the `superfast` methodology shown below. You do not need this library to accomplish this as it's just core Python. -The surprising thing from testing is that `cmp_func` far outperforms `reversor` which which is the only other methodology for multi-columnar sorting that can handle `NoneType` values. +`multisort` from this library gives reasonable performance for large data sets; e.g. it's better than pandas up to about 5,500 records. It is also much simpler to read and write, and it has error handling that does its best to give useful error messages. ### Note on `NoneType` and sorting -If your data may contain None, it would be wise to ensure your sort algorithm is tuned to handle them. This is because sorted uses `<` comparisons; which is not supported by `NoneType`. For example, the following error will result: `TypeError: '>' not supported between instances of 'NoneType' and 'str'`. +If your data may contain None, it would be wise to ensure your sort algorithm is tuned to handle them. This is because sorted uses `<` comparisons; which is not supported by `NoneType`. For example, the following error will result: `TypeError: '>' not supported between instances of 'NoneType' and 'str'`. 
All examples given on this page are tuned to handle `None` values. ### Methodologies Method|Descr|Notes ---|---|--- -cmp_func|Multi column sorting in the model `java.util.Comparator`|Fastest for small to medium size data -reversor|Enable multi column sorting with column specific reverse sorting|Medium speed. [Source](https://stackoverflow.com/a/56842689/286807) -msorted|Simple one-liner designed after `multisort` [example from python docs](https://docs.python.org/3/howto/sorting.html#sort-stability-and-complex-sorts)|Slowest of the bunch but not by much +multisort|Simple one-liner designed after `multisort` [example from python docs](https://docs.python.org/3/howto/sorting.html#sort-stability-and-complex-sorts)|Second fastest of the bunch but most configurable and easy to read. +cmp_func|Multi column sorting in the model `java.util.Comparator`|Reasonable speed|Enable multi column sorting with column specific reverse sorting|Medium speed. [Source](https://stackoverflow.com/a/56842689/286807) +superfast|NoneType safe sample implementation of multi column sorting as mentioned in [example from python docs](https://docs.python.org/3/howto/sorting.html#sort-stability-and-complex-sorts)|Fastest by orders of magnitude but a bit more complex to write. 
+ @@ -49,39 +51,39 @@ rows_dict = [ ] ``` -### `msorted` +### `multisort` Sort rows_dict by _grade_, descending, then _attend_, ascending and put None first in results: ``` -from multisort import msorted -rows_sorted = msorted(rows_dict, [ - ('grade', {'reverse': False, 'none_first': True}) +from multisort import multisort +rows_sorted = multisort(rows_dict, [ + ('grade', {'reverse': False}) ,'attend' ]) ``` - Sort rows_dict by _grade_, descending, then _attend_ and call upper() for _grade_: ``` -from multisort import msorted -rows_sorted = msorted(rows_dict, [ - ('grade', {'reverse': False, 'clean': lambda s:None if s is None else s.upper()}) +from multisort import multisort +rows_sorted = multisort(rows_dict, [ + ('grade', {'reverse': False, 'clean': lambda s: None if s is None else s.upper()}) ,'attend' ]) ``` -`msorted` parameters: +`multisort` parameters: option|dtype|description ---|---|--- `key`|int or str|Key to access data. int for tuple or list `spec`|str, int, list|Sort specification. Can be as simple as a column key / index `reverse`|bool|Reverse order of final sort (defalt = False) -`msorted` `spec` options: +`multisort` `spec` options: option|dtype|description ---|---|--- reverse|bool|Reverse sort of column -clean|func|Function / lambda to clean the value -none_first|bool|If True, None will be at top of sort. Default is False (bottom) +clean|func|Function / lambda to clean the value. These calls can cause a significant slowdown. +required|bool|Default True. If false, will substitute None or default if key not found (not applicable for list or tuple rows) +default|any|Value to substitute if required==False and key does not exist or None is found. 
Can be used to achieve similar functionality to pandas `na_position` @@ -134,7 +136,7 @@ rows_obj = [ ] ``` -### `msorted` +### `multisort` (Same syntax as with 'dict' example) @@ -177,11 +179,11 @@ rows_tuple = [ (COL_IDX, COL_NAME, COL_GRADE, COL_ATTEND) = range(0,4) ``` -### `msorted` +### `multisort` Sort rows_tuple by _grade_, descending, then _attend_, ascending and put None first in results: ``` -from multisort import msorted -rows_sorted = msorted(rows_tuple, [ +from multisort import multisort +rows_sorted = multisort(rows_tuple, [ (COL_GRADE, {'reverse': False, 'none_first': True}) ,COL_ATTEND ]) @@ -218,6 +220,6 @@ rows_sorted = sorted(rows_tuple, key=cmp_func(cmp_student), reverse=True) ### Tests / Samples Name|Descr|Other ---|---|--- -tests/test_msorted.py|msorted unit tests|- +tests/test_multisort.py|multisort unit tests|- tests/performance_tests.py|Tunable performance tests using asyncio | requires pandas tests/hand_test.py|Hand testing|- diff --git a/dot.vscode/launch.json b/dot.vscode/launch.json new file mode 100644 index 0000000..ffa30f4 --- /dev/null +++ b/dot.vscode/launch.json @@ -0,0 +1,20 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. 
+ // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "Python: Current File", + "type": "python", + "request": "launch", + "console": "integratedTerminal", + "justMyCode": true, + // "program": "tests/hand_test.py", + // "program": "tests/performance_tests.py", + // "program": "tests/perf_tests_2.py", + "program": "tests/test_multisort.py", + // "args": ["DictTests.test_list_of_dicts"] + } + ] +} \ No newline at end of file diff --git a/dot.vscode/settings.json b/dot.vscode/settings.json new file mode 100644 index 0000000..b889f61 --- /dev/null +++ b/dot.vscode/settings.json @@ -0,0 +1,6 @@ +{ + "python.envFile": "${workspaceFolder}/dev.env", + "python.linting.pylintEnabled": false, + "python.linting.flake8Enabled": true, + "python.linting.enabled": true +} \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 1ddac9e..057ed35 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "multisort" -version = "0.1.1" +version = "0.1.2" description = "NoneType Safe Multi Column Sorting For Python" license = "MIT" authors = ["Timothy C. Quinn"] diff --git a/src/multisort/__init__.py b/src/multisort/__init__.py index 6ab5b64..a31aef9 100644 --- a/src/multisort/__init__.py +++ b/src/multisort/__init__.py @@ -1 +1 @@ -from .multisort import msorted, cmp_func, reversor \ No newline at end of file +from .multisort import multisort, cmp_func, reversor \ No newline at end of file diff --git a/src/multisort/multisort.py b/src/multisort/multisort.py index eb9bdc3..182105b 100644 --- a/src/multisort/multisort.py +++ b/src/multisort/multisort.py @@ -11,7 +11,7 @@ cmp_func = cmp_to_key -# .: msorted :. +# .: multisort :. # spec is a list one of the following # # (,) @@ -21,70 +21,102 @@ # dict. 
def multisort(rows, spec, reverse: bool = False):
    """Return a new list of ``rows`` sorted by one or more columns.

    Rows may be dicts, objects (attribute access), or lists/tuples (index
    access). ``None`` values never raise during comparison: by default they
    sort after every real value in an ascending column.

    Args:
        rows: Iterable of rows. It is never mutated; a new list is returned.
        spec: Sort specification. Either a single key (``int`` index or
            ``str`` key/attribute name) or a sequence whose items are each
            a key, ``(key,)`` or ``(key, opts)``. ``opts`` is a dict with
            the optional entries:

            * ``reverse``  - sort this column descending (default False)
            * ``clean``    - callable applied to each non-None value
            * ``required`` - if False, a missing key/attribute yields
              ``default`` instead of an error (default True; list/tuple
              rows always fail on a bad index)
            * ``default``  - value substituted for ``None``
        reverse: Reverse the final, fully sorted result.

    Returns:
        A list with the rows in sorted order.

    Raises:
        AssertionError: A spec entry is malformed.
        MultiSortError: A row lacks the key, or values cannot be compared.
            The offending row (when known) and the original exception are
            available as ``.row`` and ``.cause``.
    """
    spec_items = [spec] if isinstance(spec, (int, str)) else list(spec)

    # Copy up front: the caller's data stays untouched and an empty spec
    # still returns a list instead of None.
    rows_sorted = list(rows)

    # Stable sorts applied from the least significant column to the most
    # significant one produce the combined multi-column ordering.
    for spec_item in reversed(spec_items):
        # Options are parsed fresh for every column so that settings such as
        # 'reverse' cannot leak from one column into the next.
        key, col_reverse, clean, default, required = _parse_spec_item(spec_item)
        try:
            rows_sorted.sort(
                key=_make_sort_key(key, clean, default, required),
                reverse=col_reverse,
            )
        except Exception as ex:
            raise _wrap_sort_error(key, ex) from ex

    if reverse:
        rows_sorted.reverse()
    return rows_sorted


def _parse_spec_item(spec_item):
    """Normalize one spec entry to (key, reverse, clean, default, required)."""
    if isinstance(spec_item, (int, str)):
        return spec_item, False, None, None, True

    # AssertionError is raised explicitly (not via ``assert``) so validation
    # survives ``python -O`` while keeping the exception type callers saw.
    if len(spec_item) not in (1, 2):
        raise AssertionError(
            "Invalid spec. Must have 1 or 2 params per record. "
            f"Got: {spec_item}"
        )

    key = spec_item[0]
    if len(spec_item) == 1:
        return key, False, None, None, True

    opts = spec_item[1]
    if not isinstance(opts, dict):
        raise AssertionError(
            "Invalid Spec. Second value must be a dict. "
            f"Got {type(opts).__name__}"
        )
    return (
        key,
        opts.get('reverse', False),
        opts.get('clean', None),
        opts.get('default', None),
        opts.get('required', True),
    )


def _first_arg(ex):
    """Return the first argument of an exception, or its text if it has none."""
    return ex.args[0] if ex.args else str(ex)


def _get_value(row, key, default, required):
    """Fetch ``key`` from ``row`` by item access, then by attribute access."""
    try:
        return row[key]
    except Exception as item_err:
        try:
            # Objects (and namedtuples addressed by name) land here.
            return getattr(row, key)
        except Exception as attr_err:
            if isinstance(row, (list, tuple)):
                # Fail fast: a bad index on a sequence is never optional.
                raise MSIndexError(
                    _first_arg(item_err), row, item_err
                ) from item_err
            if required:
                raise MSKeyError(
                    _first_arg(attr_err), row, attr_err
                ) from attr_err
            return default


def _make_sort_key(key, clean, default, required):
    """Build the key function used to sort a single column."""
    # ``is not None`` (rather than truthiness) so 0, '' and False are honored.
    has_default = default is not None

    def _sort_column(row):
        value = _get_value(row, key, default, required)
        if has_default:
            if value is None:
                return default
            return clean(value) if clean else value
        # Without a default, wrap values in a (is_none, value) pair so that
        # None is ordered by the flag and never compared with real values.
        if value is None:
            return True, None
        return False, (clean(value) if clean else value)

    return _sort_column


def _wrap_sort_error(key, ex):
    """Build the MultiSortError describing a failure while sorting on ``key``."""
    row = None
    if isinstance(ex, MultiSortBaseExc):
        row = ex.row
        if isinstance(ex, MSIndexError):
            msg = (
                f"Invalid index for {row.__class__.__name__} row of length "
                f"{len(row)}. Row: {row}"
            )
        else:  # MSKeyError
            msg = (
                "Invalid key/property for row of type "
                f"{row.__class__.__name__}. Row: {row}"
            )
    else:
        msg = _first_arg(ex)

    key_desc = f"int {key}" if isinstance(key, int) else f"str '{key}'"
    return MultiSortError(f"Sort failed on key {key_desc}. {msg}", row, ex)


class MultiSortBaseExc(Exception):
    """Base class for multisort errors; records the row and original cause."""

    def __init__(self, msg, row, cause):
        super().__init__(msg, row, cause)
        self.message = msg
        self.row = row
        self.cause = cause


class MSIndexError(MultiSortBaseExc):
    """A list/tuple row could not be indexed with the requested key."""


class MSKeyError(MultiSortBaseExc):
    """A dict/object row does not provide the requested key or attribute."""


class MultiSortError(MultiSortBaseExc):
    """Public error raised by ``multisort`` when sorting a column fails."""

    def __str__(self):
        return self.message

    def __repr__(self):
        return f"<{type(self).__name__}> {self.message}"
'attend': 85} @@ -32,12 +32,12 @@ def __str__(self): return f"name: {self.name}, grade: {self.grade}, attend: {sel def __repr__(self): return self.__str__() students_obj = [ - Student(0, 'joh', 'C', 100) - ,Student(1, 'jan', 'a', 80) - ,Student(2, 'dav', 'B', 85) - ,Student(3, 'bob', 'C', 85) - ,Student(4, 'jim', 'F', 55) - ,Student(5, 'joe', None, 55) + Student(0, 'joh', 'C', 100), + Student(1, 'jan', 'a', 80), + Student(2, 'dav', 'B', 85), + Student(3, 'bob', 'C', 85), + Student(4, 'jim', 'F', 55), + Student(5, 'joe', None, 55), ] student_tuple = [ @@ -52,35 +52,36 @@ def __repr__(self): return self.__str__() - - -def test_msorted_dict_single(): - _sorted = msorted(students_dict, 'grade', reverse=False) +def test_multisort_dict_single(): + _sorted = multisort(students_dict, 'grade', reverse=False) _print_stud(_sorted) -def test_msorted_obj_single(): - _sorted = msorted(students_obj, 'attend', reverse=False) +def test_multisort_obj_single(): + _sorted = multisort(students_obj, 'attend', reverse=False) _print_stud(_sorted) -def test_msorted_tuple_single(): - _sorted = msorted(student_tuple, COL_ATTEND, reverse=False) +def test_multisort_tuple_single(): + _sorted = multisort(student_tuple, COL_ATTEND, reverse=False) _print_stud(_sorted) -def test_msorted_dict_multi(): - _sorted = msorted(students_dict, [('grade', {'reverse': False, 'none_first': False}), 'attend'], reverse=False) +def test_multisort_dict_multi(): + _sorted = multisort(students_dict, [ + ('grade', {'reverse': True, 'clean': lambda s: None if s is None else s.upper(), 'default': '0', 'required': True}), + # ('attend', {'reverse': False}), + ], reverse=False) _print_stud(_sorted) -def test_msorted_obj_multi(): - _sorted = msorted(students_obj, [('grade', {'reverse': True}), 'attend'], reverse=False) +def test_multisort_obj_multi(): + _sorted = multisort(students_obj, [('grade', {'reverse': True}), 'attend'], reverse=False) _print_stud(_sorted) -def test_msorted_tuple_multi(): - _sorted = 
msorted(student_tuple, [(COL_GRADE, {'reverse': True}), COL_ATTEND], reverse=False) +def test_multisort_tuple_multi(): + _sorted = multisort(student_tuple, [(COL_GRADE, {'reverse': True}), COL_ATTEND], reverse=False) _print_stud(_sorted) diff --git a/tests/performance_tests.py b/tests/performance_tests.py index d5fc9a0..317e7a2 100644 --- a/tests/performance_tests.py +++ b/tests/performance_tests.py @@ -1,20 +1,21 @@ import asyncio import pandas from random import randint -from multisort import msorted, cmp_func, reversor +from operator import itemgetter +from multisort import multisort, cmp_func, reversor import test_util as util pc = util.pc students = [ - {'idx': 0, 'name': 'joh', 'grade': 'C', 'attend': 100} + {'idx': 0, 'name': 'joh', 'grade': None, 'attend': 100} ,{'idx': 1, 'name': 'jan', 'grade': 'a', 'attend': 80} ,{'idx': 2, 'name': 'dav', 'grade': 'B', 'attend': 85} ,{'idx': 3, 'name': 'bob' , 'grade': 'C', 'attend': 85} ,{'idx': 4, 'name': 'jim' , 'grade': 'F', 'attend': 55} ,{'idx': 5, 'name': 'joe' , 'grade': None, 'attend': 55} ] -ITERATIONS = 10 -EXTRA_ROW = 500 +ITERATIONS = 5 +EXTRA_ROW = 1000 def main(): results = asyncio.get_event_loop().run_until_complete(run_tests()) @@ -23,7 +24,7 @@ def main(): if isinstance(result, Exception): raise result rrows.append(result) - rrows = msorted(rrows, 1) + rrows = multisort(rrows, 1) table = util.quickTT(['test', 's/iter']) for rrow in rrows: table.add_row([rrow[0], f"{(rrow[1] / ITERATIONS):.7f}"]) print(f"\nSummary for {ITERATIONS} iteration{'s' if ITERATIONS > 1 else ''} with {len(students)} rows:\n{table.draw()}\n") @@ -39,10 +40,12 @@ async def run_tests(): coroutines = [ run_cmp_func(students[:]), - run_msorted(students[:]), + run_multisort(students[:]), + run_multisort_noclean(students[:]), run_reversor(students[:]), - run_reversor_func(students[:]), run_pandas(students[:]), + superfast(students[:]), + superfast_clean(students[:]), ] res = await asyncio.gather(*coroutines, return_exceptions=True) @@ 
-69,14 +72,25 @@ def cmp_student(a,b): -async def run_msorted(rows): +async def run_multisort(rows): sw = util.StopWatch() for i in range(0,ITERATIONS): - rows_sorted = msorted(rows, spec=( + rows_sorted = multisort(rows, spec=( ('grade', {'reverse': True, 'clean': lambda v: None if v is None else v.lower()}) ,('attend', {'reverse': True}) ), reverse=True) - return ('msorted', sw.elapsed(prec=7)) + return ('multisort w/ clean', sw.elapsed(prec=7)) + + +async def run_multisort_noclean(rows): + sw = util.StopWatch() + for i in range(0,ITERATIONS): + rows_sorted = multisort(rows, spec=( + ('grade', {'reverse': True}) + ,('attend', {'reverse': True}) + ), reverse=True) + return ('multisort noclean', sw.elapsed(prec=7)) + async def run_reversor(rows): sw = util.StopWatch() @@ -87,16 +101,6 @@ async def run_reversor(rows): ), reverse=True) return ('reversor', sw.elapsed(prec=7)) -async def run_reversor_func(rows): - sw = util.StopWatch() - def _student_sort(o): - return ( reversor(None if o['grade'] is None else o['grade'].lower()) - ,reversor(o['attend']) - ) - for i in range(0,ITERATIONS): - rows_sorted = sorted(rows, key=_student_sort, reverse=True) - - return ('reversor func', sw.elapsed(prec=7)) async def run_pandas(rows): sw = util.StopWatch() @@ -109,5 +113,34 @@ async def run_pandas(rows): return ('pandas', sw.elapsed(prec=7)) +async def superfast(students): + sw = util.StopWatch() + def key_grade(student): + grade = student['grade'] + return grade is None, grade + def key_attend(student): + return student['attend'] + students_sorted = sorted(students, key=key_attend) + students_sorted.sort(key=key_grade, reverse=True) + + return ('superfast', sw.elapsed(prec=7)) + + +async def superfast_clean(students): + sw = util.StopWatch() + def key_grade(student): + grade = student['grade'] + if grade is None: + return True, None + else: + return False, grade.upper() + def key_attend(student): + return student['attend'] + students_sorted = sorted(students, key=key_attend) + 
students_sorted.sort(key=key_grade, reverse=True) + + return ('superfast w/ clean', sw.elapsed(prec=7)) + + if __name__ == '__main__': main() diff --git a/tests/test_multisort.py b/tests/test_multisort.py new file mode 100644 index 0000000..408aec8 --- /dev/null +++ b/tests/test_multisort.py @@ -0,0 +1,241 @@ +import sys +import unittest +from multisort import multisort +import test_util as util +pc = util.pc + +FAILFAST = True + +STUDENTS_BASE = [ + (0, 'joh', 'a' , 100) + ,(1, 'joe', 'B' , 80) + ,(2, 'dav', 'A' , 85) + ,(3, 'bob', 'C' , 85) + ,(4, 'jim', None , 55) + ,(5, 'jan', 'B' , 70) +] +(COL_IDX, COL_NAME, COL_GRADE, COL_ATTEND) = range(0,4) +STUDENT_COLS=['idx', 'name', 'grade', 'attend'] + +def clean_grade(v): + if v is None: return v + return v.upper() + + +MSORTED_TESTS=[ + ( (2,0,5,1,3,4), [(COL_GRADE, {'reverse': False, 'clean': clean_grade}) , (COL_ATTEND, {'reverse': False})]), + ( (0,2,1,5,3,4), [(COL_GRADE, {'reverse': False, 'clean': clean_grade}) , (COL_ATTEND, {'reverse': True})]), + ( (0,2,1,5,3,4), [(COL_GRADE, {'reverse': False, 'clean': clean_grade}) , (COL_ATTEND, {'reverse': True})]), + ( (4,3,1,5,0,2), [(COL_GRADE, {'reverse': True , 'clean': clean_grade}) , (COL_ATTEND, {'reverse': True})]), + ( (4,3,5,1,2,0), [(COL_GRADE, {'reverse': True , 'clean': clean_grade}) , (COL_ATTEND, {'reverse': False})]), + ( (2,1,5,3,0,4), COL_GRADE), + ( (2,1,5,3,0,4), [COL_GRADE]), + ( (2,5,1,3,0,4), [COL_GRADE, COL_NAME]), +] + + +class Student(): + def __init__(self, idx, name, grade, attend): + self.idx = idx + self.name = name + self.grade = grade + self.attend = attend + def __str__(self): return f"[{self.idx}] name: {self.name}, grade: {self.grade}, attend: {self.attend}" + def __repr__(self): return f" {self.__str__()}" + +class MultiSortBase(unittest.TestCase): + + def _run_tests(self, rows_as, row_as, rows_in): + test_name = sys._getframe(1).f_code.co_name + for i, (expected, spec) in enumerate(MSORTED_TESTS): + for j in range(0,1): + if j == 0: 
+ reverse = False + else: + reverse = True + expected = reversed(expected) + + spec = self._fix_SORT_TESTS_spec(spec, row_as) + + if i > 4: + pc() + + rows_sorted = multisort(rows_in, spec, reverse=reverse) + + if rows_as == 'list': + self.assertIsInstance(rows_in, list) + elif rows_as == 'tuple': + self.assertIsInstance(rows_in, tuple) + + bOk = self._check_sort(expected, rows_sorted, row_as) + + _dump = dump_sort(i, spec, rows_sorted, rows_as, row_as, expected, reverse) + + if not bOk: + self.fail(msg=f"\nTest Name: {test_name}\nTestSet: {i}\n{_dump}\n") + else: + pass + # pc(f'\n.: sort_dump :.\n{_dump}\n') + + def _fix_SORT_TESTS_spec(self, spec, row_as): + if row_as in ('list', 'tuple'): + return spec + elif row_as in ('dict', 'object'): + pass + else: + raise Exception(f"Unexpected row_as: {row_as}") + + if isinstance(spec, (int)): + return STUDENT_COLS[spec] + + a = [] + for spec_c in spec: + if isinstance(spec_c, int): + a.append(STUDENT_COLS[spec_c]) + else: + spec_c = [*spec_c] + spec_c[0] = STUDENT_COLS[spec_c[0]] + a.append(tuple(spec_c)) + spec = a + + return tuple(spec) + + + def _check_sort(self, expected, rows, row_as) -> bool: + assert len(expected) == len(STUDENTS_BASE), f"Invalid expected length ({len(expected)}). 
got: {len(STUDENTS_BASE)} ({expected})" + indexable = row_as in ('list', 'tuple') + for i, row in enumerate(rows): + if row_as == 'list' and not isinstance(row, list): + self.fail(f"Expecting list but got {util.getClassName(row)}") + elif row_as == 'tuple' and not isinstance(row, tuple): + self.fail(f"Expecting tuple but got {util.getClassName(row)}") + elif row_as == 'dict' and not isinstance(row, dict): + self.fail(f"Expecting dict but got {util.getClassName(row)}") + elif row_as == 'object' and not isinstance(row, object): + self.fail(f"Expecting object but got {util.getClassName(row)}") + + idx = row[0] if indexable else row.idx if row_as == 'object' else row['idx'] + if not expected[i] == idx: return False + return True + + + +class TupleTests(MultiSortBase): + # TupleTests.test_list_of_tuples + def test_list_of_tuples(self): + (rows_as, row_as, rows_in) = _get_rows_in(rows_list=True, row_as_tuple=True) + self._run_tests(rows_as, row_as, rows_in) + + # TupleTests.test_tuple_of_tuples + def test_tuple_of_tuples(self): + (rows_as, row_as, rows_in) = _get_rows_in(rows_tuple=True, row_as_tuple=True) + self._run_tests(rows_as, row_as, rows_in) + + +class DictTests(MultiSortBase): + # DictTests.test_list_of_dicts + def test_list_of_dicts(self): + (rows_as, row_as, rows_in) = _get_rows_in(rows_list=True, row_as_dict=True) + self._run_tests(rows_as, row_as, rows_in) + + # DictTests.test_tuple_of_dict + def test_tuple_of_dict(self): + (rows_as, row_as, rows_in) = _get_rows_in(rows_tuple=True, row_as_dict=True) + self._run_tests(rows_as, row_as, rows_in) + + +class ObjectTests(MultiSortBase): + # ObjectTests.test_list_of_objects + def test_list_of_objects(self): + (rows_as, row_as, rows_in) = _get_rows_in(rows_list=True, row_as_obj=True) + self._run_tests(rows_as, row_as, rows_in) + + # ObjectTests.test_tuple_of_objects + def test_tuple_of_objects(self): + (rows_as, row_as, rows_in) = _get_rows_in(rows_tuple=True, row_as_obj=True) + self._run_tests(rows_as, row_as, 
rows_in) + + +def norm_spec_item(spec_c): + if isinstance(spec_c, (int, str)): + return (spec_c, None, None) + else: + assert isinstance(spec_c, tuple) and len(spec_c) in (1,2),\ + f"Invalid spec. Must have 1 or 2 params per record. Got: {spec_c}" + if len(spec_c) == 1: + return (spec_c[0], None, None) + elif len(spec_c) == 2: + s_opts = spec_c[1] + assert not s_opts is None and isinstance(s_opts, dict), f"Invalid Spec. Second value must be a dict. Got {util.getClassName(s_opts)}" + return (spec_c[0], s_opts.get('reverse', False), s_opts.get('clean', None)) + + +def dump_sort(stest_no, spec, rows, rows_as, row_as, expected, reverse): + sb = util.StringBuffer('Rows of ') + sb.a(rows_as) + sb.a(' sorted by ') + indexable = row_as in ('list', 'tuple') + if isinstance(spec, (int, str)): + sb.a(spec).a(" (a)") + else: + for i, spec_c in enumerate(spec): + (key, desc, clean) = norm_spec_item(spec_c) + if i > 0: sb.a(", ") + if indexable: + sb.a(STUDENT_COLS[key]) + else: + sb.a(key) + sb.a(' (d)' if desc else ' (a)') + + if reverse: sb.a(' (reversed)') + + sb.a(':\n') + + table = util.quickTT(STUDENT_COLS) + + bOk = True + for i, row in enumerate(rows): + if indexable: + table.add_row(row) + idx = row[0] + else: + if row_as == 'object': + table.add_row([row.idx, row.name, row.grade, row.attend]) + idx = row.idx + else: + table.add_row([row['idx'], row['name'], row['grade'], row['attend']]) + idx = row['idx'] + if not expected[i] == idx: bOk = False + + sb.a(util.pre(table.draw())) + if bOk: + sb.a("\n check: pass") + else: + sb.a('\n check: FAIL! 
expected: ').a(expected) + + return sb.ts() + + + + + +def _get_rows_in(rows_list=False, rows_tuple=False, row_as_dict=False, row_as_obj=False, row_as_list=False, row_as_tuple=False): + if row_as_dict: + rows_in = [{'idx': r[COL_IDX], 'name': r[COL_NAME], 'grade': r[COL_GRADE], 'attend': r[COL_ATTEND]} for r in STUDENTS_BASE] + elif row_as_obj: + rows_in = [Student(*r) for r in STUDENTS_BASE] + elif row_as_tuple: + rows_in = [tuple(r) for r in STUDENTS_BASE] + elif row_as_list: + rows_in = STUDENTS_BASE + + return ( 'list' if rows_list else 'tuple' + ,'tuple' if row_as_tuple else 'list' if row_as_list else 'dict' if row_as_dict else 'object' + ,tuple(rows_in) if rows_tuple else rows_in) + + +if __name__ == "__main__": + unittest.main() + sys.exit(0) + +