Skip to content

Commit

Permalink
🐛 Partitions must not be sorted in lexicographical order.
Browse files (browse the repository at this point in the history)
  • Loading branch information
fbriol committed Jan 10, 2024
1 parent d3f3365 commit b113e30
Showing 1 changed file with 8 additions and 7 deletions.
15 changes: 8 additions & 7 deletions zcollection/partitioning/abc.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -155,6 +155,7 @@ def list_partitions(
fs: fsspec.AbstractFileSystem,
path: str,
depth: int,
*,
root: bool = True,
) -> Iterator[str]:
"""The number of variables used for partitioning.
Expand Down Expand Up @@ -183,21 +184,21 @@ def list_partitions(
)
# If we're partitioning at top level (example: by year)
if depth == 0:
yield from sorted(folders)
yield from folders
return StopIteration()

for pathname in sorted(folders):
for pathname in folders:
yield from list_partitions(fs,
pathname,
depth=depth - 1,
root=False)
return StopIteration()

if depth == 0:
yield from sorted(fs.ls(path, detail=False))
yield from fs.ls(path, detail=False)
return StopIteration()

for item in sorted(fs.ls(path, detail=False)):
for item in fs.ls(path, detail=False):
yield from list_partitions(fs, item, depth=depth - 1, root=False)
return StopIteration()

Expand Down Expand Up @@ -373,8 +374,7 @@ def parse(self, partition: str) -> tuple[tuple[str, int], ...]:
raise ValueError(
f'Partition is not driven by this instance: {partition}')
groups: tuple[str, ...] = match.groups()
return tuple((groups[ix], int(groups[ix + 1]))
for ix in range(0, len(groups), 2))
return tuple(zip(groups[::2], map(int, groups[1::2])))

@abc.abstractmethod
def encode(
Expand Down Expand Up @@ -424,4 +424,5 @@ def list_partitions(
Yields:
The partitions.
"""
return list_partitions(fs, path, depth=len(self) - 1)
yield from sorted(list_partitions(fs, path, depth=len(self) - 1),
key=self.parse)

0 comments on commit b113e30

Please sign in to comment.