Skip to content

Commit

Permalink
Shuffle after the fact.
Browse files Browse the repository at this point in the history
  • Loading branch information
ohinds committed Aug 30, 2023
1 parent e6f05b1 commit d89aa8d
Showing 1 changed file with 1 addition and 8 deletions.
9 changes: 1 addition & 8 deletions nobrainer/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,6 @@ def from_tfrecords(
volume_shape=None,
scalar_labels=False,
n_classes=1,
shuffle=False,
num_parallel_calls=1,
):
"""Function to retrieve a saved tf record as a nobrainer Dataset
Expand All @@ -95,7 +94,7 @@ def from_tfrecords(
# Create dataset of all TFRecord files. After this point, the dataset will have
# two values per iteration: (feature, label).
compressed = _is_gzipped(files[0], filesys=fs)
dataset = tf.data.Dataset.list_files(file_pattern, shuffle=shuffle)
dataset = tf.data.Dataset.list_files(file_pattern, shuffle=False)

# Read each of these files as a TFRecordDataset.
# Assume all files have same compression type as the first file.
Expand Down Expand Up @@ -137,10 +136,7 @@ def from_files(
shard_size=300,
num_parallel_calls=1,
eval_size=0.1,
block_shape=None,
label_mapping=None,
n_classes=1,
batch_size=10,
):
"""Create Nobrainer datasets from data
filepaths: List(str), list of paths to individual input data files.
Expand All @@ -155,10 +151,7 @@ def from_files(
num_parallel_calls: int, number of processes to use for multiprocessing. If
None, will use all available processes.
eval_size: float, proportion of the input files to reserve for validation.
block_shape: tuple, split each example into blocks of this size
label_mapping: function, mapping from input labels to training labels
n_classes: int, number of output classes
batch_size: int, number of examples to process simultaneously during training
"""
n_eval = np.ceil(len(filepaths) * eval_size).astype(int)
n_train = len(filepaths) - n_eval
Expand Down

0 comments on commit d89aa8d

Please sign in to comment.