Skip to content

Commit

Permalink
Merge branch 'master' into eoakes/sync-threadpool
Browse files Browse the repository at this point in the history
  • Loading branch information
edoakes authored Nov 23, 2024
2 parents 12ca092 + 5cd8967 commit a581526
Showing 1 changed file with 38 additions and 1 deletion.
39 changes: 38 additions & 1 deletion release/nightly_tests/dataset/read_tfrecords_benchmark.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
import random
import shutil
import tempfile
Expand All @@ -7,11 +8,47 @@
from ray.data.dataset import Dataset

from benchmark import Benchmark
from read_images_benchmark import generate_images
from PIL import Image
import pyarrow as pa
import numpy as np


def generate_images(
    num_images: int, sizes: List[Tuple[int, int]], modes: List[str], formats: List[str]
) -> str:
    """Write randomly generated images into a fresh temporary directory.

    For every image a size, a file format (used as the file extension) and a
    mode are drawn at random from the given candidates, and the pixels are
    filled with random bytes.

    Args:
        num_images: Number of image files to create.
        sizes: Candidate ``(width, height)`` pairs.
        modes: Candidate Pillow image modes (e.g. ``"RGB"``, ``"L"``).
        formats: Candidate file extensions (e.g. ``"png"``).

    Returns:
        Path of the temporary directory holding the generated images. The
        directory is not removed by this function.

    Raises:
        ValueError: If ``modes`` contains a mode this function does not know.
    """
    # Number of bands (values per pixel) for each supported Pillow mode.
    # "I" and "F" hold a single 32-bit value per pixel, so they are
    # single-band modes and must receive scalar pixel values, not 4-tuples.
    bands_by_mode = {
        "1": 1,
        "L": 1,
        "P": 1,
        "I": 1,
        "F": 1,
        "RGB": 3,
        "YCbCr": 3,
        "LAB": 3,
        "HSV": 3,
        "RGBA": 4,
        "CMYK": 4,
    }

    # Validate every mode up front so nothing is written for bad input.
    band_counts = []
    for mode in modes:
        if mode not in bands_by_mode:
            raise ValueError(f"Found unknown image mode: {mode}.")
        band_counts.append(bands_by_mode[mode])

    images_dir = tempfile.mkdtemp()
    for image_idx in range(num_images):
        # Keep the order of the random draws stable so seeded runs stay
        # reproducible.
        size = random.choice(sizes)
        file_format = random.choice(formats)
        mode_idx = random.randrange(len(modes))
        mode = modes[mode_idx]
        band_count = band_counts[mode_idx]
        width, height = size
        file_name = os.path.join(images_dir, f"{image_idx}.{file_format}")

        # One buffer of random byte values (0-255) per band.
        pixels_per_band = [os.urandom(width * height) for _ in range(band_count)]

        image = Image.new(mode, size)
        if band_count == 1:
            image.putdata(pixels_per_band[0])
        else:
            # Multi-band images take one tuple per pixel.
            image.putdata(list(zip(*pixels_per_band)))
        image.save(file_name)
    return images_dir


def read_tfrecords(path: str) -> Dataset:
    """Read the TFRecord data at ``path`` and return the materialized dataset."""
    dataset = ray.data.read_tfrecords(paths=path)
    return dataset.materialize()

Expand Down

0 comments on commit a581526

Please sign in to comment.