Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
ZhangYuanhan-AI committed Jan 3, 2024
1 parent 3146598 commit 8788e67
Show file tree
Hide file tree
Showing 23 changed files with 547 additions and 366 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -166,3 +166,4 @@ checkpoints/
*.txt
pipeline/serve/deploy/otterhd_endpoint.py
pipeline/benchmarks/models/llava_model.py
eval_results/
175 changes: 0 additions & 175 deletions eval_results

This file was deleted.

2 changes: 1 addition & 1 deletion pipeline/accelerate_configs/accelerate_config_ddp.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ machine_rank: 0
main_training_function: main
mixed_precision: bf16
num_machines: 1
num_processes: 2
num_processes: 1
rdzv_backend: static
same_network: false
tpu_use_cluster: false
Expand Down
18 changes: 18 additions & 0 deletions pipeline/accelerate_configs/accelerate_config_zero2_pretrain.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
compute_environment: LOCAL_MACHINE
deepspeed_config:
gradient_accumulation_steps: 4
gradient_clipping: 1.0
offload_optimizer_device: none
offload_param_device: none
zero3_init_flag: false
zero_stage: 2
distributed_type: DEEPSPEED
fsdp_config: {}
machine_rank: 0
main_process_ip: null
main_process_port: 29501
main_training_function: main
mixed_precision: bf16
num_machines: 1
num_processes: 8
use_cpu: false
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
compute_environment: LOCAL_MACHINE
deepspeed_config:
deepspeed_multinode_launcher: standard
gradient_accumulation_steps: 4
gradient_accumulation_steps: 1
gradient_clipping: 1.0
offload_optimizer_device: none
offload_param_device: none
Expand Down
5 changes: 3 additions & 2 deletions pipeline/accelerate_configs/accelerate_config_zero3.yaml
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
compute_environment: LOCAL_MACHINE
deepspeed_config:
gradient_accumulation_steps: 4
gradient_accumulation_steps: 16
gradient_clipping: 1.0
offload_optimizer_device: none
offload_param_device: none
zero3_init_flag: true
zero3_save_16bit_model: true
stage3_gather_16bit_weights_on_model_save: true
zero_stage: 3
distributed_type: DEEPSPEED
fsdp_config: {}
Expand All @@ -15,5 +16,5 @@ main_process_port: 20333
main_training_function: main
mixed_precision: bf16
num_machines: 1
num_processes: 4
num_processes: 8
use_cpu: false
44 changes: 25 additions & 19 deletions pipeline/benchmarks/datasets/base_eval_dataset.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from abc import ABC, abstractmethod
from PIL import Image
from typing import Dict, List, Any
import base64
import io
import os
import base64
import importlib

AVAILABLE_EVAL_DATASETS: Dict[str, str] = {
Expand All @@ -17,27 +18,24 @@
"mmmu": "MMMUDataset",
}

def get_pil_image(raw_image_data) -> Image.Image:
    """Normalize raw image data into a PIL Image.

    Accepts three input formats: an already-constructed PIL Image
    (returned unchanged), a dict carrying raw encoded bytes under the
    "bytes" key, or a base64-encoded string.

    Raises:
        ValueError: if the input matches none of the supported formats.
    """
    if isinstance(raw_image_data, Image.Image):
        return raw_image_data

    if isinstance(raw_image_data, dict) and "bytes" in raw_image_data:
        stream = io.BytesIO(raw_image_data["bytes"])
        return Image.open(stream)

    if isinstance(raw_image_data, str):
        # Assuming this is a base64 encoded string
        decoded = base64.b64decode(raw_image_data)
        return Image.open(io.BytesIO(decoded))

    raise ValueError("Unsupported image data format")


class BaseEvalDataset(ABC):
def __init__(self, name: str, dataset_path: str, *, max_batch_size: int = 1):
    """Record dataset identity and batching limit for a benchmark dataset.

    Args:
        name: Human-readable name of the evaluation dataset.
        dataset_path: Location the concrete subclass loads data from
            (path or identifier — exact semantics are subclass-defined).
        max_batch_size: Keyword-only upper bound on per-step batch size;
            defaults to 1.
    """
    self.name = name
    self.dataset_path = dataset_path
    self.max_batch_size = max_batch_size

def get_pil_image(self, raw_image_data) -> Image.Image:
    """Decode raw image data into an RGB PIL Image.

    Handles a PIL Image (converted to RGB), a dict with raw encoded
    bytes under the "bytes" key, or a base64-encoded string.

    Raises:
        ValueError: if the input matches none of the supported formats.
    """
    if isinstance(raw_image_data, Image.Image):
        return raw_image_data.convert("RGB")

    if isinstance(raw_image_data, dict) and "bytes" in raw_image_data:
        stream = io.BytesIO(raw_image_data["bytes"])
        return Image.open(stream).convert("RGB")

    if isinstance(raw_image_data, str):
        # Assuming this is a base64 encoded string
        payload = base64.b64decode(raw_image_data)
        return Image.open(io.BytesIO(payload)).convert("RGB")

    raise ValueError("Unsupported image data format")

def evaluate(self, model, **kwargs):
return self._evaluate(model, **kwargs)
# batch = min(model.max_batch_size, self.max_batch_size)
Expand All @@ -50,10 +48,18 @@ def evaluate(self, model, **kwargs):
@abstractmethod
def _evaluate(self, model: str):
pass

# @abstractmethod # TODO: add back after every dataset has been updated
def evaluate_multi_gpu(self, model, model_version, rank, world_size):
pass


def load_dataset(dataset_name: str, dataset_args: Dict[str, str] = {}) -> BaseEvalDataset:
assert dataset_name in AVAILABLE_EVAL_DATASETS, f"{dataset_name} is not an available eval dataset."
def load_dataset(
dataset_name: str,
dataset_args: Dict[str, str] = {},
) -> BaseEvalDataset:
assert (
dataset_name in AVAILABLE_EVAL_DATASETS
), f"{dataset_name} is not an available eval dataset."
module_path = "pipeline.benchmarks.datasets." + dataset_name
dataset_formal_name = AVAILABLE_EVAL_DATASETS[dataset_name]
imported_module = importlib.import_module(module_path)
Expand All @@ -63,4 +69,4 @@ def load_dataset(dataset_name: str, dataset_args: Dict[str, str] = {}) -> BaseEv
# get dataset args without "name"
init_args = dataset_args.copy()
init_args.pop("name")
return dataset_class(**init_args)
return dataset_class(**init_args)
Loading

0 comments on commit 8788e67

Please sign in to comment.