Skip to content

Commit

Permalink
Merge pull request #46 from LHMoritz/ldb/fix-storage
Browse files Browse the repository at this point in the history
debug: fixed experiment data handling in experiment manager;
  • Loading branch information
luccadibe authored Dec 19, 2024
2 parents 2502df3 + 50bcf08 commit 87573bb
Show file tree
Hide file tree
Showing 3 changed files with 369 additions and 22 deletions.
3 changes: 2 additions & 1 deletion backend/Dockerfile.test
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,5 @@ RUN uv pip install httpx uvicorn[standard] anyio
RUN mkdir -p /mnt/oxn-data/experiments

# Default command runs tests
CMD ["uv", "run", "pytest", "test_main.py", "-v", "--tb=short", "-s"]
#CMD ["uv", "run", "pytest", "test_main.py", "-v", "--tb=short", "-s"]
CMD ["uv", "run", "pytest", "tests/test_experiment_manager.py", "-v", "--tb=short", "-s"]
51 changes: 31 additions & 20 deletions backend/internal/experiment_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,23 +184,26 @@ def release_lock(self):
self.lock_fd.close()
delattr(self, 'lock_fd')

# Needs to get updated: run id of experiment is now in the filename
# See write_experiment_data for implementation
def get_experiment_response_data(self, run: int, experiment_id: str, response_name: str, file_ending: str):
    """Return one stored response file of an experiment run as a ``FileResponse``.

    The file is looked up in ``<experiments_dir>/<experiment_id>/data`` under
    the name ``<run>_<experiment_id>_<response_name>.<file_ending>``, which is
    the naming convention used when the experiment data is written.

    Args:
        run: Run number that prefixes the file name.
        experiment_id: ID of the experiment (directory name and file name part).
        response_name: Name of the response variable.
        file_ending: Requested format without the leading dot: ``"json"`` or ``"csv"``.

    Returns:
        A ``FileResponse`` for the file with the media type matching the format.

    Raises:
        FileNotFoundError: If the format is not supported or the file does not exist.
    """
    media_types = {"json": "application/json", "csv": "text/csv"}

    # Reject unsupported formats before touching the filesystem so the caller
    # gets the accurate reason instead of a generic "no files found".
    media_type = media_types.get(file_ending)
    if media_type is None:
        logger.info("Unexpected file format requested")
        raise FileNotFoundError("Queried for an unsupported file format")

    file_name = f"{run}_{experiment_id}_{response_name}.{file_ending}"
    path = Path(self.experiments_dir) / experiment_id / "data" / file_name

    # Exact existence check: a glob would interpret characters such as "*" or
    # "?" in the caller-supplied names as patterns and report a match for a
    # file that is not the one returned below.
    if not path.is_file():
        raise FileNotFoundError(f"No {file_ending} files found for response {response_name}")

    return FileResponse(path, media_type=media_type, filename=file_name)

def zip_experiment_data(self, experiment_id : str):
'''zips all the data for a given experiment id'''
Expand All @@ -210,7 +213,7 @@ def zip_experiment_data(self, experiment_id : str):

if not data_path.is_dir():
logger.error(f"experiment directory {experiment_id} does not exist")
return None
raise FileNotFoundError(f"experiment directory {experiment_id} does not exist")

with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED, compresslevel=7) as zipf:
for file in data_path.iterdir():
Expand Down Expand Up @@ -238,30 +241,38 @@ def write_experiment_data(self, run: int, experiment_id : str, responses : Dict[
# Then the columns will be all the different fields of the response
# Then write the response data to a csv file
response.data.to_csv(self.experiments_dir / experiment_id / 'data' / f"{run}_{experiment_id}_{response.name}.csv", index=False)
logger.debug(f"wrote {run}_{experiment_id}_{response.name}.csv")
elif format == "json":
if isinstance(response.data, pd.DataFrame):
response.data = response.data.to_dict(orient='records') # type: ignore
# example filename: <run_id>_<experiment_id>_<response_name>.json
# Then write the response data to a json file
with open(self.experiments_dir / experiment_id / 'data' / f"{run}_{experiment_id}_{response.name}.json", "w") as f:
json.dump(response.data, f)

logger.debug(f"wrote {run}_{experiment_id}_{response.name}.json")

def list_experiment_variables(self, experiment_id: str) -> Optional[Tuple[List[str], List[str]]]:
    """List the response variables stored in an experiment's ``data`` folder.

    Files are expected to be named ``<run>_<experiment_id>_<response_name>.<ending>``.
    The response name is everything after the ``<run>_<experiment_id>_`` prefix,
    so names that contain underscores or dots are returned in full.

    Args:
        experiment_id: ID of the experiment whose data directory is inspected.

    Returns:
        A tuple ``(variable_names, file_endings)`` of two parallel lists, one
        entry per file and ordered by file name, or ``None`` if the data
        directory does not exist or contains no files.
    """
    data_dir = Path(self.experiments_dir) / experiment_id / 'data'
    if not data_dir.is_dir():
        logger.error(f"experiment directory {experiment_id} does not exist")
        return None

    # iterdir() yields entries in arbitrary order; sort for a stable result.
    files = sorted(
        (entry for entry in data_dir.iterdir() if entry.is_file()),
        key=lambda entry: entry.name,
    )
    if not files:
        logger.info(f"empty experiment directory with ID {experiment_id}, no reponse variables found")
        return None

    id_prefix = f"{experiment_id}_"
    variable_names = []
    file_endings = []
    for file in files:
        run_part, _, remainder = file.stem.partition('_')
        if run_part.isdigit() and remainder.startswith(id_prefix) and len(remainder) > len(id_prefix):
            # Conventional name: strip "<run>_<experiment_id>_" and keep the
            # rest untouched, including any underscores in the response name.
            name = remainder[len(id_prefix):]
        else:
            # File does not follow the convention: fall back to the text
            # after the last underscore and before the first dot.
            name = file.name.split('_')[-1].split('.')[0]
        variable_names.append(name)
        file_endings.append(file.suffix[1:])

    return variable_names, file_endings



Expand Down
Loading

0 comments on commit 87573bb

Please sign in to comment.