Skip to content

Commit

Permalink
Integrate JSONL with recent changes
Browse files Browse the repository at this point in the history
  • Loading branch information
ml-evs committed Oct 9, 2023
1 parent 4d5ea00 commit 10f1331
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 11 deletions.
1 change: 1 addition & 0 deletions src/mc_optimade/examples/direct_from_jsonl/example.jsonl
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{"x-optimade": {"api_version": "1.2.0"}}
{"formats": ["json"], "description": "structures", "properties": {"_mcloudarchive_energy": {"description": "The total energy per atom as computed by DFT", "unit": "eV/atom", "sortable": null, "type": "float"}, "_mcloudarchive_property_b": {"description": "Alias for some more complicated property_b", "unit": null, "sortable": null, "type": "float"}, "_mcloudarchive_structure_description": {"description": "Provides a human-readable description for this particular entry_type", "unit": null, "sortable": null, "type": "string"}}, "output_fields_by_format": {"json": ["_mcloudarchive_energy", "_mcloudarchive_property_b", "_mcloudarchive_structure_description"]}}
{"id": "structures.zip/structures/cifs/cc1a41b1-a841-4818-baf1-a6c1441dc52a.cif", "type": "structures", "links": null, "meta": null, "attributes": {"immutable_id": null, "last_modified": null, "elements": ["B", "Ir", "Mg", "Zn"], "nelements": 4, "elements_ratios": [0.2, 0.5, 0.2, 0.1], "chemical_formula_descriptive": "B4Ir10Mg4Zn2", "chemical_formula_reduced": "B2Ir5Mg2Zn", "chemical_formula_hill": null, "chemical_formula_anonymous": "A5B2C2D", "dimension_types": [1, 1, 1], "nperiodic_dimensions": 3, "lattice_vectors": [[9.4623270052342, 0.0, 0.0], [0.0, 9.4623270052342, 0.0], [0.0, 0.0, 2.9327245575729]], "cartesian_site_positions": [[3.0564738352234, 1.6746896673437002, 0.0], [7.7876373378905, 3.0564738352234, 0.0], [6.4058531699108, 7.7876373378905, 0.0], [1.6746896673437002, 6.4058531699108, 0.0], [4.7311635025671, 4.7311635025671, 0.0], [0.0, 0.0, 0.0], [8.2578546699396, 5.9356358378617, 0.0], [3.5266911673725, 8.2578546699396, 0.0], [1.2044723352946, 3.5266911673725, 0.0], [5.9356358378617, 1.2044723352946, 0.0], [4.0683890403188, 6.7751547390362, 1.4663622787864], [8.7995525428859, 7.418335768765, 1.4663622787864], [2.6871722661979005, 4.0683890403188, 1.4663622787864], [7.418335768765, 0.66277446224826, 1.4663622787864], [5.3939379649153, 2.6871722661979005, 1.4663622787864], [0.66277446224826, 2.0439912363691, 1.4663622787864], [6.7751547390362, 5.3939379649153, 1.4663622787864], [2.0439912363691, 8.7995525428859, 1.4663622787864], [4.7311635025671, 0.0, 1.4663622787864], [0.0, 4.7311635025671, 1.4663622787864]], "nsites": 20, "species": [{"name": "Ir", "chemical_symbols": ["Ir"], "concentration": [1.0], "mass": null, "original_name": null, "attached": null, "nattached": null}, {"name": "Mg", "chemical_symbols": ["Mg"], "concentration": [1.0], "mass": null, "original_name": null, "attached": null, "nattached": null}, {"name": "B", "chemical_symbols": ["B"], "concentration": [1.0], "mass": null, "original_name": null, "attached": null, "nattached": null}, {"name": "Zn", "chemical_symbols": ["Zn"], "concentration": [1.0], "mass": null, "original_name": null, "attached": null, "nattached": null}], "species_at_sites": ["Mg", "Mg", "Mg", "Mg", "Zn", "Zn", "B", "B", "B", "B", "Ir", "Ir", "Ir", "Ir", "Ir", "Ir", "Ir", "Ir", "Ir", "Ir"], "assemblies": null, "structure_features": [], "_mcloudarchive_energy": -0.45, "_mcloudarchive_property_b": 0.86, "_mcloudarchive_structure_description": "describing something else"}, "relationships": null}
{"id": "structures.zip/structures/cifs/991bec7a-b3a8-49af-ba6d-be5afd685cd4.cif", "type": "structures", "links": null, "meta": null, "attributes": {"immutable_id": null, "last_modified": null, "elements": ["C", "Sr"], "nelements": 2, "elements_ratios": [0.5, 0.5], "chemical_formula_descriptive": "CSr", "chemical_formula_reduced": "CSr", "chemical_formula_hill": null, "chemical_formula_anonymous": "AB", "dimension_types": [1, 1, 1], "nperiodic_dimensions": 3, "lattice_vectors": [[4.006498849786306, 0.0, 0.0], [2.0032494248931525, 3.469729784148075, 0.0], [2.0032494248931525, 1.1565765947160247, 3.271292612341386]], "cartesian_site_positions": [[0.0, 0.0, 0.0], [4.006498849786305, 2.31315318943205, 1.635646306170693]], "nsites": 2, "species": [{"name": "C", "chemical_symbols": ["C"], "concentration": [1.0], "mass": null, "original_name": null, "attached": null, "nattached": null}, {"name": "Sr", "chemical_symbols": ["Sr"], "concentration": [1.0], "mass": null, "original_name": null, "attached": null, "nattached": null}], "species_at_sites": ["Sr", "C"], "assemblies": null, "structure_features": [], "_mcloudarchive_energy": -0.55, "_mcloudarchive_property_b": 1.01, "_mcloudarchive_structure_description": NaN}, "relationships": null}
Expand Down
20 changes: 11 additions & 9 deletions src/mc_optimade/mc_optimade/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ class ParsedFiles(BaseModel):
description="The path to an archive or file to be unzipped/decompressed."
)

matches: list[str] = Field(
matches: Optional[list[str]] = Field(
description="A list of matches to be used to filter the file contents. Each match can use simple '*' wildcard syntax.",
examples=[["structures/*.cif", "relaxed-structures/1.cif"]],
)
Expand All @@ -78,10 +78,11 @@ class EntryConfig(BaseModel):

@validator("entry_type")
def check_optimade_entry_type(cls, v):
if v not in ("structures", "references") and not v.startswith("_"):
raise ValueError(
f"OPTIMADE entry type must be either 'structures', 'references', or contain a custom prefix, not {v}"
)
if not isinstance(v, JSONLConfig):
if v not in ("structures", "references") and not v.startswith("_"):
raise ValueError(
f"OPTIMADE entry type must be either 'structures', 'references', or contain a custom prefix, not {v}"
)

return v

Expand Down Expand Up @@ -127,10 +128,11 @@ class Config(BaseModel):

@validator("entries")
def check_one_entry_per_type(cls, v):
if len({e.entry_type for e in v}) != len(v):
raise ValueError(
"Each entry type must be listed only once in the config file."
)
if not isinstance(v, JSONLConfig):
if len({e.entry_type for e in v}) != len(v):
raise ValueError(
"Each entry type must be listed only once in the config file."
)
return v

@staticmethod
Expand Down
4 changes: 2 additions & 2 deletions src/mc_optimade/mc_optimade/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,8 @@ def convert_archive(archive_path: Path) -> Path:
# if the config specifies just a JSON-L, then extract any archives
# and return the JSONL path
if isinstance(mc_config.entries, JSONLConfig):
if mc_config.entries.archive_file is not None:
inflate_archive(Path(archive_path), Path(mc_config.entries.archive_file))
if mc_config.entries.file is not None:
inflate_archive(Path(archive_path), Path(mc_config.entries.file))
return Path(archive_path) / mc_config.entries.jsonl_path

# first, decompress any provided data paths
Expand Down

0 comments on commit 10f1331

Please sign in to comment.