Skip to content

Commit

Permalink
Metadata functionality in EnsembleTableProvider (#1135)
Browse files Browse the repository at this point in the history
  • Loading branch information
lindjoha authored Oct 27, 2022
1 parent 7c35594 commit 59a1e10
Show file tree
Hide file tree
Showing 7 changed files with 82 additions and 2 deletions.
1 change: 1 addition & 0 deletions .github/workflows/subsurface.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ jobs:
pip install "scipy<1.9.3" # breaking change in scipy==1.9.3
pip install "pytest<7.2.0"
pip install "pytest-xdist<3.0"
pip install "xtgeo<2.20.2"
pip install .
# Testing against our latest release (including pre-releases)
Expand Down
26 changes: 26 additions & 0 deletions tests/unit_tests/provider_tests/test_ensemble_table_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import pandas as pd

from webviz_subsurface._providers import (
ColumnMetadata,
EnsembleTableProvider,
EnsembleTableProviderFactory,
)
Expand Down Expand Up @@ -58,6 +59,8 @@ def test_synthetic_get_column_data(testdata_folder: Path) -> None:
assert df.shape == (4, 2)
assert df.columns.tolist() == ["REAL", "STR"]

assert model.column_metadata("REAL") is None


def test_create_from_aggregated_csv_file_smry_csv(
testdata_folder: Path, tmp_path: Path
Expand Down Expand Up @@ -85,6 +88,10 @@ def test_create_from_aggregated_csv_file_smry_csv(
assert valdf.columns[1] == "YEARS"
assert valdf["REAL"].nunique() == 3

# No metadata in csv files
meta: Optional[ColumnMetadata] = provider.column_metadata("FOPR")
assert meta is None


def test_create_from_per_realization_csv_file(
testdata_folder: Path, tmp_path: Path
Expand All @@ -110,6 +117,10 @@ def test_create_from_per_realization_csv_file(
assert valdf["CONIDX"].nunique() == 24
assert sorted(valdf["CONIDX"].unique()) == list(range(1, 25))

# No metadata in csv files
meta: Optional[ColumnMetadata] = provider.column_metadata("CONIDX")
assert meta is None


def test_create_from_per_realization_arrow_file(
testdata_folder: Path, tmp_path: Path
Expand All @@ -126,6 +137,11 @@ def test_create_from_per_realization_arrow_file(
assert "FOPT" in valdf.columns
assert valdf["REAL"].nunique() == 100

# Test metadata
meta: Optional[ColumnMetadata] = provider.column_metadata("FOPR")
assert meta is not None
assert meta.unit == "SM3/DAY"


def test_create_from_per_realization_parameter_file(
testdata_folder: Path, tmp_path: Path
Expand All @@ -140,6 +156,12 @@ def test_create_from_per_realization_parameter_file(
assert "GLOBVAR:FAULT_SEAL_SCALING" in valdf.columns
assert valdf["REAL"].nunique() == 100

# No metadata in parameter files
meta: Optional[ColumnMetadata] = provider.column_metadata(
"GLOBVAR:FAULT_SEAL_SCALING"
)
assert meta is None


def test_create_provider_set_from_aggregated_csv_file(tmp_path: Path) -> None:
"""This tests importing a csv file with an ensemble column with multiple
Expand All @@ -165,3 +187,7 @@ def test_create_provider_set_from_aggregated_csv_file(tmp_path: Path) -> None:
"STOIIP_OIL",
"SOURCE",
}.issubset(set(provider.column_names()))

# No metadata in csv files
meta: Optional[ColumnMetadata] = provider.column_metadata("ZONE")
assert meta is None
1 change: 1 addition & 0 deletions webviz_subsurface/_providers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
SurfaceServer,
)
from .ensemble_table_provider import (
ColumnMetadata,
EnsembleTableProvider,
EnsembleTableProviderFactory,
EnsembleTableProviderImplArrow,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from .ensemble_table_provider import EnsembleTableProvider
from .ensemble_table_provider import ColumnMetadata, EnsembleTableProvider
from .ensemble_table_provider_factory import EnsembleTableProviderFactory
from .ensemble_table_provider_impl_arrow import EnsembleTableProviderImplArrow
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from typing import Optional

import pyarrow as pa

from .ensemble_table_provider import ColumnMetadata


def create_column_metadata_from_field_meta(
    field: pa.Field,
) -> Optional[ColumnMetadata]:
    """Create ColumnMetadata from keywords stored in the field's metadata.

    Returns None when the field carries no metadata at all. When metadata is
    present but contains no ``unit`` entry, a ColumnMetadata with
    ``unit=None`` is returned.
    """

    meta_dict = field.metadata
    if not meta_dict:
        return None

    # The metadata mapping is keyed by byte strings, and the value is a byte
    # string that must be decoded before it is stored on ColumnMetadata.
    try:
        unit_bytestr = meta_dict[b"unit"]
    except KeyError:
        return ColumnMetadata(unit=None)

    # Decode as UTF-8 rather than ASCII: it yields identical results for pure
    # ASCII units, but does not raise UnicodeDecodeError for units that
    # contain non-ASCII symbols.
    return ColumnMetadata(
        unit=unit_bytestr.decode("utf-8"),
    )
Original file line number Diff line number Diff line change
@@ -1,9 +1,15 @@
import abc
from dataclasses import dataclass
from typing import List, Optional, Sequence

import pandas as pd


@dataclass(frozen=True)
class ColumnMetadata:
    """Immutable container for the metadata attached to a table column."""

    # Unit of the column's values, or None when no unit is available.
    unit: Optional[str]


class EnsembleTableProvider(abc.ABC):
@abc.abstractmethod
def column_names(self) -> List[str]:
Expand All @@ -18,3 +24,12 @@ def get_column_data(
self, column_names: Sequence[str], realizations: Optional[Sequence[int]] = None
) -> pd.DataFrame:
...

@abc.abstractmethod
def column_metadata(self, column_name: str) -> Optional[ColumnMetadata]:
    """Return the metadata for the column named `column_name`.

    None is returned when no metadata is found for the column.
    An empty ColumnMetadata object is returned when the column does have
    metadata, but none of it corresponds to the fields defined in
    ColumnMetadata.
    """
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@
add_per_vector_min_max_to_table_schema_metadata,
find_min_max_for_numeric_table_columns,
)
from .ensemble_table_provider import EnsembleTableProvider
from ._field_metadata import create_column_metadata_from_field_meta
from .ensemble_table_provider import ColumnMetadata, EnsembleTableProvider

# Since PyArrow's actual compute functions are not seen by pylint
# pylint: disable=no-member
Expand Down Expand Up @@ -170,6 +171,13 @@ def from_backing_store(

return None

def _get_or_read_schema(self) -> pa.Schema:
    """Return the Arrow schema of the backing table.

    The schema of the cached reader is used when one is set; otherwise the
    schema is read from the Arrow file named by `self._arrow_file_name`.
    """
    if self._cached_reader:
        return self._cached_reader.schema

    # Close the memory map deterministically once the schema has been read,
    # instead of leaving the mapping open until garbage collection.
    with pa.memory_map(self._arrow_file_name, "r") as source:
        return pa.ipc.RecordBatchFileReader(source).schema

def column_names(self) -> List[str]:
return self._column_names

Expand Down Expand Up @@ -215,3 +223,8 @@ def get_column_data(
)

return df

def column_metadata(self, column_name: str) -> Optional[ColumnMetadata]:
    """Return metadata for `column_name`, built from its schema field.

    The field is looked up by name in the table schema and converted with
    create_column_metadata_from_field_meta().
    """
    return create_column_metadata_from_field_meta(
        self._get_or_read_schema().field(column_name)
    )

0 comments on commit 59a1e10

Please sign in to comment.