Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve inference error checks #206

Merged
merged 3 commits into from
Nov 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion services/inference/tests/test_inference_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
def ts_data_base() -> pd.DataFrame:
# Generate a date range
length = SERIES_LENGTH
date_range = pd.date_range(start="2023-10-01", periods=length, freq="H")
date_range = pd.date_range(start="2023-10-01", periods=length, freq="h")

# Create a DataFrame
df = pd.DataFrame(
Expand Down
81 changes: 78 additions & 3 deletions services/inference/tests/test_inference_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
"ttm-1024-96-r1": {"context_length": 1024, "prediction_length": 96},
"ttm-r2": {"context_length": 512, "prediction_length": 96},
"ttm-r2-etth-finetuned": {"context_length": 512, "prediction_length": 96},
"ttm-r2-etth-finetuned-control": {"context_length": 512, "prediction_length": 96},
"ttm-1024-96-r2": {"context_length": 1024, "prediction_length": 96},
"ttm-1536-96-r2": {"context_length": 1536, "prediction_length": 96},
"ibm/test-patchtst": {"context_length": 512, "prediction_length": 96},
Expand Down Expand Up @@ -361,7 +362,7 @@ def test_zero_shot_forecast_inference(ts_data):
assert counts["output_data_points"] == (prediction_length // 4) * len(params["target_columns"][1:])


@pytest.mark.parametrize("ts_data", ["ttm-r2"], indirect=True)
@pytest.mark.parametrize("ts_data", ["ttm-r2-etth-finetuned-control"], indirect=True)
def test_future_data_forecast_inference(ts_data):
test_data, params = ts_data

Expand Down Expand Up @@ -400,6 +401,7 @@ def test_future_data_forecast_inference(ts_data):
"id_columns": params["id_columns"],
"target_columns": target_columns,
"control_columns": [c for c in params["target_columns"] if c not in target_columns],
"freq": "1h",
},
"data": encode_data(test_data_, params["timestamp_column"]),
"future_data": encode_data(future_data, params["timestamp_column"]),
Expand All @@ -409,8 +411,10 @@ def test_future_data_forecast_inference(ts_data):
"Future data should have time series of length that is at least the specified prediction length." in out.text
)

# test multi series, longer future data
test_data_ = test_data.copy()
# test single series, longer future data
test_data_ = test_data[test_data[id_columns[0]] == "a"].copy()
num_ids = 1
# test_data_ = test_data.copy()

target_columns = ["OT"]

Expand All @@ -435,6 +439,7 @@ def test_future_data_forecast_inference(ts_data):
"id_columns": params["id_columns"],
"target_columns": target_columns,
"control_columns": [c for c in params["target_columns"] if c not in target_columns],
"freq": "1h",
},
"data": encode_data(test_data_, params["timestamp_column"]),
"future_data": encode_data(future_data, params["timestamp_column"]),
Expand Down Expand Up @@ -548,6 +553,76 @@ def test_finetuned_model_inference(ts_data):
assert df_out[0].shape[0] == prediction_length


@pytest.mark.parametrize("ts_data", ["ttm-r2"], indirect=True)
def test_improper_use_of_zero_shot_model_inference(ts_data):
    """Verify that misusing a zero-shot model yields clear error messages.

    Two misuse cases are exercised against the inference service:
    1. Supplying ``conditional_columns`` in the schema for a model that was
       not finetuned with exogenous support.
    2. Supplying ``future_data`` to a model that neither supports nor
       requires future exogenous values.
    """
    frame, params = ts_data
    id_cols = params["id_columns"]
    model_id = params["model_id"]
    ts_col = params["timestamp_column"]

    # Restrict to a single series for both scenarios.
    single_series = frame[frame[id_cols[0]] == "a"].copy()

    # --- Case 1: conditional columns passed to a non-conditional model ---
    extra_cols = [c for c in params["target_columns"] if c != "OT"]
    request_payload = {
        "model_id": model_id,
        "parameters": {
            # "prediction_length": params["prediction_length"],
        },
        "schema": {
            "timestamp_column": ts_col,
            "id_columns": id_cols,
            "target_columns": ["OT"],
            "freq": "1h",
            "conditional_columns": extra_cols,
        },
        "data": encode_data(single_series, ts_col),
        "future_data": {},
    }

    response = get_inference_response(request_payload)
    expected = (
        "Unexpected parameter conditional_columns for a zero-shot model, please confirm you have the correct model_id and schema."
    )
    assert expected in response.text

    # --- Case 2: future data provided to a model with no exogenous support ---
    single_series = frame[frame[id_cols[0]] == "a"].copy()

    last_rows = select_by_index(single_series, id_columns=id_cols, start_index=-1)
    future_frame = extend_time_series(
        last_rows,
        timestamp_column=ts_col,
        grouping_columns=id_cols,
        total_periods=25,
        freq="1h",
    ).fillna(0)

    request_payload = {
        "model_id": model_id,
        "parameters": {
            # "prediction_length": params["prediction_length"],
        },
        "schema": {
            "timestamp_column": ts_col,
            "id_columns": id_cols,
            "target_columns": ["OT"],
            "freq": "1h",
        },
        "data": encode_data(single_series, ts_col),
        "future_data": encode_data(future_frame, ts_col),
    }

    response = get_inference_response(request_payload)
    assert "Future data was provided, but the model does not support or require future exogenous." in response.text


@pytest.mark.parametrize(
"ts_data",
[
Expand Down
17 changes: 16 additions & 1 deletion services/inference/tsfminference/hf_service_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,21 @@ def _prepare(

preprocessor = self.load_preprocessor(self.model_path)

if self.handler_config.is_finetuned and preprocessor is None:
raise ValueError("Model indicates that it is finetuned but no preprocessor was found.")

if not self.handler_config.is_finetuned and preprocessor is not None:
raise ValueError("Unexpected: model indicates that it is not finetuned but a preprocessor was found.")

if preprocessor is None:
to_check = ["conditional_columns", "control_columns", "observable_columns", "static_categorical_columns"]

for param in to_check:
if param in preprocessor_params and preprocessor_params[param]:
raise ValueError(
f"Unexpected parameter {param} for a zero-shot model, please confirm you have the correct model_id and schema."
)

preprocessor = TimeSeriesPreprocessor(
**preprocessor_params,
scaling=False,
Expand Down Expand Up @@ -280,13 +294,14 @@ def _run(
"""

# raise an error if future data is not provided, but is needed by the model
# Remember preprocessor.exogenous_channel_indices are the exogenous for which future data is available
if self.preprocessor.exogenous_channel_indices and future_data is None:
raise ValueError(
"Future data should be provided for exogenous columns where the future is known (`control_columns` and `observable_columns`)"
)

if not self.preprocessor.exogenous_channel_indices and future_data is not None:
raise ValueError("Future data future data was provided, but model does not support exogenous")
raise ValueError("Future data was provided, but the model does not support or require future exogenous.")

# future_data checks
if future_data is not None:
Expand Down
1 change: 1 addition & 0 deletions services/inference/tsfminference/tsfm_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ def __init__(self, **kwargs):
self.minimum_context_length = kwargs.pop("minimum_context_length", 1)
self.maximum_context_length = kwargs.pop("maximum_context_length", None)
self.maximum_prediction_length = kwargs.pop("maximum_prediction_length", None)
self.is_finetuned = kwargs.pop("is_finetuned", False)

# "maximum_prediction_length": 96,
# "minimum_context_length": 512,
Expand Down