-
Notifications
You must be signed in to change notification settings - Fork 359
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
chore: remove e2e_slurm_gpu series tests
Note that there are nightly tests decorated with both `@e2e_slurm` and `skipif(not torch.cuda.is_available())`, so we still have some GPU-specific slurm tests at this point. However, those tests were not actually running as part of the e2e_slurm_gpu tests anyway. This is part of a larger effort to get rid of our znode tests, which are notoriously unreliable.
- Loading branch information
1 parent
a0cc818
commit ef348df
Showing
13 changed files
with
82 additions
and
200 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
Empty file.
24 changes: 0 additions & 24 deletions
24
e2e_tests/tests/fixtures/pytorch_identity/distributed.yaml
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
1 change: 0 additions & 1 deletion
1
harness/tests/experiment/fixtures/pytorch_identity/distributed.yaml
This file was deleted.
Oops, something went wrong.
1 change: 0 additions & 1 deletion
1
harness/tests/experiment/fixtures/pytorch_identity/model_def.py
This file was deleted.
Oops, something went wrong.
79 changes: 79 additions & 0 deletions
79
harness/tests/experiment/fixtures/pytorch_identity/model_def.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
from typing import Any, Dict, Tuple | ||
|
||
import torch.utils.data | ||
|
||
from determined import pytorch | ||
|
||
|
||
class MetricsCallback(pytorch.PyTorchCallback):
    """Callback that records every metrics dict passed to ``on_validation_end``."""

    def __init__(self) -> None:
        # Cooperate with the base class so any setup it performs still runs.
        super().__init__()
        # One entry per ``on_validation_end`` call, in call order.
        self.validation_metrics = []

    def on_validation_end(self, metrics: Dict[str, Any]) -> None:
        """Append ``metrics`` to ``self.validation_metrics``."""
        self.validation_metrics.append(metrics)
|
||
|
||
class IdentityDataset(torch.utils.data.Dataset):
    """Map-style dataset whose samples are identical (input, target) pairs.

    Sample ``i`` holds the value ``initial_value + 0.1 * i`` in two separate
    one-element tensors.

    Args:
        initial_value: Value of the sample at index 0.
        length: Number of samples reported by ``len()``; defaults to 64.
    """

    def __init__(self, initial_value: int = 1, length: int = 64) -> None:
        self.initial_value = initial_value
        self.length = length

    def __len__(self) -> int:
        """Return the number of samples in the dataset."""
        return self.length

    def __getitem__(self, index: int) -> Tuple[torch.Tensor, torch.Tensor]:
        """Return the ``(input, target)`` pair for ``index``.

        Raises:
            IndexError: If ``index`` is outside ``[0, len(self))``. Raising
                here is what lets plain iteration over the dataset terminate.
        """
        if not 0 <= index < len(self):
            raise IndexError(
                f"index {index} out of range for dataset of length {len(self)}"
            )
        v = float(self.initial_value + 0.1 * index)
        # Two distinct tensors so the input and target never alias each other.
        return torch.Tensor([v]), torch.Tensor([v])
|
||
|
||
class IdentityPyTorchTrial(pytorch.PyTorchTrial):
    """Trial that trains a single bias-free 1x1 linear layer with MSE loss.

    Training and validation data both come from ``IdentityDataset``, whose
    inputs equal their targets.
    """

    def __init__(self, context: pytorch.PyTorchTrialContext) -> None:
        self.context = context
        self.lr = 0.001

        # 1 -> 1 linear layer without bias; the weight starts at zero.
        linear = torch.nn.Linear(1, 1, False)
        linear.weight.data.fill_(0)
        self.model = context.wrap_model(linear)

        # Plain SGD over the wrapped model's parameters.
        self.opt = context.wrap_optimizer(
            torch.optim.SGD(self.model.parameters(), self.lr)
        )

        self.loss_fn = torch.nn.MSELoss(reduction="mean")
        self.metrics_callback = MetricsCallback()

    def train_batch(
        self, batch: pytorch.TorchData, epoch_idx: int, batch_idx: int
    ) -> Dict[str, torch.Tensor]:
        """Run one optimization step on ``batch`` and return its loss."""
        inputs, targets = batch
        predictions = self.model(inputs)
        loss = self.loss_fn(predictions, targets)

        self.context.backward(loss)
        self.context.step_optimizer(self.opt)

        return {"loss": loss}

    def evaluate_batch(self, batch: pytorch.TorchData) -> Dict[str, Any]:
        """Return the batch loss and the model's current scalar weight."""
        inputs, targets = batch
        return {
            "val_loss": self.loss_fn(self.model(inputs), targets),
            "weight": self.model.weight.data.item(),
        }

    def build_training_data_loader(self) -> pytorch.DataLoader:
        """Build the training loader over a default ``IdentityDataset``."""
        dataset = IdentityDataset()
        batch_size = self.context.get_per_slot_batch_size()
        return pytorch.DataLoader(dataset, batch_size=batch_size)

    def build_validation_data_loader(self) -> pytorch.DataLoader:
        """Build the validation loader over ``IdentityDataset(20)``."""
        dataset = IdentityDataset(20)
        batch_size = self.context.get_per_slot_batch_size()
        return pytorch.DataLoader(dataset, batch_size=batch_size)

    def build_callbacks(self) -> Dict[str, pytorch.PyTorchCallback]:
        """Register the metrics-recording callback under the key ``"metrics"``."""
        return {"metrics": self.metrics_callback}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters