diff --git a/README.md b/README.md index 8675f481..9ef78dcf 100644 --- a/README.md +++ b/README.md @@ -43,9 +43,6 @@ To see all available job commands, run `lm_buddy run --help` Once LM Buddy is installed in your local Python environment, usage is as follows: ``` -# Simple test -lm_buddy run simple --config simple_config.yaml - # LLM finetuning lm_buddy run finetuning --config finetuning_config.yaml diff --git a/examples/configs/evaluation/lm_harness_hf_config.yaml b/examples/configs/evaluation/lm_harness_hf_config.yaml index 6f3271bf..75b62dff 100644 --- a/examples/configs/evaluation/lm_harness_hf_config.yaml +++ b/examples/configs/evaluation/lm_harness_hf_config.yaml @@ -4,7 +4,7 @@ model: torch_dtype: "bfloat16" # Settings specific to lm_harness.evaluate -evaluator: +evaluation: tasks: ["hellaswag"] num_fewshot: 5 limit: 10 diff --git a/examples/configs/evaluation/lm_harness_inference_server_config.yaml b/examples/configs/evaluation/lm_harness_inference_server_config.yaml index 7f3ead2d..a0568803 100644 --- a/examples/configs/evaluation/lm_harness_inference_server_config.yaml +++ b/examples/configs/evaluation/lm_harness_inference_server_config.yaml @@ -5,7 +5,6 @@ model: # HuggingFace repo for the engine model being hosted engine: repo_id: "distilgpt2" - revision: "main" # # W&B artifact can also be specified as the engine model to generate a lineage # engine: # name: "wandb-artifact-name" @@ -15,7 +14,7 @@ model: tokenizer_backend: "huggingface" # Settings specific to lm_harness.evaluate -evaluator: +evaluation: tasks: ["gsm8k"] num_fewshot: 5 limit: 10 diff --git a/examples/configs/evaluation/lm_harness_quantized_config.yaml b/examples/configs/evaluation/lm_harness_quantized_config.yaml deleted file mode 100644 index f73387a9..00000000 --- a/examples/configs/evaluation/lm_harness_quantized_config.yaml +++ /dev/null @@ -1,21 +0,0 @@ -# Model to evaluate -model: - load_from: "tiiuae/falcon-7b" - torch_dtype: "bfloat16" - -# Settings specific to lm_harness.evaluate -evaluator: - tasks: ["hellaswag", "mmlu"] - num_fewshot: 5 - limit: 10 - -quantization: - load_in_4bit: True - bnb_4bit_quant_type: "fp4" - bnb_4bit_compute_dtype: "bfloat16" - -# Tracking info for where to log the run results -tracking: - name: "lm-buddy-lm-harness-inference" - project: "lm-buddy-examples" - entity: "mozilla-ai" \ No newline at end of file diff --git a/examples/configs/simple/simple_config.yaml b/examples/configs/simple/simple_config.yaml deleted file mode 100644 index 339b64c3..00000000 --- a/examples/configs/simple/simple_config.yaml +++ /dev/null @@ -1 +0,0 @@ -magic_number: 42 diff --git a/examples/dev_submission/configs/eval_config.yaml b/examples/dev_submission/configs/eval_config.yaml index cbb35cd6..dd3ea018 100644 --- a/examples/dev_submission/configs/eval_config.yaml +++ b/examples/dev_submission/configs/eval_config.yaml @@ -1,12 +1,12 @@ # Model to evaluate, specificed as a vLLM endpoint model: - base_url: "http://your.cluster.ip:8000/v1/" - tokenizer: "mistralai/Mistral-7B-v0.1" - model_name: "mistralai/Mistral-7B-v0.1" - tokenizer_backend: "huggingface" + base_url: "http://your.cluster.ip:8000/v1/" + tokenizer: "mistralai/Mistral-7B-v0.1" + model_name: "mistralai/Mistral-7B-v0.1" + tokenizer_backend: "huggingface" # Settings specific to lm_harness.evaluate -evaluator: +evaluation: tasks: ["gsm8k"] num_fewshot: 5 diff --git a/examples/notebooks/dev_ray_submission.ipynb b/examples/notebooks/dev_ray_submission.ipynb index 9da68b9a..0a9ecce3 100644 --- a/examples/notebooks/dev_ray_submission.ipynb 
+++ b/examples/notebooks/dev_ray_submission.ipynb @@ -95,7 +95,7 @@ "# pip contains an export of the dependencies for the LM Buddy package (see CONTRIBUTING.md for how to generate)\n", "\n", "runtime_env = {\n", - " \"working_dir\": f\"{root_dir}/examples/configs/simple\",\n", + " \"working_dir\": f\"{root_dir}/examples/configs/finetuning\",\n", " \"env_vars\": {\"WANDB_API_KEY\": os.environ[\"WANDB_API_KEY\"]}, # If running a job that uses W&B\n", " \"py_modules\": [str(lm_buddy_module)],\n", " \"pip\": \"requirements.txt\", # See CONTRIBUTING.md for how to generate this\n", @@ -111,8 +111,8 @@ "source": [ "# Submit the job to the Ray cluster\n", "# Note: LM Buddy is invoked by 'python -m lm_buddy run ...' since the CLI is not installed in the environment\n", - "simple_job = client.submit_job(\n", - " entrypoint=f\"python -m lm_buddy run simple --config simple_config.yaml\",\n", + "submission_id = client.submit_job(\n", + " entrypoint=f\"python -m lm_buddy run finetuning --config finetuning_config.yaml\",\n", " runtime_env=runtime_env,\n", ")" ] @@ -120,16 +120,16 @@ { "cell_type": "code", "execution_count": null, + "id": "a373b21d1d57778b", + "metadata": { + "collapsed": false + }, "outputs": [], "source": [ "# The client outputs a string with a job ID\n", "# Jobs can be interacted with and terminated via client methods\n", - "client.stop_job(simple_job)" - ], - "metadata": { - "collapsed": false - }, - "id": "a373b21d1d57778b" + "client.stop_job(submission_id)" + ] }, { "cell_type": "markdown", @@ -199,18 +199,24 @@ "source": [ "import os\n", "\n", - "from lm_buddy.jobs.simple import SimpleJobConfig\n", + "from lm_buddy.jobs.configs import FinetuningJobConfig\n", + "\n", + "# Parameters for a programatic sweep\n", + "learning_rates = [1e-5, 1e-4, 1e-3, 1e-2]\n", + "\n", "\n", - "# Generate job configs programatically for sweeps over parameter ranges\n", - "magic_numbers = [0, 10, 20, 40]\n", + "# Load a \"base\" config from file with some suitable defaults\n", + "base_config = FinetuningJobConfig.from_yaml_file(\n", + " f\"{root_dir}/examples/configs/finetuning/finetuning_config.yaml\"\n", + ")\n", "\n", - "for number in magic_numbers:\n", - " # Instantitate config in your workflow script\n", - " # You may also want to read a \"base\" config from file with some suitable defaults\n", - " config = SimpleJobConfig(magic_number=number)\n", + "for lr in learning_rates:\n", + " # Modify based on current iteration lr\n", + " job_config = base_config.model_copy(deep=True)\n", + " job_config.trainer.learning_rate = lr\n", "\n", " # `config_path` is the fully qualified path to the config file on your local filesystem\n", - " with config.to_tempfile(name=\"config.yaml\") as config_path:\n", + " with job_config.to_tempfile(name=\"config.yaml\") as config_path:\n", " # `config_path.parent` is the working directory\n", " runtime_env = {\n", " \"working_dir\": str(config_path.parent),\n", @@ -221,7 +227,7 @@ "\n", " # `config_path.name` is the file name within the working directory, i.e., \"config.yaml\"\n", " client.submit_job(\n", - " entrypoint=f\"python -m lm_buddy run simple --config {config_path.name}\",\n", + " entrypoint=f\"python -m lm_buddy run finetuning --config {config_path.name}\",\n", " runtime_env=runtime_env,\n", " )" ] diff --git a/examples/notebooks/direct_job_execution.ipynb b/examples/notebooks/direct_job_execution.ipynb index 077cda2a..b9b34487 100644 --- a/examples/notebooks/direct_job_execution.ipynb +++ b/examples/notebooks/direct_job_execution.ipynb @@ -13,8 +13,10 @@ 
"source": [ "This notebook illustrates how to use LM Buddy as a library to run jobs directly on the host machine.\n", "\n", - "Jobs are fully specified by a `lm_buddy.jobs.configs.LMBuddyJobConfig` \n", - "and are executed with the `lm_buddy.run_job` method.\n", + "Jobs are executed in the following manner:\n", + "- Construct an instance of the `lm_buddy.LMBuddy` class\n", + "- Construct an instance of your desired job configuration\n", + "- Execute a job via the `LMBuddy.finetune` or `LMBuddy.evaluate` methods\n", "\n", "**Warning**: This workflow is still considered experimental.\n", "Some jobs depend on external services (e.g., W&B, Ray cluster) and host-machine GPU resources,\n", @@ -34,15 +36,14 @@ "metadata": {}, "outputs": [], "source": [ - "import lm_buddy\n", + "from lm_buddy import LMBuddy\n", "from lm_buddy.jobs.configs import (\n", " FinetuningJobConfig,\n", " FinetuningRayConfig,\n", " LMHarnessJobConfig,\n", - " LMHarnessEvaluatorConfig,\n", + " LMHarnessEvaluationConfig,\n", ")\n", "from lm_buddy.integrations.huggingface import (\n", - " HuggingFaceRepoConfig,\n", " AutoModelConfig,\n", " TextDatasetConfig,\n", " TrainerConfig,\n", @@ -65,13 +66,11 @@ "outputs": [], "source": [ "# Base model to finetune from HuggingFace\n", - "model_config = AutoModelConfig(\n", - " load_from=HuggingFaceRepoConfig(repo_id=\"distilgpt2\"),\n", - ")\n", + "model_config = AutoModelConfig(load_from=\"distilgpt2\")\n", "\n", "# Text dataset for finetuning\n", "dataset_config = TextDatasetConfig(\n", - " load_from=HuggingFaceRepoConfig(repo_id=\"imdb\"),\n", + " load_from=\"imdb\",\n", " split=\"train[:100]\",\n", " text_field=\"text\",\n", ")\n", @@ -128,7 +127,8 @@ "outputs": [], "source": [ "# Run the job\n", - "lm_buddy.run_job(finetuning_config)" + "buddy = LMBuddy()\n", + "buddy.finetune(finetuning_config)" ] }, { @@ -146,12 +146,10 @@ "source": [ "# Define the model to be evaluated\n", "# In this case, loading directly a pretrained model from HuggingFace\n", - "model_config = AutoModelConfig(\n", - " load_from=HuggingFaceRepoConfig(repo_id=\"distilgpt2\"),\n", - ")\n", + "model_config = AutoModelConfig(load_from=\"distilgpt2\")\n", "\n", "# Define evaluation tasks and settings\n", - "evaluator_config = LMHarnessEvaluatorConfig(\n", + "evaluation_config = LMHarnessEvaluationConfig(\n", " tasks=[\"hellaswag\"],\n", " limit=10, # Only run 10 samples per task. 
Remove for a real run.\n", " num_fewshot=5,\n", @@ -167,7 +165,7 @@ "# Full lm-harness job config\n", "lm_harness_config = LMHarnessJobConfig(\n", " model=model_config,\n", - " evaluator=evaluator_config,\n", + " evaluation=evaluation_config,\n", " tracking=tracking_config,\n", ")" ] @@ -179,7 +177,8 @@ "outputs": [], "source": [ "# Run the job\n", - "lm_buddy.run_job(lm_harness_config)" + "buddy = LMBuddy()\n", + "eval_results = buddy.evaluate(lm_harness_config)" ] } ], diff --git a/src/lm_buddy/__init__.py b/src/lm_buddy/__init__.py index 0f30f312..d6da68b4 100644 --- a/src/lm_buddy/__init__.py +++ b/src/lm_buddy/__init__.py @@ -1,3 +1,3 @@ -from lm_buddy.jobs import run_job +from lm_buddy.buddy import LMBuddy -__all__ = ["run_job"] +__all__ = ["LMBuddy"] diff --git a/src/lm_buddy/buddy.py b/src/lm_buddy/buddy.py new file mode 100644 index 00000000..97d688b9 --- /dev/null +++ b/src/lm_buddy/buddy.py @@ -0,0 +1,37 @@ +from lm_buddy.integrations.wandb import ArtifactLoader, WandbArtifactLoader +from lm_buddy.jobs._entrypoints import run_finetuning, run_lm_harness, run_prometheus +from lm_buddy.jobs.common import EvaluationResult, FinetuningResult +from lm_buddy.jobs.configs import ( + EvaluationJobConfig, + FinetuningJobConfig, + LMHarnessJobConfig, + PrometheusJobConfig, +) + + +class LMBuddy: + """Your buddy in the (L)LM space. + + Simple wrapper around executable functions for tasks available in the library. + """ + + def __init__(self, artifact_loader: ArtifactLoader = WandbArtifactLoader()): + self._artifact_loader = artifact_loader + + def finetune(self, config: FinetuningJobConfig) -> FinetuningResult: + """Run a supervised finetuning task with the provided configuration.""" + finetuning_result = run_finetuning(config, self._artifact_loader) + return finetuning_result + + def evaluate(self, config: EvaluationJobConfig) -> EvaluationResult: + """Run an evaluation task with the provided configuration. + + The underlying evaluation framework is determined by the configuration type. + """ + match config: + case LMHarnessJobConfig() as lm_harness_config: + return run_lm_harness(lm_harness_config, self._artifact_loader) + case PrometheusJobConfig() as prometheus_config: + return run_prometheus(prometheus_config, self._artifact_loader) + case _: + raise ValueError(f"Invalid configuration for evaluation: {type(config)}") diff --git a/src/lm_buddy/cli/run.py b/src/lm_buddy/cli/run.py index 1f0432c7..b0d2f46b 100644 --- a/src/lm_buddy/cli/run.py +++ b/src/lm_buddy/cli/run.py @@ -1,16 +1,12 @@ import click -import lm_buddy -from lm_buddy.jobs.configs import ( - FinetuningJobConfig, - LMHarnessJobConfig, - PrometheusJobConfig, - SimpleJobConfig, -) +from lm_buddy import LMBuddy +from lm_buddy.jobs.configs import FinetuningJobConfig, LMHarnessJobConfig, PrometheusJobConfig -# TODO(RD2024-125): We should probably collapse all these commands into a single CLI command -# - Need to figure out best way to polymorphically deserialize the job config classes -# - Do we just add type discriminators at the job config level?
+# TODO(RD2024-125): Collapse the run commands into `lm-buddy finetune` and `lm-buddy evaluate` +# to match the methods on the `LMBuddy` class + +buddy = LMBuddy() @click.group(name="run", help="Run an LM Buddy job.") @@ -18,29 +14,22 @@ def group(): pass -@group.command("simple", help="Run the simple test job.") -@click.option("--config", type=str) -def run_simple(config: str) -> None: - config = SimpleJobConfig.from_yaml_file(config) - lm_buddy.run_job(config) - - @group.command("finetuning", help="Run the HuggingFace LLM finetuning job.") @click.option("--config", type=str) def run_finetuning(config: str) -> None: config = FinetuningJobConfig.from_yaml_file(config) - lm_buddy.run_job(config) + buddy.finetune(config) @group.command("lm-harness", help="Run the lm-harness evaluation job.") @click.option("--config", type=str) def run_lm_harness(config: str) -> None: config = LMHarnessJobConfig.from_yaml_file(config) - lm_buddy.run_job(config) + buddy.evaluate(config) @group.command("prometheus", help="Run the prometheus evaluation job.") @click.option("--config", type=str) def run_prometheus(config: str) -> None: config = PrometheusJobConfig.from_yaml_file(config) - lm_buddy.run_job(config) + buddy.evaluate(config) diff --git a/src/lm_buddy/cli/schema.py b/src/lm_buddy/cli/schema.py index 4577ee84..8ff1ecab 100644 --- a/src/lm_buddy/cli/schema.py +++ b/src/lm_buddy/cli/schema.py @@ -2,12 +2,7 @@ import click -from lm_buddy.jobs.configs import ( - FinetuningJobConfig, - LMHarnessJobConfig, - PrometheusJobConfig, - SimpleJobConfig, -) +from lm_buddy.jobs.configs import FinetuningJobConfig, LMHarnessJobConfig, PrometheusJobConfig @click.group(name="schema", help="Get a job configuration schema.") @@ -15,12 +10,6 @@ def group(): pass -@group.command("simple", help="Schema for the simple test job configuration.") -def schema_simple() -> None: - schema = SimpleJobConfig.model_json_schema() - click.secho(json.dumps(schema, indent=2)) - - @group.command("finetuning", help="Schema for the finetuning job configuration.") def schema_finetuning() -> None: schema = FinetuningJobConfig.model_json_schema() diff --git a/src/lm_buddy/integrations/huggingface/__init__.py b/src/lm_buddy/integrations/huggingface/__init__.py index c42646b2..aacabf25 100644 --- a/src/lm_buddy/integrations/huggingface/__init__.py +++ b/src/lm_buddy/integrations/huggingface/__init__.py @@ -1,5 +1,4 @@ # ruff: noqa: I001 -from lm_buddy.integrations.huggingface.repo_config import * from lm_buddy.integrations.huggingface.adapter_config import * from lm_buddy.integrations.huggingface.dataset_config import * from lm_buddy.integrations.huggingface.model_config import * diff --git a/src/lm_buddy/integrations/huggingface/asset_loader.py b/src/lm_buddy/integrations/huggingface/asset_loader.py index 9635655c..ce5a61ed 100644 --- a/src/lm_buddy/integrations/huggingface/asset_loader.py +++ b/src/lm_buddy/integrations/huggingface/asset_loader.py @@ -17,7 +17,6 @@ AutoModelConfig, AutoTokenizerConfig, DatasetConfig, - HuggingFaceRepoConfig, QuantizationConfig, ) from lm_buddy.integrations.wandb import ( @@ -25,9 +24,7 @@ WandbArtifactConfig, get_artifact_filesystem_path, ) - -HuggingFaceAssetPath = HuggingFaceRepoConfig | WandbArtifactConfig -"""Config that can be resolved to a HuggingFace name/path.""" +from lm_buddy.paths import AssetPath, FilePath, HuggingFaceRepoID def resolve_peft_and_pretrained(path: str) -> tuple[str, str | None]: @@ -66,23 +63,18 @@ class HuggingFaceAssetLoader: def __init__(self, artifact_loader: ArtifactLoader): 
self._artifact_loader = artifact_loader - def resolve_asset_path(self, path: HuggingFaceAssetPath) -> tuple[str, str | None]: - """Resolve the actual HuggingFace name/path from a config. - - Currently, two config types contain references to a loadable HuggingFace path: - (1) A `HuggingFaceRepoConfig` that contains the repo path directly - (2) A `WandbArtifactConfig` where the filesystem path is resolved from the artifact - """ + def resolve_asset_path(self, path: AssetPath) -> str: + """Resolve the actual HuggingFace name/path from an `AssetPath`.""" match path: - case HuggingFaceRepoConfig(repo_id, revision): - load_path, revision = repo_id, revision + case FilePath(value): + return str(value) + case HuggingFaceRepoID(repo_id): + return repo_id case WandbArtifactConfig() as artifact_config: artifact = self._artifact_loader.use_artifact(artifact_config) - load_path = get_artifact_filesystem_path(artifact) - revision = None + return str(get_artifact_filesystem_path(artifact)) case unknown_path: raise ValueError(f"Unable to resolve asset path from {unknown_path}.") - return str(load_path), revision def load_pretrained_config( self, @@ -92,10 +84,8 @@ def load_pretrained_config( An exception is raised if the HuggingFace repo does not contain a `config.json` file. """ - model_path, revision = self.resolve_asset_path(config.load_from) - return AutoConfig.from_pretrained( - pretrained_model_name_or_path=model_path, revision=revision - ) + config_path = self.resolve_asset_path(config.load_from) + return AutoConfig.from_pretrained(pretrained_model_name_or_path=config_path) def load_pretrained_model( self, @@ -122,10 +112,9 @@ def load_pretrained_model( # TODO: HuggingFace has many AutoModel classes with different "language model heads" # Can we abstract this to load with any type of AutoModel class? - model_path, revision = self.resolve_asset_path(config.load_from) + model_path = self.resolve_asset_path(config.load_from) return AutoModelForCausalLM.from_pretrained( pretrained_model_name_or_path=model_path, - revision=revision, trust_remote_code=config.trust_remote_code, torch_dtype=config.torch_dtype, quantization_config=bnb_config, @@ -137,10 +126,9 @@ def load_pretrained_tokenizer(self, config: AutoTokenizerConfig) -> PreTrainedTo An exception is raised if the HuggingFace repo does not contain a `tokenizer.json` file. """ - tokenizer_path, revision = self.resolve_asset_path(config.load_from) + tokenizer_path = self.resolve_asset_path(config.load_from) tokenizer = AutoTokenizer.from_pretrained( pretrained_model_name_or_path=tokenizer_path, - revision=revision, trust_remote_code=config.trust_remote_code, use_fast=config.use_fast, ) @@ -156,10 +144,10 @@ def load_dataset(self, config: DatasetConfig) -> Dataset: When loading from HuggingFace directly, the `Dataset` is for the provided split. When loading from disk, the saved files must be for a dataset else an exception is raised. """ - dataset_path, revision = self.resolve_asset_path(config.load_from) + dataset_path = self.resolve_asset_path(config.load_from) # Dataset loading requires a different method if from a HF vs.
disk - if isinstance(config.load_from, HuggingFaceRepoConfig): - return load_dataset(dataset_path, revision=revision, split=config.split) + if isinstance(config.load_from, HuggingFaceRepoID): + return load_dataset(dataset_path, split=config.split) else: match load_from_disk(dataset_path): case Dataset() as dataset: diff --git a/src/lm_buddy/integrations/huggingface/dataset_config.py b/src/lm_buddy/integrations/huggingface/dataset_config.py index 5ff398c7..b74682b8 100644 --- a/src/lm_buddy/integrations/huggingface/dataset_config.py +++ b/src/lm_buddy/integrations/huggingface/dataset_config.py @@ -1,7 +1,6 @@ -from pydantic import field_validator, model_validator +from pydantic import model_validator -from lm_buddy.integrations.huggingface import HuggingFaceRepoConfig, convert_string_to_repo_config -from lm_buddy.integrations.wandb import WandbArtifactConfig +from lm_buddy.paths import AssetPath, HuggingFaceRepoID from lm_buddy.types import BaseLMBuddyConfig DEFAULT_TEXT_FIELD: str = "text" @@ -10,15 +9,11 @@ class DatasetConfig(BaseLMBuddyConfig): """Base configuration to load a HuggingFace dataset.""" - load_from: HuggingFaceRepoConfig | WandbArtifactConfig + load_from: AssetPath split: str | None = None test_size: float | None = None seed: int | None = None - _validate_load_from_string = field_validator("load_from", mode="before")( - convert_string_to_repo_config - ) - @model_validator(mode="after") def validate_split_if_huggingface_repo(cls, config: "DatasetConfig"): """ @@ -26,9 +21,7 @@ def validate_split_if_huggingface_repo(cls, config: "DatasetConfig"): This makes it such that the `load_dataset` function returns the type `Dataset` instead of `DatasetDict`, which makes some of the downstream logic easier. """ - load_from = config.load_from - split = config.split - if split is None and isinstance(load_from, HuggingFaceRepoConfig): + if config.split is None and isinstance(config.load_from, HuggingFaceRepoID): raise ValueError( "A `split` must be specified when loading a dataset directly from HuggingFace." ) diff --git a/src/lm_buddy/integrations/huggingface/model_config.py b/src/lm_buddy/integrations/huggingface/model_config.py index 663d70de..41c8cc5d 100644 --- a/src/lm_buddy/integrations/huggingface/model_config.py +++ b/src/lm_buddy/integrations/huggingface/model_config.py @@ -1,7 +1,4 @@ -from pydantic import field_validator - -from lm_buddy.integrations.huggingface import HuggingFaceRepoConfig, convert_string_to_repo_config -from lm_buddy.integrations.wandb import WandbArtifactConfig +from lm_buddy.paths import AssetPath from lm_buddy.types import BaseLMBuddyConfig, SerializableTorchDtype @@ -11,10 +8,6 @@ class AutoModelConfig(BaseLMBuddyConfig): The model to load can either be a HuggingFace repo or an artifact reference on W&B. 
""" - load_from: HuggingFaceRepoConfig | WandbArtifactConfig + load_from: AssetPath trust_remote_code: bool = False torch_dtype: SerializableTorchDtype | None = None - - _validate_load_from_string = field_validator("load_from", mode="before")( - convert_string_to_repo_config - ) diff --git a/src/lm_buddy/integrations/huggingface/repo_config.py b/src/lm_buddy/integrations/huggingface/repo_config.py deleted file mode 100644 index 81549c70..00000000 --- a/src/lm_buddy/integrations/huggingface/repo_config.py +++ /dev/null @@ -1,42 +0,0 @@ -from typing import Any - -from huggingface_hub.utils import HFValidationError, validate_repo_id -from pydantic import field_validator - -from lm_buddy.types import BaseLMBuddyConfig - - -def convert_string_to_repo_config(x: Any): - if isinstance(x, str): - return HuggingFaceRepoConfig(repo_id=x) - return x - - -def is_valid_huggingface_repo_id(s: str): - """ - Simple test to check if an HF model is valid using HuggingFace's tools. - Sadly, theirs throws an exception and has no return. - - Args: - s: string to test. - """ - try: - validate_repo_id(s) - return True - except HFValidationError: - return False - - -class HuggingFaceRepoConfig(BaseLMBuddyConfig): - """Configuration for a HuggingFace Hub repository.""" - - __match_args__ = ("repo_id", "revision") - - repo_id: str - revision: str | None = None - - @field_validator("repo_id", mode="after") - def validate_repo_id(cls, x: str): - if not is_valid_huggingface_repo_id(x): - raise ValueError(f"{x} is not a valid HuggingFace repo ID.") - return x diff --git a/src/lm_buddy/integrations/huggingface/tokenizer_config.py b/src/lm_buddy/integrations/huggingface/tokenizer_config.py index 157f44f5..8fa7d975 100644 --- a/src/lm_buddy/integrations/huggingface/tokenizer_config.py +++ b/src/lm_buddy/integrations/huggingface/tokenizer_config.py @@ -1,17 +1,10 @@ -from pydantic import field_validator - -from lm_buddy.integrations.huggingface import HuggingFaceRepoConfig, convert_string_to_repo_config -from lm_buddy.integrations.wandb import WandbArtifactConfig +from lm_buddy.paths import AssetPath from lm_buddy.types import BaseLMBuddyConfig class AutoTokenizerConfig(BaseLMBuddyConfig): """Settings passed to a HuggingFace AutoTokenizer instantiation.""" - load_from: HuggingFaceRepoConfig | WandbArtifactConfig + load_from: AssetPath trust_remote_code: bool | None = None use_fast: bool | None = None - - _validate_load_from_string = field_validator("load_from", mode="before")( - convert_string_to_repo_config - ) diff --git a/src/lm_buddy/integrations/vllm.py b/src/lm_buddy/integrations/vllm.py index af99ccf6..74621ebc 100644 --- a/src/lm_buddy/integrations/vllm.py +++ b/src/lm_buddy/integrations/vllm.py @@ -1,4 +1,4 @@ -from lm_buddy.integrations.huggingface import HuggingFaceAssetPath +from lm_buddy.paths import AssetPath from lm_buddy.types import BaseLMBuddyConfig @@ -16,7 +16,7 @@ class InferenceServerConfig(BaseLMBuddyConfig): """ base_url: str - engine: str | HuggingFaceAssetPath | None = None + engine: str | AssetPath | None = None class VLLMCompletionsConfig(BaseLMBuddyConfig): diff --git a/src/lm_buddy/integrations/wandb/artifact_config.py b/src/lm_buddy/integrations/wandb/artifact_config.py index 50106330..c5dad098 100644 --- a/src/lm_buddy/integrations/wandb/artifact_config.py +++ b/src/lm_buddy/integrations/wandb/artifact_config.py @@ -8,8 +8,8 @@ class WandbArtifactConfig(BaseLMBuddyConfig): name: str project: str - version: str = "latest" entity: str | None = None + version: str = "latest" @classmethod def 
from_wandb_path(cls, path: str) -> "WandbArtifactConfig": diff --git a/src/lm_buddy/integrations/wandb/artifact_utils.py b/src/lm_buddy/integrations/wandb/artifact_utils.py index 995ba869..0def0eb1 100644 --- a/src/lm_buddy/integrations/wandb/artifact_utils.py +++ b/src/lm_buddy/integrations/wandb/artifact_utils.py @@ -1,8 +1,8 @@ from enum import Enum from pathlib import Path -from typing import Any from urllib.parse import ParseResult, urlparse +import pandas as pd import wandb @@ -91,57 +91,21 @@ def build_directory_artifact( def build_table_artifact( artifact_name: str, artifact_type: ArtifactType, - columns: list[str], - tables: dict[str, list[list[Any]]], + tables: dict[str, pd.DataFrame], ) -> wandb.Artifact: """Build an artifact containing one or more table entries. Args: artifact_name (str): Name of the artifact. artifact_type (ArtifactType): Type of artifact. - columns (list[str]): Column names for the tables. - tables (dict[str, list[list[Any]]]): Mapping from table name to table rows. + tables (dict[str, pd.DataFrame]): Mapping from table name to table data + in the form of a `pd.DataFrame` object. Returns: wandb.Artifact: The artifact containing the table(s). """ artifact = wandb.Artifact(artifact_name, type=artifact_type) for table_name, table_data in tables.items(): - table = wandb.Table(data=table_data, columns=columns) + table = wandb.Table(data=table_data) artifact.add(table, name=table_name) return artifact - - -def build_file_artifact( - artifact_name: str, - artifact_type: ArtifactType, - file_path: str | Path, - *, - reference: bool = False, - entry_name: str | None = None, -) -> wandb.Artifact: - """Build an artifact containing a single file - - Args: - artifact_name (str): Name of the artifact - artifact_type (ArtifactType): Type of artifact - file_path (str | Path): The full path (including filename) of the file - - Keyword Args: - reference (bool): Only reference the file, do not copy contents. Defaults to False. - entry_name (str | None): Name for the file within the artifact. If None, defaults - to the original filename. - - Returns: - wandb.Artifact: The generated artifact. - """ - artifact = wandb.Artifact(name=artifact_name, type=artifact_type) - - if reference: - artifact.add_reference( - uri=f"{ArtifactURIScheme.FILE}://{file_path}", - name=entry_name, - ) - else: - artifact.add_file(str(file_path), name=entry_name) - return artifact diff --git a/src/lm_buddy/jobs/__init__.py b/src/lm_buddy/jobs/__init__.py index 41e57911..e69de29b 100644 --- a/src/lm_buddy/jobs/__init__.py +++ b/src/lm_buddy/jobs/__init__.py @@ -1,38 +0,0 @@ -from lm_buddy.integrations.wandb import ArtifactLoader, WandbArtifactLoader -from lm_buddy.jobs._entrypoints import ( - run_finetuning, - run_lm_harness, - run_prometheus, - run_simple, -) -from lm_buddy.jobs.configs import ( - FinetuningJobConfig, - LMBuddyJobConfig, - LMHarnessJobConfig, - PrometheusJobConfig, - SimpleJobConfig, -) - - -def run_job( - config: LMBuddyJobConfig, - artifact_loader: ArtifactLoader = WandbArtifactLoader(), -) -> None: - """Run an LM Buddy job from the configuration. - - Args: - config (LMBuddyJobConfig): Configuration defining the job to run. - artifact_loader (ArtifactLoader): Implementation of the artifact loader protocol. - Defaults to WandbArtifactLoader(). 
- """ - match config: - case SimpleJobConfig() as simple_config: - run_simple(simple_config) - case FinetuningJobConfig() as finetuning_config: - run_finetuning(finetuning_config, artifact_loader) - case LMHarnessJobConfig() as lm_harness_config: - run_lm_harness(lm_harness_config, artifact_loader) - case PrometheusJobConfig() as prometheus_config: - run_prometheus(prometheus_config, artifact_loader) - case _: - raise ValueError(f"Received invalid job configuration: {config}") diff --git a/src/lm_buddy/jobs/_entrypoints/__init__.py b/src/lm_buddy/jobs/_entrypoints/__init__.py index 26de4304..a5f3145a 100644 --- a/src/lm_buddy/jobs/_entrypoints/__init__.py +++ b/src/lm_buddy/jobs/_entrypoints/__init__.py @@ -1,6 +1,5 @@ from lm_buddy.jobs._entrypoints.finetuning import run_finetuning from lm_buddy.jobs._entrypoints.lm_harness import run_lm_harness from lm_buddy.jobs._entrypoints.prometheus import run_prometheus -from lm_buddy.jobs._entrypoints.simple import run_simple -__all__ = ["run_finetuning", "run_lm_harness", "run_prometheus", "run_simple"] +__all__ = ["run_finetuning", "run_lm_harness", "run_prometheus"] diff --git a/src/lm_buddy/jobs/_entrypoints/finetuning.py b/src/lm_buddy/jobs/_entrypoints/finetuning.py index 31092410..39d93438 100644 --- a/src/lm_buddy/jobs/_entrypoints/finetuning.py +++ b/src/lm_buddy/jobs/_entrypoints/finetuning.py @@ -1,3 +1,4 @@ +from pathlib import Path from typing import Any import ray @@ -12,12 +13,13 @@ from lm_buddy.integrations.wandb import ( ArtifactLoader, ArtifactType, + WandbArtifactConfig, WandbResumeMode, build_directory_artifact, default_artifact_name, wandb_init_from_config, ) -from lm_buddy.jobs.common import LMBuddyJobType +from lm_buddy.jobs.common import FinetuningResult, LMBuddyJobType from lm_buddy.jobs.configs import FinetuningJobConfig @@ -62,7 +64,10 @@ def load_and_train(config: FinetuningJobConfig, artifact_loader: ArtifactLoader) trainer.train() -def run_finetuning(config: FinetuningJobConfig, artifact_loader: ArtifactLoader): +def run_finetuning( + config: FinetuningJobConfig, + artifact_loader: ArtifactLoader, +) -> FinetuningResult: # Place the artifact loader in Ray object store artifact_loader_ref = ray.put(artifact_loader) @@ -101,14 +106,31 @@ def training_function(config_data: dict[str, Any]): print(f"Training result: {result}") # Register a model artifact if tracking is enabled and Ray saved a checkpoint - if config.tracking and result.checkpoint: - # Must resume from the just-completed training run - with wandb_init_from_config(config.tracking, resume=WandbResumeMode.MUST) as run: - model_artifact = build_directory_artifact( - artifact_name=default_artifact_name(run.name, ArtifactType.MODEL), - artifact_type=ArtifactType.MODEL, - dir_path=f"{result.checkpoint.path}/{RayTrainReportCallback.CHECKPOINT_NAME}", - reference=True, - ) - print("Logging artifact for model checkpoint...") - artifact_loader.log_artifact(model_artifact) + ckpt_path, artifact_config = None, None + if result.checkpoint: + ckpt_path = Path(f"{result.checkpoint.path}/{RayTrainReportCallback.CHECKPOINT_NAME}") + if config.tracking: + # Must resume from the just-completed training run + with wandb_init_from_config(config.tracking, resume=WandbResumeMode.MUST) as run: + model_artifact = build_directory_artifact( + artifact_name=default_artifact_name(run.name, ArtifactType.MODEL), + artifact_type=ArtifactType.MODEL, + dir_path=ckpt_path, + reference=True, + ) + print("Logging artifact for model checkpoint...") + artifact_loader.log_artifact(model_artifact) + 
# Create an artifact config referencing the new artifact + artifact_config = WandbArtifactConfig( + name=model_artifact.name, + project=run.project, + entity=run.entity, + ) + + # Return finetuning result object + return FinetuningResult( + checkpoint_path=ckpt_path, + checkpoint_artifact=artifact_config, + metrics=result.metrics or {}, + is_adapter=config.adapter is not None, + ) diff --git a/src/lm_buddy/jobs/_entrypoints/lm_harness.py b/src/lm_buddy/jobs/_entrypoints/lm_harness.py index d5af1ace..87034284 100644 --- a/src/lm_buddy/jobs/_entrypoints/lm_harness.py +++ b/src/lm_buddy/jobs/_entrypoints/lm_harness.py @@ -1,6 +1,7 @@ from typing import Any import lm_eval +import pandas as pd import torch from lm_eval.models.huggingface import HFLM from lm_eval.models.openai_completions import OpenaiCompletionsLM @@ -8,36 +9,37 @@ from lm_buddy.integrations.huggingface import ( AutoModelConfig, HuggingFaceAssetLoader, - HuggingFaceAssetPath, resolve_peft_and_pretrained, ) from lm_buddy.integrations.wandb import ( ArtifactLoader, ArtifactType, + WandbArtifactConfig, WandbResumeMode, build_table_artifact, default_artifact_name, wandb_init_from_config, ) -from lm_buddy.jobs.common import LMBuddyJobType +from lm_buddy.jobs.common import EvaluationResult, LMBuddyJobType from lm_buddy.jobs.configs import LMHarnessJobConfig, LocalChatCompletionsConfig +from lm_buddy.paths import AssetPath -def get_numeric_metrics( +def get_per_task_dataframes( results: dict[str, dict[str, Any]], -) -> dict[str, list[tuple[str, float]]]: - """Filter non-numeric values from the evaluation results. +) -> dict[str, pd.DataFrame]: + """Create a `pd.DataFrame` of numeric metrics for each evaluation task. This is necessary because artifact tables must have a single datatype for each column. lm-harness returns mostly numeric values, but there are also some misc string-valued metrics. Filtering down to only numeric values allows us to produce a valid table artifact. 
""" - numeric_results = {} - for key, data in results.items(): + task_dataframes = {} + for task_name, data in results.items(): numeric_rows = [(k, v) for k, v in data.items() if isinstance(v, int | float)] - numeric_results[key] = numeric_rows - return numeric_results + task_dataframes[task_name] = pd.DataFrame(data=numeric_rows, columns=["metric", "value"]) + return task_dataframes def load_harness_model( @@ -48,7 +50,7 @@ def load_harness_model( hf_loader = HuggingFaceAssetLoader(artifact_loader) match config.model: case AutoModelConfig() as model_config: - model_path, revision = hf_loader.resolve_asset_path(model_config.load_from) + model_path = hf_loader.resolve_asset_path(model_config.load_from) model_path, peft_path = resolve_peft_and_pretrained(model_path) quantization_kwargs: dict[str, Any] = ( config.quantization.model_dump() if config.quantization else {} @@ -58,7 +60,6 @@ def load_harness_model( pretrained=model_path, tokenizer=model_path, peft=peft_path, - revision=revision if revision else "main", device="cuda" if torch.cuda.device_count() > 0 else "cpu", trust_remote_code=config.model.trust_remote_code, dtype=config.model.torch_dtype if config.model.torch_dtype else "auto", @@ -67,8 +68,8 @@ def load_harness_model( case LocalChatCompletionsConfig() as local_config: model = local_config.inference.engine - if isinstance(model, HuggingFaceAssetPath): - model, _ = hf_loader.resolve_asset_path(model) + if isinstance(model, AssetPath): + model = hf_loader.resolve_asset_path(model) # If tokenizer is not provided, it is set to the value of model internally return OpenaiCompletionsLM( model=model, @@ -82,43 +83,57 @@ def load_harness_model( raise ValueError(f"Unexpected model config type: {type(config.model)}") -def load_and_evaluate( +def run_eval( config: LMHarnessJobConfig, artifact_loader: ArtifactLoader, ) -> dict[str, list[tuple[str, float]]]: - llm = load_harness_model(config, artifact_loader) eval_results = lm_eval.simple_evaluate( model=llm, - tasks=config.evaluator.tasks, - batch_size=config.evaluator.batch_size, - num_fewshot=config.evaluator.num_fewshot, - limit=config.evaluator.limit, + tasks=config.evaluation.tasks, + batch_size=config.evaluation.batch_size, + num_fewshot=config.evaluation.num_fewshot, + limit=config.evaluation.limit, log_samples=False, ) - eval_results = get_numeric_metrics(eval_results["results"]) print(f"Obtained evaluation results: {eval_results}") - return eval_results + return get_per_task_dataframes(eval_results["results"]) -def run_lm_harness(config: LMHarnessJobConfig, artifact_loader: ArtifactLoader): - print(f"Received job configuration:\n {config.model_dump_json(indent=2)}") +def run_lm_harness( + config: LMHarnessJobConfig, + artifact_loader: ArtifactLoader, +) -> EvaluationResult: + print(f"Running lm-harness evaluation with configuration:\n {config.model_dump_json(indent=2)}") if config.tracking is not None: with wandb_init_from_config( config.tracking, - parameters=config.evaluator, # Log eval settings in W&B run + parameters=config.evaluation, # Log eval settings in W&B run resume=WandbResumeMode.ALLOW, job_type=LMBuddyJobType.EVALUATION, ) as run: - eval_results = load_and_evaluate(config, artifact_loader) - eval_artifact = build_table_artifact( + eval_tables = run_eval(config, artifact_loader) + table_artifact = build_table_artifact( artifact_name=default_artifact_name(run.name, ArtifactType.EVALUATION), artifact_type=ArtifactType.EVALUATION, - columns=["metric", "value"], - tables=eval_results, + tables=eval_tables, ) print("Logging 
artifact for evaluation results...") - artifact_loader.log_artifact(eval_artifact) + artifact_loader.log_artifact(table_artifact) + # Create an artifact config to reference the new table artifact + table_artifact_config = WandbArtifactConfig( + name=table_artifact.name, + project=run.project, + entity=run.entity, + ) else: - load_and_evaluate(config, artifact_loader) + eval_tables = run_eval(config, artifact_loader) + table_artifact_config = None + + return EvaluationResult( + tables=eval_tables, + table_artifact=table_artifact_config, + dataset_path=None, + dataset_artifact=None, + ) diff --git a/src/lm_buddy/jobs/_entrypoints/prometheus.py b/src/lm_buddy/jobs/_entrypoints/prometheus.py index 75ca71ba..8146f4ed 100644 --- a/src/lm_buddy/jobs/_entrypoints/prometheus.py +++ b/src/lm_buddy/jobs/_entrypoints/prometheus.py @@ -13,15 +13,16 @@ from openai import Completion, OpenAI, OpenAIError from tqdm import tqdm -from lm_buddy.integrations.huggingface import HuggingFaceAssetLoader -from lm_buddy.integrations.huggingface.tokenizer_config import AutoTokenizerConfig +from lm_buddy.integrations.huggingface import AutoTokenizerConfig, HuggingFaceAssetLoader from lm_buddy.integrations.wandb import ( ArtifactLoader, ArtifactType, + WandbArtifactConfig, build_directory_artifact, + default_artifact_name, wandb_init_from_config, ) -from lm_buddy.jobs.common import LMBuddyJobType +from lm_buddy.jobs.common import EvaluationResult, LMBuddyJobType from lm_buddy.jobs.configs import PrometheusJobConfig @@ -107,7 +108,7 @@ def run_eval( config: PrometheusJobConfig, artifact_loader: ArtifactLoader, client: OpenAI, -) -> str: +) -> Path: # load dataset from W&B artifact hf_loader = HuggingFaceAssetLoader(artifact_loader) data = hf_loader.load_dataset(config.dataset) @@ -138,7 +139,7 @@ def run_eval( result["prometheus_output"] = [] result["prometheus_score"] = [] - for idx in range(config.evaluation.num_answers): + for _ in range(config.evaluation.num_answers): (feedback, score) = get_response_with_retries( config, client, prompt, config.evaluation.max_retries ) @@ -149,32 +150,50 @@ def run_eval( file.write(json.dumps(result) + "\n") # convert plain json dataset in HF format - output_hf_name = str(Path(config.evaluation.output_folder) / "hf" / tracking_name) + output_dataset_path = Path(config.evaluation.output_folder) / "hf" / tracking_name ds = load_dataset("json", data_files=str(output_fname), split="train") - ds.save_to_disk(output_hf_name) + ds.save_to_disk(output_dataset_path) - return str(output_hf_name) + return output_dataset_path -def run_prometheus(config: PrometheusJobConfig, artifact_loader: ArtifactLoader): - # instantiate OpenAI client to speak with the vLLM endpoint +def run_prometheus( + config: PrometheusJobConfig, + artifact_loader: ArtifactLoader, +) -> EvaluationResult: + # Instantiate OpenAI client to speak with the vLLM endpoint client = OpenAI(base_url=config.prometheus.inference.base_url) - # Register a dataset file artifact if tracking is enabled + # Run eval and store output in local filename if config.tracking: - with wandb_init_from_config(config.tracking, job_type=LMBuddyJobType.EVALUATION): - # run eval and store output in local filename - output_dataset_name = run_eval(config, artifact_loader, client) - - # store HF dataset as a directory artifact - artifact = build_directory_artifact( - dir_path=output_dataset_name, - artifact_name=config.tracking.name, + with wandb_init_from_config(config.tracking, job_type=LMBuddyJobType.EVALUATION) as run: + output_dataset_path = 
run_eval(config, artifact_loader, client) + + # Create a directory artifact for the HF dataset + dataset_artifact = build_directory_artifact( + dir_path=output_dataset_path, + artifact_name=default_artifact_name(run.name, artifact_type=ArtifactType.DATASET), artifact_type=ArtifactType.DATASET, reference=False, ) - print("Logging artifact for evaluation results...") - artifact_loader.log_artifact(artifact) + + print("Logging artifact for evaluation dataset...") + artifact_loader.log_artifact(dataset_artifact) + + # Create a config referencing the new artifact + dataset_artifact_config = WandbArtifactConfig( + name=dataset_artifact.name, + project=run.project, + entity=run.entity, + ) else: - output_dataset_name = run_eval(config, artifact_loader, client) - print(f"Evaluation results stored in {output_dataset_name}") + output_dataset_path = run_eval(config, artifact_loader, client) + dataset_artifact_config = None + + print(f"Evaluation dataset stored at {output_dataset_path}") + return EvaluationResult( + tables={}, + table_artifact=None, + dataset_artifact=dataset_artifact_config, + dataset_path=output_dataset_path, + ) diff --git a/src/lm_buddy/jobs/_entrypoints/simple.py b/src/lm_buddy/jobs/_entrypoints/simple.py deleted file mode 100644 index 13cd0456..00000000 --- a/src/lm_buddy/jobs/_entrypoints/simple.py +++ /dev/null @@ -1,17 +0,0 @@ -import ray - -from lm_buddy.jobs.configs import SimpleJobConfig - - -@ray.remote -def get_magic_number(config: SimpleJobConfig) -> int: - return config.magic_number - - -def run_simple(config: SimpleJobConfig): - """A simple entrypoint to demonstrate the Ray interface.""" - # Connect to the Ray cluster (if not already running) - ray.init(ignore_reinit_error=True) - # Run dummy remote task - magic_number = ray.get(get_magic_number.remote(config)) - print(f"The magic number is {magic_number}") diff --git a/src/lm_buddy/jobs/common.py b/src/lm_buddy/jobs/common.py index cc170892..c25130ca 100644 --- a/src/lm_buddy/jobs/common.py +++ b/src/lm_buddy/jobs/common.py @@ -1,4 +1,11 @@ +from dataclasses import dataclass from enum import Enum +from pathlib import Path +from typing import Any + +import pandas as pd + +from lm_buddy.integrations.wandb import WandbArtifactConfig class LMBuddyJobType(str, Enum): @@ -7,3 +14,23 @@ class LMBuddyJobType(str, Enum): PREPROCESSING = "preprocessing" FINETUNING = "finetuning" EVALUATION = "evaluation" + + +@dataclass +class FinetuningResult: + """Result from a finetuning task.""" + + checkpoint_path: Path | None + checkpoint_artifact: WandbArtifactConfig | None + metrics: dict[str, Any] + is_adapter: bool + + +@dataclass +class EvaluationResult: + """Result from an evaluation task, containing aggregate metrics and artifacts.""" + + tables: dict[str, pd.DataFrame] + table_artifact: WandbArtifactConfig | None + dataset_artifact: WandbArtifactConfig | None + dataset_path: Path | None diff --git a/src/lm_buddy/jobs/configs/__init__.py b/src/lm_buddy/jobs/configs/__init__.py index e7f71236..d4701cdd 100644 --- a/src/lm_buddy/jobs/configs/__init__.py +++ b/src/lm_buddy/jobs/configs/__init__.py @@ -1,22 +1,22 @@ from lm_buddy.jobs.configs.base import LMBuddyJobConfig from lm_buddy.jobs.configs.finetuning import FinetuningJobConfig, FinetuningRayConfig from lm_buddy.jobs.configs.lm_harness import ( - LMHarnessEvaluatorConfig, + LMHarnessEvaluationConfig, LMHarnessJobConfig, LocalChatCompletionsConfig, ) -from lm_buddy.jobs.configs.prometheus import PrometheusEvaluationTaskConfig, PrometheusJobConfig -from 
lm_buddy.jobs.configs.simple import SimpleJobConfig +from lm_buddy.jobs.configs.prometheus import PrometheusEvaluationConfig, PrometheusJobConfig + +EvaluationJobConfig = LMHarnessJobConfig | PrometheusJobConfig __all__ = [ "LMBuddyJobConfig", - "SimpleJobConfig", "FinetuningJobConfig", "FinetuningRayConfig", - "LMHarnessEvaluatorConfig", + "LMHarnessEvaluationConfig", "LMHarnessJobConfig", "LocalChatCompletionsConfig", - "PrometheusEvaluationTaskConfig", + "PrometheusEvaluationConfig", "PrometheusJobConfig", - "SimpleJobConfig", + "EvaluationJobConfig", ] diff --git a/src/lm_buddy/jobs/configs/lm_harness.py b/src/lm_buddy/jobs/configs/lm_harness.py index c1710626..42e8b227 100644 --- a/src/lm_buddy/jobs/configs/lm_harness.py +++ b/src/lm_buddy/jobs/configs/lm_harness.py @@ -35,7 +35,7 @@ def validate_inference_engine(cls, config: "LocalChatCompletionsConfig"): return config -class LMHarnessEvaluatorConfig(BaseLMBuddyConfig): +class LMHarnessEvaluationConfig(BaseLMBuddyConfig): """Misc settings provided to an lm-harness evaluation job.""" tasks: conlist(str, min_length=1) @@ -48,6 +48,6 @@ class LMHarnessJobConfig(LMBuddyJobConfig): """Configuration to run an lm-evaluation-harness evaluation job.""" model: AutoModelConfig | LocalChatCompletionsConfig - evaluator: LMHarnessEvaluatorConfig + evaluation: LMHarnessEvaluationConfig quantization: QuantizationConfig | None = None tracking: WandbRunConfig | None = None diff --git a/src/lm_buddy/jobs/configs/prometheus.py b/src/lm_buddy/jobs/configs/prometheus.py index db428a97..2500eb5a 100644 --- a/src/lm_buddy/jobs/configs/prometheus.py +++ b/src/lm_buddy/jobs/configs/prometheus.py @@ -7,7 +7,7 @@ from lm_buddy.types import BaseLMBuddyConfig -class PrometheusEvaluationTaskConfig(BaseLMBuddyConfig): +class PrometheusEvaluationConfig(BaseLMBuddyConfig): """Parameters specific to Prometheus evaluation.""" num_answers: int = 3 @@ -22,17 +22,16 @@ class PrometheusEvaluationTaskConfig(BaseLMBuddyConfig): class PrometheusJobConfig(LMBuddyJobConfig): - """Configuration to run a prometheus job.""" + """Configuration for a Prometheus judge evaluation task.""" + prometheus: VLLMCompletionsConfig = Field( + description="Externally hosted Prometheus judge model." + ) dataset: TextDatasetConfig = Field( description="Dataset of text completions to evaluate using the Prometheus judge model." 
) - - # vLLM endpoint configuration - prometheus: VLLMCompletionsConfig - - # evaluation task configuration - evaluation: PrometheusEvaluationTaskConfig | None = None - - # wandb experiment tracking details + evaluation: PrometheusEvaluationConfig = Field( + default_factory=PrometheusEvaluationConfig, + description="Settings for the Prometheus evaluation.", + ) tracking: WandbRunConfig | None = None diff --git a/src/lm_buddy/jobs/configs/simple.py b/src/lm_buddy/jobs/configs/simple.py deleted file mode 100644 index f0270923..00000000 --- a/src/lm_buddy/jobs/configs/simple.py +++ /dev/null @@ -1,7 +0,0 @@ -from lm_buddy.jobs.configs import LMBuddyJobConfig - - -class SimpleJobConfig(LMBuddyJobConfig): - """Simple job submission config.""" - - magic_number: int diff --git a/src/lm_buddy/paths.py b/src/lm_buddy/paths.py new file mode 100644 index 00000000..d2e58eb8 --- /dev/null +++ b/src/lm_buddy/paths.py @@ -0,0 +1,68 @@ +from pathlib import Path +from typing import Annotated, Any + +from huggingface_hub.utils import HFValidationError, validate_repo_id +from pydantic import BeforeValidator + +from lm_buddy.integrations.wandb import WandbArtifactConfig +from lm_buddy.types import BaseLMBuddyConfig + + +class FilePath(BaseLMBuddyConfig): + """Absolute path to an object on the local filesystem.""" + + __match_args__ = ("path",) + + path: Path + + +class HuggingFaceRepoID(BaseLMBuddyConfig): + """Repository ID on the HuggingFace Hub.""" + + __match_args__ = ("repo_id",) + + repo_id: str + + +def is_valid_huggingface_repo_id(s: str): + """ + Simple test to check if an HF model is valid using HuggingFace's tools. + Sadly, theirs throws an exception and has no return. + + Args: + s: string to test. + """ + try: + validate_repo_id(s) + return True + except HFValidationError: + return False + + +def validate_asset_path(x: Any) -> Any: + match x: + case Path() as p: + return FilePath(path=p) + case str() as s if Path(s).is_absolute(): + return FilePath(path=s) + case str() as s if is_valid_huggingface_repo_id(s): + return HuggingFaceRepoID(repo_id=s) + case str(): + raise ValueError(f"{x} is neither a valid HuggingFace repo ID nor an absolute path.") + case _: + # Handled by downstream "after" validators + return x + + +AssetPath = Annotated[ + FilePath | HuggingFaceRepoID | WandbArtifactConfig, + BeforeValidator(lambda x: validate_asset_path(x)), +] +"""Union type representing the name/path for loading a HuggingFace asset. + +The path is represented by either a `FilePath`, a `HuggingFaceRepoID` +or a `WandbArtifactConfig` that can be resolved to a path via the artifact's manifest. + +This type is annotated with Pydantic validation logic to convert absolute path strings +to `FilePath`s and other strings to `HuggingFaceRepoID`s.
+""" diff --git a/tests/integration/test_finetuning.py b/tests/integration/test_finetuning.py index 01734410..6e2829b6 100644 --- a/tests/integration/test_finetuning.py +++ b/tests/integration/test_finetuning.py @@ -1,6 +1,6 @@ import pytest -import lm_buddy +from lm_buddy import LMBuddy from lm_buddy.integrations.huggingface import AutoModelConfig, TextDatasetConfig, TrainerConfig from lm_buddy.integrations.wandb import ArtifactType, WandbArtifactConfig, WandbRunConfig from lm_buddy.jobs.configs import FinetuningJobConfig, FinetuningRayConfig @@ -41,7 +41,8 @@ def test_finetuning_job(llm_model_artifact, text_dataset_artifact, job_config): artifact_loader.log_artifact(text_dataset_artifact) # Run test job - lm_buddy.run_job(job_config, artifact_loader=artifact_loader) + buddy = LMBuddy(artifact_loader) + buddy.finetune(job_config) # Two input artifacts, and one output model artifact produced artifacts = artifact_loader.get_artifacts() diff --git a/tests/integration/test_lm_harness.py b/tests/integration/test_lm_harness.py index f8b8dd32..bac69f3d 100644 --- a/tests/integration/test_lm_harness.py +++ b/tests/integration/test_lm_harness.py @@ -1,9 +1,9 @@ import pytest -import lm_buddy +from lm_buddy import LMBuddy from lm_buddy.integrations.huggingface import AutoModelConfig from lm_buddy.integrations.wandb import WandbArtifactConfig, WandbRunConfig -from lm_buddy.jobs.configs import LMHarnessEvaluatorConfig, LMHarnessJobConfig +from lm_buddy.jobs.configs import LMHarnessEvaluationConfig, LMHarnessJobConfig from tests.test_utils import FakeArtifactLoader @@ -14,10 +14,10 @@ def job_config(llm_model_artifact): ) tracking_config = WandbRunConfig(name="test-lm-harness-job") - evaluator_config = LMHarnessEvaluatorConfig(tasks=["hellaswag"], limit=5) + evaluation_config = LMHarnessEvaluationConfig(tasks=["hellaswag"], limit=5) return LMHarnessJobConfig( model=model_config, - evaluator=evaluator_config, + evaluation=evaluation_config, tracking=tracking_config, ) @@ -28,7 +28,8 @@ def test_lm_harness_job_with_tracking(llm_model_artifact, job_config): artifact_loader.log_artifact(llm_model_artifact) # Run test job - lm_buddy.run_job(job_config, artifact_loader=artifact_loader) + buddy = LMBuddy(artifact_loader) + buddy.evaluate(job_config) # One input artifact, and one eval artifact produced assert artifact_loader.num_artifacts() == 2 @@ -43,7 +44,8 @@ def test_lm_harness_job_no_tracking(llm_model_artifact, job_config): artifact_loader.log_artifact(llm_model_artifact) # Run test job - lm_buddy.run_job(job_config, artifact_loader=artifact_loader) + buddy = LMBuddy(artifact_loader) + buddy.evaluate(job_config) # One input artifact, no additional eval artifacts produced assert artifact_loader.num_artifacts() == 1 diff --git a/tests/integration/test_simple.py b/tests/integration/test_simple.py deleted file mode 100644 index c1e7f7ca..00000000 --- a/tests/integration/test_simple.py +++ /dev/null @@ -1,10 +0,0 @@ -import ray - -from lm_buddy.jobs._entrypoints.simple import get_magic_number -from lm_buddy.jobs.configs import SimpleJobConfig - - -def test_simple_remote_task(): - config = SimpleJobConfig(magic_number=42) - result = ray.get(get_magic_number.remote(config)) - assert result == 42 diff --git a/tests/unit/integrations/huggingface/test_dataset_config.py b/tests/unit/integrations/huggingface/test_dataset_config.py index f696c09e..4b215208 100644 --- a/tests/unit/integrations/huggingface/test_dataset_config.py +++ b/tests/unit/integrations/huggingface/test_dataset_config.py @@ -1,10 +1,11 @@ import 
pytest from pydantic import ValidationError -from lm_buddy.integrations.huggingface import DatasetConfig, HuggingFaceRepoConfig +from lm_buddy.integrations.huggingface import DatasetConfig +from lm_buddy.paths import HuggingFaceRepoID def test_split_is_required(): with pytest.raises(ValidationError): - repo = HuggingFaceRepoConfig(repo_id="dataset/xyz") + repo = HuggingFaceRepoID(repo_id="dataset/xyz") DatasetConfig(load_from=repo, split=None) diff --git a/tests/unit/jobs/configs/test_finetuning_config.py b/tests/unit/jobs/configs/test_finetuning_config.py index cce77868..6aee111b 100644 --- a/tests/unit/jobs/configs/test_finetuning_config.py +++ b/tests/unit/jobs/configs/test_finetuning_config.py @@ -1,8 +1,9 @@ import pytest from pydantic import ValidationError -from lm_buddy.integrations.huggingface import HuggingFaceRepoConfig, TextDatasetConfig +from lm_buddy.integrations.huggingface import TextDatasetConfig from lm_buddy.jobs.configs import FinetuningJobConfig, FinetuningRayConfig +from lm_buddy.paths import HuggingFaceRepoID from tests.test_utils import copy_pydantic_json @@ -52,10 +53,10 @@ def test_load_example_config(examples_dir): def test_argument_validation(): - model_repo = HuggingFaceRepoConfig(repo_id="model_path") - tokenizer_repo = HuggingFaceRepoConfig(repo_id="dataset_path") + model_repo = HuggingFaceRepoID(repo_id="model_repo_id") + tokenizer_repo = HuggingFaceRepoID(repo_id="tokenizer_repo_id") dataset_config = TextDatasetConfig( - load_from=HuggingFaceRepoConfig(repo_id="dataset_path"), + load_from=HuggingFaceRepoID(repo_id="dataset_repo_id"), split="train", ) @@ -70,11 +71,11 @@ def test_argument_validation(): # Check passing invalid arguments is validated for each asset type with pytest.raises(ValidationError): - FinetuningJobConfig(model=12345, tokenizer="tokenizer_path", dataset="dataset_path") + FinetuningJobConfig(model=12345, tokenizer="tokenizer_repo_id", dataset="dataset_repo_id") with pytest.raises(ValidationError): - FinetuningJobConfig(model="model_path", tokenizer=12345, dataset="dataset_path") + FinetuningJobConfig(model="model_repo_id", tokenizer=12345, dataset="dataset_repo_id") with pytest.raises(ValidationError): - FinetuningJobConfig(model="model_path", tokenizer="tokenizer_path", dataset=12345) + FinetuningJobConfig(model="model_repo_id", tokenizer="tokenizer_repo_id", dataset=12345) # Check that tokenizer is set to model path when absent missing_tokenizer_config = FinetuningJobConfig(model=model_repo.repo_id, dataset=dataset_config) diff --git a/tests/unit/jobs/configs/test_job_config.py b/tests/unit/jobs/configs/test_job_config.py index 73c83dfe..ebf3bbea 100644 --- a/tests/unit/jobs/configs/test_job_config.py +++ b/tests/unit/jobs/configs/test_job_config.py @@ -1,9 +1,12 @@ -from lm_buddy.jobs.configs import SimpleJobConfig +from lm_buddy.jobs.configs import LMBuddyJobConfig def test_config_as_tempfile(): - config = SimpleJobConfig(magic_number=42) + class TestConfig(LMBuddyJobConfig): + magic_number: int + + config = TestConfig(magic_number=42) config_name = "my-job-config.yaml" with config.to_tempfile(name=config_name) as path: assert path.name == config_name - assert SimpleJobConfig.from_yaml_file(path) == config + assert TestConfig.from_yaml_file(path) == config diff --git a/tests/unit/jobs/configs/test_lm_harness_config.py b/tests/unit/jobs/configs/test_lm_harness_config.py index 16135e09..0c28e3c0 100644 --- a/tests/unit/jobs/configs/test_lm_harness_config.py +++ b/tests/unit/jobs/configs/test_lm_harness_config.py @@ -3,7 +3,7 @@ from 
lm_buddy.integrations.vllm import InferenceServerConfig from lm_buddy.jobs.configs import ( - LMHarnessEvaluatorConfig, + LMHarnessEvaluationConfig, LMHarnessJobConfig, LocalChatCompletionsConfig, ) @@ -21,8 +21,8 @@ def local_completions_config(inference_server_config): @pytest.fixture -def lm_harness_evaluator_config(): - return LMHarnessEvaluatorConfig( +def lm_harness_evaluation_config(): + return LMHarnessEvaluationConfig( tasks=["task1", "task2", "task3"], num_fewshot=5, ) @@ -35,19 +35,19 @@ def lm_harness_job_config( local_completions_config, quantization_config, wandb_run_config, - lm_harness_evaluator_config, + lm_harness_evaluation_config, ): if request.param == "model_config_with_artifact": return LMHarnessJobConfig( model=model_config_with_artifact, - evaluator=lm_harness_evaluator_config, + evaluation=lm_harness_evaluation_config, tracking=wandb_run_config, quantization=quantization_config, ) elif request.param == "local_completions_config": return LMHarnessJobConfig( model=local_completions_config, - evaluator=lm_harness_evaluator_config, + evaluation=lm_harness_evaluation_config, tracking=wandb_run_config, quantization=quantization_config, ) diff --git a/tests/unit/jobs/test_run_job.py b/tests/unit/jobs/test_run_job.py deleted file mode 100644 index 47d78a34..00000000 --- a/tests/unit/jobs/test_run_job.py +++ /dev/null @@ -1,10 +0,0 @@ -import pytest - -import lm_buddy -from lm_buddy.integrations.huggingface import AutoModelConfig - - -def test_invalid_config_error(): - not_a_job_config = AutoModelConfig(load_from="distilgpt2") - with pytest.raises(ValueError): - lm_buddy.run_job(not_a_job_config) diff --git a/tests/unit/test_paths.py b/tests/unit/test_paths.py new file mode 100644 index 00000000..c77789d8 --- /dev/null +++ b/tests/unit/test_paths.py @@ -0,0 +1,30 @@ +from pathlib import Path + +import pytest +from pydantic import TypeAdapter, ValidationError + +from lm_buddy.integrations.wandb import WandbArtifactConfig +from lm_buddy.paths import AssetPath, FilePath, HuggingFaceRepoID + + +def test_asset_path_validation(): + # Imbues the AssetPath type with Pydantic validation methods + adapter_cls = TypeAdapter(AssetPath) + + repo_string = adapter_cls.validate_python("repo_id") + assert isinstance(repo_string, HuggingFaceRepoID) + + path_string = adapter_cls.validate_python("/absolute/path") + assert isinstance(path_string, FilePath) + + path_object = adapter_cls.validate_python(Path("/absolute/path")) + assert isinstance(path_object, FilePath) + + artifact_config = WandbArtifactConfig(name="artifact", project="project") + artifact_config = adapter_cls.validate_python(artifact_config) + assert isinstance(artifact_config, WandbArtifactConfig) + + with pytest.raises(ValidationError): + adapter_cls.validate_python("bad...repo_id") + with pytest.raises(ValidationError): + adapter_cls.validate_python(120850120)
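
For orientation, a minimal usage sketch (not part of the diff) of the reworked `LMBuddy` interface introduced above, assembled from the notebook and CLI changes; the YAML file names are placeholders for the configs under `examples/configs`:

```python
# Usage sketch for the new LMBuddy API (names taken from the diff above).
# The config file paths are placeholders; see examples/configs for real ones.
from lm_buddy import LMBuddy
from lm_buddy.jobs.configs import FinetuningJobConfig, LMHarnessJobConfig

buddy = LMBuddy()  # defaults to the W&B-backed WandbArtifactLoader

# Finetuning: returns a FinetuningResult with checkpoint path/artifact and metrics
finetuning_config = FinetuningJobConfig.from_yaml_file("finetuning_config.yaml")
finetuning_result = buddy.finetune(finetuning_config)
print(finetuning_result.checkpoint_path)

# Evaluation: the framework is selected by the config type (lm-harness here)
lm_harness_config = LMHarnessJobConfig.from_yaml_file("lm_harness_hf_config.yaml")
eval_result = buddy.evaluate(lm_harness_config)
print(eval_result.tables)  # mapping of task name -> pd.DataFrame of metrics
```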