feat(jobs): MLOps example #64

Merged: 23 commits, Dec 14, 2023
7 changes: 7 additions & 0 deletions .gitignore
@@ -3,9 +3,16 @@

*.tfstate
*.tfstate.*
*.tfvars

.terraform.lock.hcl

# Serverless framework
node_modules/
.serverless/

# Env files
*.env

# Python
venv/
7 changes: 7 additions & 0 deletions README.md
@@ -67,6 +67,12 @@ Table of Contents:
| **[Terraform NGINX hello world](containers/terraform-nginx-hello-world/README.md)** <br/> A minimal example running the base NGINX image in a serverless container deployed with Terraform. | N/A | [Terraform] |
| **[Triggers with Terraform](containers/terraform-triggers/README.md)** <br/> Configuring two SQS triggers, used to trigger two containers, one public, one private. | N/A | [Terraform] |

### Jobs

| Example | Language | Deployment |
|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------|------------------------|
| **[Serverless MLOps](jobs/ml-ops/README.md)** <br/> An example running a serverless machine learning workflow. | Python | [Terraform]+[Console] |

### 💜 Projects

| Example | Services | Language | Deployment |
@@ -77,6 +83,7 @@ Table of Contents:

[Serverless Framework]: https://github.com/scaleway/serverless-scaleway-functions
[Terraform]: https://registry.terraform.io/providers/scaleway/scaleway/latest/docs
[Console]: https://console.scaleway.com
[Python API Framework]: https://github.com/scaleway/serverless-api-project

## Contributing
99 changes: 99 additions & 0 deletions jobs/ml-ops/README.md
@@ -0,0 +1,99 @@
# Serverless MLOps

In this example, we train and deploy a binary classification inference model using Scaleway Serverless Jobs and a Serverless Container. To do this, we use the following resources:

1. Serverless Job to populate data in S3
2. Serverless Job for training
3. Serverless Container for inference

We use object storage to share data between the steps.

## Context

In this example we use a bank telemarketing dataset to predict whether a client will subscribe to a term deposit.

This dataset records marketing phone calls made to clients. The outcome of each call is shown in the `y` column:

* `0`: no subscription
* `1`: subscription
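In the raw UCI file the outcome is recorded as a `yes`/`no` string, so the labels above come from a simple binarization. A minimal sketch, assuming the semicolon-separated format of `bank-additional-full.csv` (the inline two-row sample is illustrative, not real data):

```python
import csv
import io

# A tiny sample in the UCI file's semicolon-separated format (illustrative).
sample = "age;job;y\n41;services;no\n29;student;yes\n"

rows = list(csv.DictReader(io.StringIO(sample), delimiter=";"))

# Map the raw outcome string to the binary target used in this example.
labels = [1 if row["y"] == "yes" else 0 for row in rows]
print(labels)  # → [0, 1]
```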

## Data Source

The dataset is open source and published [here](http://archive.ics.uci.edu/dataset/222/bank+marketing) in the UCI Machine Learning Repository. It exists in several versions; the one used here is close to that analyzed in the following research work:

* [Moro et al., 2014] S. Moro, P. Cortez and P. Rita. A Data-Driven Approach to Predict the Success of Bank Telemarketing. Decision Support Systems, Elsevier, 62:22-31, June 2014

## Running the example

### Step 0. Set up a Scaleway API key

For this example you will need to configure (or reuse) a Scaleway API key with permissions to create and update Serverless Containers and Jobs, as well as write to Object Storage buckets.

### Step 1. Provision resources with Terraform

Set your Scaleway access key, secret key and project ID in environment variables:

```console
export TF_VAR_access_key=<your-access-key>
export TF_VAR_secret_key=<your-secret-key>
export TF_VAR_project_id=<your-project-id>

cd terraform
terraform init
terraform plan
terraform apply
```

### Step 2. Run the data and training Jobs

To run the jobs for the data and training, we can use the Scaleway CLI:

```console
cd terraform
scw jobs run $(terraform output -raw data_job_id)
scw jobs runs ls

scw jobs run $(terraform output -raw training_job_id)
scw jobs runs ls
```

You can also trigger the jobs from the [Jobs section](https://console.scaleway.com/serverless-jobs/jobs) of the Scaleway Console.

### Step 3. Use the inference API

```console
cd terraform
export INFERENCE_URL=$(terraform output -raw endpoint)

curl -X POST \
  -H "Content-Type: application/json" \
  -d @../inference/example.json \
  "${INFERENCE_URL}/inference"
```
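The same call can be made from Python with the standard library; a minimal client sketch mirroring the curl command above (the payload field is a placeholder, the real feature names live in `inference/example.json`):

```python
import json
import urllib.request


def build_inference_request(base_url: str, payload: dict) -> urllib.request.Request:
    """Prepare a JSON POST to the /inference endpoint."""
    return urllib.request.Request(
        url=f"{base_url}/inference",
        data=json.dumps(payload).encode("utf-8"),
        headers={"Content-Type": "application/json"},
        method="POST",
    )


# Placeholder payload: take the real fields from inference/example.json.
req = build_inference_request("http://localhost:8080", {"age": 41})
# urllib.request.urlopen(req) would send it once the server is up.
print(req.get_method(), req.full_url)
```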

## Local testing

To test the example locally you can use [Docker Compose](https://docs.docker.com/compose/install/).

```console
# Build the containers locally
docker compose build

# Run the job to set up the data
docker compose up data

# Run the job to train and upload the model
docker compose up training

# Run the inference server
docker compose up inference
```

Access the inference API locally:

```console
curl -X POST \
  -H "Content-Type: application/json" \
  -d @inference/example.json \
  http://localhost:8080/inference
```
1 change: 1 addition & 0 deletions jobs/ml-ops/data/.gitignore
@@ -0,0 +1 @@
dataset/
16 changes: 16 additions & 0 deletions jobs/ml-ops/data/Dockerfile
@@ -0,0 +1,16 @@
FROM python:3.12-slim-bookworm

WORKDIR /app

RUN apt-get update && apt-get install -y \
    curl \
    unzip

RUN pip install --upgrade pip
COPY requirements.txt .
RUN pip install -r requirements.txt

COPY . .

CMD ["python", "main.py"]
58 changes: 58 additions & 0 deletions jobs/ml-ops/data/main.py
@@ -0,0 +1,58 @@
import boto3
import os
import urllib.request
import zipfile

DATA_DIR = "dataset"

ZIP_URL = "http://archive.ics.uci.edu/static/public/222/bank+marketing.zip"
ZIP_DOWNLOAD_PATH = os.path.join(DATA_DIR, "downloaded.zip")
NESTED_ZIP_PATH = os.path.join(DATA_DIR, "bank-additional.zip")

DATA_FILE = "bank-additional-full.csv"
DATA_CSV_PATH = os.path.join(DATA_DIR, "bank-additional", DATA_FILE)


def main():
"""Pulls file from source, and uploads to a target S3 bucket"""

# Download the zip
print(f"Downloading data from {ZIP_URL}")
os.makedirs(DATA_DIR, exist_ok=True)
urllib.request.urlretrieve(ZIP_URL, ZIP_DOWNLOAD_PATH)

# Extract
with zipfile.ZipFile(ZIP_DOWNLOAD_PATH, "r") as fh:
fh.extractall(DATA_DIR)

# Remove original zip
os.remove(ZIP_DOWNLOAD_PATH)

# Extract zip within the zip
with zipfile.ZipFile(NESTED_ZIP_PATH, "r") as fh:
fh.extractall(DATA_DIR)

# Remove nested zip
os.remove(NESTED_ZIP_PATH)

access_key = os.environ["ACCESS_KEY"]
secret_key = os.environ["SECRET_KEY"]
region_name = os.environ["REGION"]

bucket_name = os.environ["S3_BUCKET_NAME"]
s3_url = os.environ["S3_URL"]

print(f"Uploading data to {s3_url}/{bucket_name}")
s3 = boto3.client(
"s3",
region_name=region_name,
endpoint_url=s3_url,
aws_access_key_id=access_key,
aws_secret_access_key=secret_key,
)

s3.upload_file(DATA_CSV_PATH, bucket_name, DATA_FILE)


if __name__ == "__main__":
main()
2 changes: 2 additions & 0 deletions jobs/ml-ops/data/requirements.txt
@@ -0,0 +1,2 @@
boto3==1.33.2
requests==2.31.0
55 changes: 55 additions & 0 deletions jobs/ml-ops/docker-compose.yml
@@ -0,0 +1,55 @@
version: "3"

services:
data:
build:
context: ./data
depends_on:
- minio
environment:
- ACCESS_KEY=example
- SECRET_KEY=example-password
- REGION=foo
- S3_BUCKET_NAME=mlops
- S3_URL=http://minio:9000

training:
build:
context: ./training
depends_on:
- minio
environment:
- ACCESS_KEY=example
- SECRET_KEY=example-password
- REGION=foo
- S3_BUCKET_NAME=mlops
- S3_URL=http://minio:9000

inference:
build:
context: ./inference
ports:
- 8080:80
depends_on:
- minio
environment:
- ACCESS_KEY=example
- SECRET_KEY=example-password
- REGION=foo
- S3_BUCKET_NAME=mlops
- S3_URL=http://minio:9000

minio:
image: minio/minio
ports:
- "9000:9000"
volumes:
- minio_storage:/data
environment:
- MINIO_ROOT_USER=example
- MINIO_ROOT_PASSWORD=example-password
entrypoint: sh
command: -c 'mkdir -p /data/mlops && /usr/bin/minio server /data'

volumes:
minio_storage: {}
10 changes: 10 additions & 0 deletions jobs/ml-ops/inference/Dockerfile
@@ -0,0 +1,10 @@
FROM python:3.12-slim-bookworm

WORKDIR /app

RUN pip install --upgrade pip
COPY requirements.txt .
RUN pip install -r requirements.txt

COPY . .
CMD ["uvicorn", "main:app", "--proxy-headers", "--host", "0.0.0.0", "--port", "80"]