Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CTX-5524: Created template for compiling onnx models to zkml format #177

Merged
merged 6 commits into from
Aug 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions tasks/zkml-convert-model/.mypy.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Global options:

[mypy]
python_version = 3.9
pretty = True
warn_return_any = True
warn_no_return = True
warn_redundant_casts = True
warn_unused_configs = True
warn_unused_ignores = True
warn_unreachable = True
disallow_subclassing_any = True
disallow_untyped_calls = True
disallow_untyped_defs = True
disallow_incomplete_defs = True
no_implicit_optional = True
strict_optional = True
allow_redefinition = False


# Per-module options:
# https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-imports
[mypy-onnx.*]
ignore_missing_imports = True

[mypy-ezkl.*]
ignore_missing_imports = True
124 changes: 124 additions & 0 deletions tasks/zkml-convert-model/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
from pathlib import Path

import json
import asyncio
import logging

from coretex import currentTaskRun, folder_manager, Model

import ezkl
import onnx
import numpy as np
bogdant36 marked this conversation as resolved.
Show resolved Hide resolved


def linkFolder(source: Path, destination: Path) -> None:
    """
    Mirror the files of ``source`` into ``destination`` using hard links.

    Every regular entry found recursively under ``source`` is hard-linked
    at the same relative location under ``destination``. Parent directories
    are created on demand; directories that contain no files are not
    recreated. Linking is best effort: a file that cannot be linked is
    logged and skipped so the remaining files are still processed.

    Args:
        source: Directory whose files are linked.
        destination: Directory that receives the links. It is created if
            missing, but its parent must already exist.
    """

    # os.link is available on every supported interpreter, unlike
    # Path.link_to (removed in 3.12) and Path.hardlink_to (added in 3.10).
    # Imported locally so this function does not depend on file-level imports.
    import os

    destination.mkdir(exist_ok = True)

    for item in source.rglob("*"):
        if item.is_dir():
            continue

        destItem = destination / item.relative_to(source)
        destItem.parent.mkdir(parents = True, exist_ok = True)

        try:
            # Creates "destItem" as a hard link pointing at "item"
            os.link(item, destItem)
        except Exception as e:
            # A single handler covers every interpreter version, so a failed
            # link is always logged instead of propagating on newer Pythons
            logging.error(f">> [ZKML] Failed to link {item} to {destItem}: {e}")


def generateDummyInput(onnxModelPath: Path) -> dict[str, list[list[float]]]:
    """
    Build a random input sample matching the first input of an ONNX model.

    The element type and shape are read from the first entry of the model's
    graph inputs. The leading dimension is forced to 1, and any dimension
    without a concrete positive size is also replaced by 1 so that the
    generated tensor is never empty.

    Args:
        onnxModelPath: Path to the .onnx file to inspect.

    Returns:
        A dictionary with a single ``input_data`` key holding a list that
        contains one flattened sample.

    Raises:
        ValueError: If the model graph declares no inputs.
    """

    model = onnx.load(onnxModelPath)

    if len(model.graph.input) == 0:
        raise ValueError(">> [ZKML] ONNX model does not declare any inputs")

    tensorType = model.graph.input[0].type.tensor_type

    # helper.tensor_dtype_to_np_dtype replaces the deprecated
    # onnx.mapping.TENSOR_TYPE_TO_NP_TYPE lookup table
    dtype = onnx.helper.tensor_dtype_to_np_dtype(tensorType.elem_type)

    # dim_value is 0 when the dimension is symbolic or unset; fall back to 1
    # so the sample keeps at least one element
    shape = [dim.dim_value if dim.dim_value > 0 else 1 for dim in tensorType.shape.dim]
    if len(shape) > 0:
        # A single sample is generated regardless of the declared first dimension
        shape[0] = 1

    data = np.random.random(tuple(shape)).astype(dtype)
    flattenedData = data.reshape(-1).tolist()

    return dict(input_data = [flattenedData])


async def compileModel(
    onnxPath: Path,
    settings: Path,
    compiledModel: Path,
    verifKey: Path,
    proofKey: Path
) -> None:
    """
    Run the EZKL calibrate / compile / SRS / setup sequence for an ONNX model.

    A dummy input generated from the model is written to "input.json" inside
    the temp folder and passed to the settings calibration. The circuit is
    then compiled, the SRS is fetched and setup is run with the key paths.

    Args:
        onnxPath: Path to the source .onnx model.
        settings: Path to the EZKL settings file used by every step.
        compiledModel: Path passed to EZKL as the compiled circuit location.
        verifKey: Path passed to EZKL setup as the verification key.
        proofKey: Path passed to EZKL setup as the proving key.
    """

    calibrationInput = folder_manager.temp / "input.json"

    # The file is opened before the dummy input is generated, same as json.dump
    with calibrationInput.open("w") as inputFile:
        inputFile.write(json.dumps(generateDummyInput(onnxPath)))

    await ezkl.calibrate_settings(
        calibrationInput,
        onnxPath,
        settings,
        target = "resources",
        max_logrows = 12,
        scales = [2]
    )

    ezkl.compile_circuit(onnxPath, compiledModel, settings)
    await ezkl.get_srs(settings)
    ezkl.setup(compiledModel, verifKey, proofKey)


def main() -> None:
    """
    Convert the ONNX model given as the "onnxModel" parameter into an EZKL circuit.

    Steps:
        1. Validate that at most one of the privacy parameters is True.
        2. Download the input Model and locate its single .onnx file.
        3. Link the input Model files into a temp folder and create the
           "compiledModel" output directory inside it.
        4. Generate EZKL settings, then calibrate, compile and set up the circuit.
        5. Upload the temp folder as a new Model, tag it and submit it as
           the "outputCircuit" output.

    Raises:
        ValueError: If more than one privacy parameter is True, or the input
            Model does not contain exactly one .onnx file.
        FileExistsError: If the input Model already contains a
            "compiledModel" directory.
    """

    taskRun = currentTaskRun()

    # Validate the privacy flags before any download or file work so that an
    # invalid configuration fails immediately
    visibilities = [
        taskRun.parameters["privateInput"],
        taskRun.parameters["privateOutput"],
        taskRun.parameters["privateModel"]
    ]

    if visibilities.count(True) > 1:
        raise ValueError(">> [ZKML] Only one of three privacy parameters can be True")

    ctxOnnxModel: Model = taskRun.parameters["onnxModel"]
    ctxOnnxModel.download()

    onnxPaths = list(ctxOnnxModel.path.rglob("*.onnx"))
    if len(onnxPaths) != 1:
        raise ValueError(f">> [ZKML] Model files have to contain exactly one .onnx file. Found {len(onnxPaths)}")

    onnxPath = onnxPaths[0]

    # Check for a name collision before linking anything into the temp folder
    compiledModelDirName = "compiledModel"
    if ctxOnnxModel.path.joinpath(compiledModelDirName).exists():
        raise FileExistsError(">> [ZKML] Output directory \"compiledModel\" found in the input Model. This will cause issues when copying input Model files to output Model")

    modelDir = folder_manager.createTempFolder("model")
    linkFolder(ctxOnnxModel.path, modelDir)

    compiledModelDir = modelDir / compiledModelDirName
    compiledModelDir.mkdir()

    # Define paths
    compiledModelPath = compiledModelDir / "model.compiled"
    proofKey = compiledModelDir / "prove.pk"
    verifKey = compiledModelDir / "verify.pk"
    settingsPath = compiledModelDir / "settings.json"

    logging.info(">> [ZKML] Setting up EZKL")

    pyRunArgs = ezkl.PyRunArgs()
    pyRunArgs.input_visibility = "private" if visibilities[0] else "public"
    pyRunArgs.output_visibility = "private" if visibilities[1] else "public"
    # Non-private model parameters use the "fixed" visibility instead of "public"
    pyRunArgs.param_visibility = "private" if visibilities[2] else "fixed"

    logging.info(">> [ZKML] Generating settings")
    ezkl.gen_settings(onnxPath, settingsPath, py_run_args = pyRunArgs)

    logging.info(">> [ZKML] Compiling model")
    asyncio.run(compileModel(onnxPath, settingsPath, compiledModelPath, verifKey, proofKey))

    logging.info(">> [ZKML] EZKL setup complete")
    logging.info(">> [ZKML] Uploading model and EZKL files")
    ctxModel = Model.createModel(taskRun.generateEntityName(), taskRun.projectId, ctxOnnxModel.accuracy, {})
    ctxModel.upload(modelDir)
    ctxModel.addTag("verified")

    taskRun.submitOutput("outputCircuit", ctxModel)


if __name__ == "__main__":
main()
4 changes: 4 additions & 0 deletions tasks/zkml-convert-model/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
coretex
ezkl
onnx
numpy
37 changes: 37 additions & 0 deletions tasks/zkml-convert-model/task.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
id: zkml-model-conversion
name: zkml-model-conversion
description: Converts an ONNX model to an EZKL circuit. Also copies all input model files to the output model, allowing for endpoint creation
is_active: true
project_type: 8
param_groups:
- name: inputs
params:
- name: onnxModel
description: ONNX model
value: null
data_type: model
required: true
- name: outputs
params:
- name: outputCircuit
description: The model to which this TaskRun will output its results
value: null
data_type: model
required: false
- name: parameters
params:
- name: privateInput
description: Whether the input data is private (otherwise public). Only one variable can be private
value: false
data_type: bool
required: true
- name: privateOutput
description: Whether the outputs are private (otherwise public). Only one variable can be private
value: false
data_type: bool
required: true
- name: privateModel
description: Whether the model parameters are private (otherwise public). Only one variable can be private
value: false
data_type: bool
required: true
Loading