diff --git a/models/phishing-models/README.md b/models/phishing-models/README.md new file mode 100644 index 0000000000..fcf75eb35d --- /dev/null +++ b/models/phishing-models/README.md @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:878d2ecc560168f3e13903bfd4fb8f59c8a21f563cf389a0ea99f3b52e746e1a +size 1675 diff --git a/models/ransomware-models/ransomw-model-long-rf/checkpoint.tl b/models/ransomware-models/ransomw-model-long-rf/checkpoint.tl index 62b5cce607..6be0593905 100644 Binary files a/models/ransomware-models/ransomw-model-long-rf/checkpoint.tl and b/models/ransomware-models/ransomw-model-long-rf/checkpoint.tl differ diff --git a/models/ransomware-models/ransomw-model-medium-rf/checkpoint.tl b/models/ransomware-models/ransomw-model-medium-rf/checkpoint.tl index 24afedf5d1..cab73f763b 100644 Binary files a/models/ransomware-models/ransomw-model-medium-rf/checkpoint.tl and b/models/ransomware-models/ransomw-model-medium-rf/checkpoint.tl differ diff --git a/models/sid-models/README.md b/models/sid-models/README.md new file mode 100644 index 0000000000..78b7c87e28 --- /dev/null +++ b/models/sid-models/README.md @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2caa0ecb432ca5b9a471aae2c17be1b66dffd85bc4bebe52a0f215992c9c4412 +size 1955 diff --git a/models/triton-model-repo/phishing-bert-trt/1/README.md b/models/triton-model-repo/phishing-bert-trt/1/README.md deleted file mode 100644 index 7c3644943f..0000000000 --- a/models/triton-model-repo/phishing-bert-trt/1/README.md +++ /dev/null @@ -1,28 +0,0 @@ - - -# Generating TRT Models from ONNX - -This model in the `triton-model-repo` directory is intentionally missing the model file. This is due to the fact that TensorRT maximizes performance of models for a *particular machine*. Any pre-compiled TensorRT engine file at best would have poor performance and most likely would not even load on other machines. - -Therefore, it is best to compile a TensorRT engine file for on each machine that it will be run on. To facilitate this, Morpheus contains a utility to input an ONNX file and export the TensorRT engine file. To generate the necessary TensorRT engine file for this model, run the following from the same directory as this README: - -```bash -morpheus tools onnx-to-trt --input_model ../../phishing-bert-onnx/1/model.onnx --output_model ./model.plan --batches 1 8 --batches 1 16 --batches 1 32 --seq_length 128 --max_workspace_size 16000 -``` - -Note: If you get an out-of-memory error, reduce the `--max_workspace_size` argument until it will successfully run. diff --git a/models/triton-model-repo/phishing-bert-trt/config.pbtxt b/models/triton-model-repo/phishing-bert-trt/config.pbtxt deleted file mode 100644 index b5400ecbe0..0000000000 --- a/models/triton-model-repo/phishing-bert-trt/config.pbtxt +++ /dev/null @@ -1,36 +0,0 @@ -name: "phishing-bert-trt" -platform: "tensorrt_plan" -max_batch_size: 32 - -input [ - { - name: "input_ids" - data_type: TYPE_INT32 - dims: [ 128 ] - }, - { - name: "attention_mask" - data_type: TYPE_INT32 - dims: [ 128 ] - } -] -output [ - { - name: "output" - data_type: TYPE_FP32 - dims: [ 2 ] - } -] - -dynamic_batching { - preferred_batch_size: [ 1, 4, 8, 12, 16, 20, 24, 28, 32 ] - max_queue_delay_microseconds: 50000 -} - -instance_group [ - { - count: 1 - kind: KIND_GPU - profile: ["2"] - } -] diff --git a/models/triton-model-repo/sid-minibert-trt/1/README.md b/models/triton-model-repo/sid-minibert-trt/1/README.md deleted file mode 100644 index bf171c797c..0000000000 --- a/models/triton-model-repo/sid-minibert-trt/1/README.md +++ /dev/null @@ -1,35 +0,0 @@ - - -# Generating TRT Models from ONNX - -## Prerequisites -The ONNX to TensorRT conversion utility requires additional packages, which can be installed using the following command: -```bash -conda env update --solver=libmamba -n morpheus --file conda/environments/model-utils_cuda-125_arch-x86_64.yaml -``` - - -This model in the `triton-model-repo` directory is intentionally missing the model file. This is due to the fact that TensorRT maximizes performance of models for a *particular machine*. Any pre-compiled TensorRT engine file at best would have poor performance and most likely would not even load on other machines. - -Therefore, it is best to compile a TensorRT engine file for on each machine that it will be run on. To facilitate this, Morpheus contains a utility to input an ONNX file and export the TensorRT engine file. To generate the necessary TensorRT engine file for this model, run the following from the same directory as this README: - -```bash -morpheus --log_level=info tools onnx-to-trt --input_model ../../sid-minibert-onnx/1/model.onnx --output_model ./model.plan --batches 1 8 --batches 1 16 --batches 1 32 --seq_length 256 --max_workspace_size 16000 -``` - -Note: If you get an out-of-memory error, reduce the `--max_workspace_size` argument until it will successfully run. diff --git a/models/triton-model-repo/sid-minibert-trt/config.pbtxt b/models/triton-model-repo/sid-minibert-trt/config.pbtxt deleted file mode 100644 index a2dba952d2..0000000000 --- a/models/triton-model-repo/sid-minibert-trt/config.pbtxt +++ /dev/null @@ -1,37 +0,0 @@ -name: "sid-minibert-trt" -platform: "tensorrt_plan" -max_batch_size: 32 -# default_model_filename: "sid-minibert-trt_b1-8_b1-16_b1-32.engine" - -input [ - { - name: "input_ids" - data_type: TYPE_INT32 - dims: [ 256 ] - }, - { - name: "attention_mask" - data_type: TYPE_INT32 - dims: [ 256 ] - } -] -output [ - { - name: "output" - data_type: TYPE_FP32 - dims: [ 10 ] - } -] - -dynamic_batching { - preferred_batch_size: [ 1, 4, 8, 12, 16, 20, 24, 28, 32 ] - max_queue_delay_microseconds: 50000 -} - -instance_group [ - { - count: 1 - kind: KIND_GPU - profile: ["2"] - } -]