Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Integration with minio doesn't work #265

Open
crus8319 opened this issue Nov 27, 2024 · 4 comments
Open

Integration with minio doesn't work #265

crus8319 opened this issue Nov 27, 2024 · 4 comments

Comments

@crus8319
Copy link

crus8319 commented Nov 27, 2024

ClearML is deployed in docker, the contents of the yml file are:

services:

  apiserver:
    command:
    - apiserver
    container_name: clearml-apiserver
    image: allegroai/clearml:latest
    restart: unless-stopped
    volumes:
    - /opt/clearml/logs:/var/log/clearml
    - /opt/clearml/config:/opt/clearml/config
    - /opt/clearml/data/fileserver:/mnt/fileserver
    depends_on:
      - redis
      - mongo
      - elasticsearch
      - fileserver
    environment:
      CLEARML_ELASTIC_SERVICE_HOST: elasticsearch
      CLEARML_ELASTIC_SERVICE_PORT: 9200
      CLEARML_MONGODB_SERVICE_HOST: mongo
      CLEARML_MONGODB_SERVICE_PORT: 27017
      CLEARML_REDIS_SERVICE_HOST: redis
      CLEARML_REDIS_SERVICE_PORT: 6379
      CLEARML_SERVER_DEPLOYMENT_TYPE: linux
      CLEARML__apiserver__pre_populate__enabled: "true"
      CLEARML__apiserver__pre_populate__zip_files: "/opt/clearml/db-pre-populate"
      CLEARML__apiserver__pre_populate__artifacts_path: "/mnt/fileserver"
      CLEARML__services__async_urls_delete__enabled: "true"
      CLEARML__services__async_urls_delete__fileserver__url_prefixes: "[${CLEARML_FILES_HOST:-}]"
      CLEARML__secure__credentials__services_agent__user_key: ${CLEARML_AGENT_ACCESS_KEY:-}
      CLEARML__secure__credentials__services_agent__user_secret: ${CLEARML_AGENT_SECRET_KEY:-}
    ports:
    - "8008:8008"
    networks:
      - backend
      - frontend

  elasticsearch:
    networks:
      - backend
    container_name: clearml-elastic
    environment:
      bootstrap.memory_lock: "true"
      cluster.name: clearml
      cluster.routing.allocation.node_initial_primaries_recoveries: "500"
      cluster.routing.allocation.disk.watermark.low: 500mb
      cluster.routing.allocation.disk.watermark.high: 500mb
      cluster.routing.allocation.disk.watermark.flood_stage: 500mb
      discovery.type: "single-node"
      http.compression_level: "7"
      node.name: clearml
      reindex.remote.whitelist: "'*.*'"
      xpack.security.enabled: "false"
    ulimits:
      memlock:
        soft: -1
        hard: -1
      nofile:
        soft: 65536
        hard: 65536
    image: ghcr.io/hirotasoshu/elasticsearch:7.17.7
    restart: unless-stopped
    volumes:
      - /opt/clearml/data/elastic_7:/usr/share/elasticsearch/data
      - /usr/share/elasticsearch/logs

  fileserver:
    networks:
      - backend
      - frontend
    command:
    - fileserver
    container_name: clearml-fileserver
    image: allegroai/clearml:latest
    environment:
      CLEARML__fileserver__delete__allow_batch: "true"
    restart: unless-stopped
    volumes:
    - /opt/clearml/logs:/var/log/clearml
    - /opt/clearml/data/fileserver:/mnt/fileserver
    - /opt/clearml/config:/opt/clearml/config
    ports:
    - "8081:8081"

  mongo:
    networks:
      - backend
    container_name: clearml-mongo
    image: mongo:4.4.29
    restart: unless-stopped
    command: --setParameter internalQueryMaxBlockingSortMemoryUsageBytes=196100200
    volumes:
    - /opt/clearml/data/mongo_4/db:/data/db
    - /opt/clearml/data/mongo_4/configdb:/data/configdb

  redis:
    networks:
      - backend
    container_name: clearml-redis
    image: redis:6.2
    restart: unless-stopped
    volumes:
    - /opt/clearml/data/redis:/data

  webserver:
    command:
    - webserver
    container_name: clearml-webserver
    # environment:
    #  CLEARML_SERVER_SUB_PATH : clearml-web # Allow Clearml to be served with a URL path prefix.
    image: allegroai/clearml:latest
    restart: unless-stopped
    depends_on:
      - apiserver
    ports:
    - "8080:80"
    networks:
      - backend
      - frontend

  async_delete:
    depends_on:
      - apiserver
      - redis
      - mongo
      - elasticsearch
      - fileserver
    container_name: async_delete
    image: allegroai/clearml:latest
    networks:
      - backend
    restart: unless-stopped
    environment:
      CLEARML_ELASTIC_SERVICE_HOST: elasticsearch
      CLEARML_ELASTIC_SERVICE_PORT: 9200
      CLEARML_MONGODB_SERVICE_HOST: mongo
      CLEARML_MONGODB_SERVICE_PORT: 27017
      CLEARML_REDIS_SERVICE_HOST: redis
      CLEARML_REDIS_SERVICE_PORT: 6379
      PYTHONPATH: /opt/clearml/apiserver
      CLEARML__services__async_urls_delete__fileserver__url_prefixes: "[${CLEARML_FILES_HOST:-}]"
    entrypoint:
      - python3
      - -m
      - jobs.async_urls_delete
      - --fileserver-host
      - http://localhost:8081
    volumes:
      - /opt/clearml/logs:/var/log/clearml
      - /opt/clearml/config:/opt/clearml/config

  agent-services:
    networks:
      - backend
    container_name: clearml-agent-services
    image: allegroai/clearml-agent-services:latest
    deploy:
      restart_policy:
        condition: on-failure
    privileged: true
    environment:
      CLEARML_HOST_IP: 127.0.0.1
      CLEARML_WEB_HOST: http://localhost:8080
      CLEARML_API_HOST: http://localhost:8008
      CLEARML_FILES_HOST: http://localhost:8081
      CLEARML_API_ACCESS_KEY: ${CLEARML_AGENT_ACCESS_KEY:-$CLEARML_API_ACCESS_KEY}
      CLEARML_API_SECRET_KEY: ${CLEARML_AGENT_SECRET_KEY:-$CLEARML_API_SECRET_KEY}
      CLEARML_AGENT_GIT_USER: ${CLEARML_AGENT_GIT_USER}
      CLEARML_AGENT_GIT_PASS: ${CLEARML_AGENT_GIT_PASS}
      CLEARML_AGENT_UPDATE_VERSION: ${CLEARML_AGENT_UPDATE_VERSION:->=0.17.0}
      CLEARML_AGENT_DEFAULT_BASE_DOCKER: "ubuntu:18.04"
      #AWS_ACCESS_KEY_ID: 2S6GmLJT6zxqAMk9ryyw
      #AWS_SECRET_ACCESS_KEY: QGs72qh7ffNaoy0NtELrywZfoqSTiXG68s6wqyJk
      CLEARML_WORKER_ID: "clearml-default"
      CLEARML_AGENT_DOCKER_HOST_MOUNT: "/opt/clearml/agent:/root/.clearml"
      SHUTDOWN_IF_NO_ACCESS_KEY: 1
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock
      - /opt/clearml/agent:/root/.clearml
    depends_on:
      - apiserver
    entrypoint: >
      bash -c "curl --retry 10 --retry-delay 10 --retry-connrefused 'http://10.10.196.25:8008/debug.ping' && /usr/agent/entrypoint.sh"

networks:
  backend:
    driver: bridge
  frontend:
    driver: bridge

Contents of clearml.conf:

    aws {
        s3 {
            # S3 credentials, used for read/write access by various SDK elements

            # The following settings will be used for any bucket not specified below in the "credentials" section
            # ---------------------------------------------------------------------------------------------------
            #region: ""
            # Specify explicit keys
            #key: ""
            #secret: ""
            # Or enable credentials chain to let Boto3 pick the right credentials.
            # This includes picking credentials from environment variables,
            # credential file and IAM role using metadata service.
            # Refer to the latest Boto3 docs
            use_credentials_chain: false
            # Additional ExtraArgs passed to boto3 when uploading files. Can also be set per-bucket under "credentials".
            extra_args: {}
            # ---------------------------------------------------------------------------------------------------


            credentials: [
                # specifies key/secret credentials to use when handling s3 urls (read or write)
                # {
                      bucket: "clearml"
                      key: "2S6GmLJT6zxqAMk9ryyw"
                      secret: "QGs72qh7ffNaoy0NtELrywZfoqSTiXG68s6wqyJk"
                # },
                # {
                #     # This will apply to all buckets in this host (unless key/value is specifically provided for a given bucket)
                      host: "http://10.10.199.14:9001"
                      multipart: false
                      secure: false
                # }
            ]
        }

Uploading task's artifacts doesn't work. I tried different options for specifying the url:

output_url = minio_url/clearml: same as above
output_url = http://minio_url/clearml
output_url = s3://clearml
output_url = s3://minio_url/clearml
output_url = s3://minio_url:9001/clearml
output_url = http://minio_url:9001/clearml

Sample task:


# Initializing a task

task = Task.init(
    project_name="Test MinIO via ClearML",
    task_name="Upload File to MinIO",
    output_uri="s3://10.10.199.14/clearml/"
)

# Upload file
file_path = "test_upload_clearml.txt"
task.upload_artifact(name="test_file", artifact_object=file_path)
print(f"File '{file_path}' successfully loaded via ClearML")
task.close()

Minio definitely works, I checked it through the minio client from the clearml virtual machine. I would be grateful for any help.

@jkhenning
Copy link
Member

@crus8319 I think your clearml.conf is invalid, it should be:

    aws {
        s3 {
            # S3 credentials, used for read/write access by various SDK elements

            # The following settings will be used for any bucket not specified below in the "credentials" section
            # ---------------------------------------------------------------------------------------------------
            #region: ""
            # Specify explicit keys
            #key: ""
            #secret: ""
            # Or enable credentials chain to let Boto3 pick the right credentials.
            # This includes picking credentials from environment variables,
            # credential file and IAM role using metadata service.
            # Refer to the latest Boto3 docs
            use_credentials_chain: false
            # Additional ExtraArgs passed to boto3 when uploading files. Can also be set per-bucket under "credentials".
            extra_args: {}
            # ---------------------------------------------------------------------------------------------------


            credentials: [
                # specifies key/secret credentials to use when handling s3 urls (read or write)
                {
                      host: "10.10.199.14:9001"
                      bucket: "clearml"
                      key: "2S6GmLJT6zxqAMk9ryyw"
                      secret: "QGs72qh7ffNaoy0NtELrywZfoqSTiXG68s6wqyJk"
                      multipart: false
                      secure: false
                }
            ]
        }

i.e. the contents of credentials should be a list of dictionaries (you've commented out the curly brackets for some reason), and the host definition should not contain a scheme (i.e. without http)

@crus8319
Copy link
Author

@crus8319 Я думаю, что ваш clearml.conf недействителен, он должен быть таким:

    aws {
        s3 {
            # S3 credentials, used for read/write access by various SDK elements

            # The following settings will be used for any bucket not specified below in the "credentials" section
            # ---------------------------------------------------------------------------------------------------
            #region: ""
            # Specify explicit keys
            #key: ""
            #secret: ""
            # Or enable credentials chain to let Boto3 pick the right credentials.
            # This includes picking credentials from environment variables,
            # credential file and IAM role using metadata service.
            # Refer to the latest Boto3 docs
            use_credentials_chain: false
            # Additional ExtraArgs passed to boto3 when uploading files. Can also be set per-bucket under "credentials".
            extra_args: {}
            # ---------------------------------------------------------------------------------------------------


            credentials: [
                # specifies key/secret credentials to use when handling s3 urls (read or write)
                {
                      host: "10.10.199.14:9001"
                      bucket: "clearml"
                      key: "2S6GmLJT6zxqAMk9ryyw"
                      secret: "QGs72qh7ffNaoy0NtELrywZfoqSTiXG68s6wqyJk"
                      multipart: false
                      secure: false
                }
            ]
        }

т.е. содержимое credentials должно быть списком словарей (вы зачем-то закомментировали фигурные скобки), а определение хоста не должно содержать схему (т.е. без http)

Thank you for your help. I changed the s3 section in clearml.conf to the one you provided. But now when performing a test task of loading a file on minio, an error appears:

ClearML Task: overwriting (reusing) task id=156ffa6693574436a0365a731803fa85
2024-11-27 16:02:04,150 - clearml.storage - ERROR - Failed creating storage object s3://clearml Reason: Missing key and secret for S3 storage access (s3://clearml)
Traceback (most recent call last):
  File "/home/kipadmin/test_clearml_upload.py", line 4, in <module>
    task = Task.init(
           ^^^^^^^^^^
  File "/root/.local/lib/python3.11/site-packages/clearml/task.py", line 629, in init
    task.output_uri = output_uri
    ^^^^^^^^^^^^^^^
  File "/root/.local/lib/python3.11/site-packages/clearml/task.py", line 1369, in output_uri
    raise ValueError("Could not get access credentials for '{}' "
ValueError: Could not get access credentials for 's3://clearml/' , check configuration file ~/clearml.conf

Test task code:

from clearml import Task

# Task init
task = Task.init(
    project_name="Test MinIO via ClearML",
    task_name="Upload File to MinIO",
    output_uri="s3://clearml/"
)

# Upload file
file_path = "test_upload_clearml.txt"
task.upload_artifact(name="test_file", artifact_object=file_path)
print(f"File '{file_path}' successfully uploaded via ClearML.")
task.close()

Should I specify the data for connecting to minio before performing the test task of file transfer? I run the task from the Linux console, do I understand correctly that in this case ClearML SDK is used?

@ainoam
Copy link
Collaborator

ainoam commented Nov 27, 2024

@crus8319 Seems like you're missing the host part of the output URI i.e. output_uri="s3://10.10.199.14:9001/clearml/"?

@crus8319
Copy link
Author

@crus8319 Seems like you're missing the host part of the output URI i.e. output_uri="s3://10.10.199.14:9001/clearml/"?

This type of writing produces the following errors when performing a task:

ClearML Task: overwriting (reusing) task id=156ffa6693574436a0365a731803fa85
2024-11-27 17:09:30,811 - clearml.Task - INFO - No repository found, storing script code instead
2024-11-27 17:09:31,040 - clearml.storage - ERROR - Failed uploading: An error occurred (InvalidArgument) when calling the PutObject operation: S3 API Requests must be made to API port.
Traceback (most recent call last):
  File "/root/.local/lib/python3.11/site-packages/clearml/storage/helper.py", line 2820, in check_write_permissions
    self.delete(path=dest_path)
  File "/root/.local/lib/python3.11/site-packages/clearml/storage/helper.py", line 2805, in delete
    return self._driver.delete_object(self.get_object(path), silent=silent)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/root/.local/lib/python3.11/site-packages/clearml/storage/helper.py", line 630, in delete_object
    object.delete()
  File "/root/.local/lib/python3.11/site-packages/boto3/resources/factory.py", line 581, in do_action
    response = action(self, *args, **kwargs)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/root/.local/lib/python3.11/site-packages/boto3/resources/action.py", line 88, in __call__
    response = getattr(parent.meta.client, operation_name)(*args, **params)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/root/.local/lib/python3.11/site-packages/botocore/client.py", line 569, in _api_call
    return self._make_api_call(operation_name, kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/root/.local/lib/python3.11/site-packages/botocore/client.py", line 1023, in _make_api_call
    raise error_class(parsed_response, operation_name)
botocore.exceptions.ClientError: An error occurred () when calling the DeleteObject operation:

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/kipadmin/test_clearml_upload.py", line 4, in <module>
    task = Task.init(
           ^^^^^^^^^^
  File "/root/.local/lib/python3.11/site-packages/clearml/task.py", line 629, in init
    task.output_uri = output_uri
    ^^^^^^^^^^^^^^^
  File "/root/.local/lib/python3.11/site-packages/clearml/task.py", line 1371, in output_uri
    helper.check_write_permissions(value)
  File "/root/.local/lib/python3.11/site-packages/clearml/storage/helper.py", line 2822, in check_write_permissions
    raise ValueError("Insufficient permissions (delete failed) for {}".format(base_url))
ValueError: Insufficient permissions (delete failed) for s3://10.10.199.14:9001/clearml/

I also refer to this https://github.com/allegroai/clearml/issues/391 discussion thread, which states that it is correct to pass the task URL as s3://s3_bucket_name/

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

3 participants