Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

S3 external files OS Error #1083

Open
horsto opened this issue May 11, 2023 · 1 comment
Open

S3 external files OS Error #1083

horsto opened this issue May 11, 2023 · 1 comment
Labels

Comments

@horsto
Copy link
Contributor

horsto commented May 11, 2023

I am trying to retrieve external blobs saved in a Linode (Akamai) S3 bucket. This works fine most of the time, but fetching larger blobs seems to fail with the following error:

---------------------------------------------------------------------------
OSError                                   Traceback (most recent call last)
File ~/miniconda3/envs/octo_code/lib/python3.10/site-packages/urllib3/response.py:444, in HTTPResponse._error_catcher(self)
    443 try:
--> 444     yield
    446 except SocketTimeout:
    447     # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but
    448     # there is yet no clean way to get at it from this context.

File ~/miniconda3/envs/octo_code/lib/python3.10/site-packages/urllib3/response.py:567, in HTTPResponse.read(self, amt, decode_content, cache_content)
    566 with self._error_catcher():
--> 567     data = self._fp_read(amt) if not fp_closed else b""
    568     if amt is None:

File ~/miniconda3/envs/octo_code/lib/python3.10/site-packages/urllib3/response.py:533, in HTTPResponse._fp_read(self, amt)
    531 else:
    532     # StringIO doesn't like amt=None
--> 533     return self._fp.read(amt) if amt is not None else self._fp.read()

File ~/miniconda3/envs/octo_code/lib/python3.10/http/client.py:481, in HTTPResponse.read(self, amt)
    480 try:
--> 481     s = self._safe_read(self.length)
    482 except IncompleteRead:

File ~/miniconda3/envs/octo_code/lib/python3.10/http/client.py:630, in HTTPResponse._safe_read(self, amt)
    624 """Read the number of bytes requested.
    625 
    626 This function should be used when <amt> bytes "should" be present for
    627 reading. If the bytes are truly not available (due to EOF), then the
    628 IncompleteRead exception can be used to detect the problem.
    629 """
--> 630 data = self.fp.read(amt)
    631 if len(data) < amt:

File ~/miniconda3/envs/octo_code/lib/python3.10/socket.py:705, in SocketIO.readinto(self, b)
    704 try:
--> 705     return self._sock.recv_into(b)
    706 except timeout:

OSError: [Errno 22] Invalid argument

During handling of the above exception, another exception occurred:

ProtocolError                             Traceback (most recent call last)
Cell In[34], line 1
----> 1 data_to_export = (CuttleHeadMantle & key).fetch1(data2retrieve)
      2 additional_data = (CuttleHeadMantle.Behavior & key).fetch1(additional_data_label)
      4 len(data_to_export)

File ~/miniconda3/envs/octo_code/lib/python3.10/site-packages/datajoint/fetch.py:346, in Fetch1.__call__(self, squeeze, download_path, *attrs)
    344 else:  # fetch some attributes, return as tuple
    345     attributes = [a for a in attrs if not is_key(a)]
--> 346     result = self._expression.proj(*attributes).fetch(
    347         squeeze=squeeze, download_path=download_path, format="array"
    348     )
    349     if len(result) != 1:
    350         raise DataJointError(
    351             "fetch1 should only return one tuple. %d tuples found" % len(result)
    352         )

File ~/miniconda3/envs/octo_code/lib/python3.10/site-packages/datajoint/fetch.py:289, in Fetch.__call__(self, offset, limit, order_by, format, as_dict, squeeze, download_path, *attrs)
    286     raise e
    287 for name in heading:
    288     # unpack blobs and externals
--> 289     ret[name] = list(map(partial(get, heading[name]), ret[name]))
    290 if format == "frame":
    291     ret = pandas.DataFrame(ret).set_index(heading.primary_key)

File ~/miniconda3/envs/octo_code/lib/python3.10/site-packages/datajoint/fetch.py:111, in _get(connection, attr, data, squeeze, download_path)
    103         safe_write(local_filepath, data.split(b"\0", 1)[1])
    104     return adapt(str(local_filepath))  # download file from remote store
    106 return adapt(
    107     uuid.UUID(bytes=data)
    108     if attr.uuid
    109     else (
    110         blob.unpack(
--> 111             extern.get(uuid.UUID(bytes=data)) if attr.is_external else data,
    112             squeeze=squeeze,
    113         )
    114         if attr.is_blob
    115         else data
    116     )
    117 )

File ~/miniconda3/envs/octo_code/lib/python3.10/site-packages/datajoint/external.py:203, in ExternalTable.get(self, uuid)
    201 if blob is None:
    202     try:
--> 203         blob = self._download_buffer(self._make_uuid_path(uuid))
    204     except MissingExternalFile:
    205         if not SUPPORT_MIGRATED_BLOBS:

File ~/miniconda3/envs/octo_code/lib/python3.10/site-packages/datajoint/external.py:142, in ExternalTable._download_buffer(self, external_path)
    140 def _download_buffer(self, external_path):
    141     if self.spec["protocol"] == "s3":
--> 142         return self.s3.get(external_path)
    143     if self.spec["protocol"] == "file":
    144         return Path(external_path).read_bytes()

File ~/miniconda3/envs/octo_code/lib/python3.10/site-packages/datajoint/s3.py:71, in Folder.get(self, name)
     69 logger.debug("get: {}:{}".format(self.bucket, name))
     70 try:
---> 71     return self.client.get_object(self.bucket, str(name)).data
     72 except minio.error.S3Error as e:
     73     if e.code == "NoSuchKey":

File ~/miniconda3/envs/octo_code/lib/python3.10/site-packages/urllib3/response.py:306, in HTTPResponse.data(self)
    303     return self._body
    305 if self._fp:
--> 306     return self.read(cache_content=True)

File ~/miniconda3/envs/octo_code/lib/python3.10/site-packages/urllib3/response.py:566, in HTTPResponse.read(self, amt, decode_content, cache_content)
    563 flush_decoder = False
    564 fp_closed = getattr(self._fp, "closed", False)
--> 566 with self._error_catcher():
    567     data = self._fp_read(amt) if not fp_closed else b""
    568     if amt is None:

File ~/miniconda3/envs/octo_code/lib/python3.10/contextlib.py:153, in _GeneratorContextManager.__exit__(self, typ, value, traceback)
    151     value = typ()
    152 try:
--> 153     self.gen.throw(typ, value, traceback)
    154 except StopIteration as exc:
    155     # Suppress StopIteration *unless* it's the same exception that
    156     # was passed to throw().  This prevents a StopIteration
    157     # raised inside the "with" statement from being suppressed.
    158     return exc is not value

File ~/miniconda3/envs/octo_code/lib/python3.10/site-packages/urllib3/response.py:461, in HTTPResponse._error_catcher(self)
    457     raise ReadTimeoutError(self._pool, None, "Read timed out.")
    459 except (HTTPException, SocketError) as e:
    460     # This includes IncompleteRead.
--> 461     raise ProtocolError("Connection broken: %r" % e, e)
    463 # If no exception is thrown, we should avoid cleaning up
    464 # unnecessarily.
    465 clean_exit = True

ProtocolError: ("Connection broken: OSError(22, 'Invalid argument')", OSError(22, 'Invalid argument'))

Reproducibility

Include:

  • OS: macOS 13.3.1
  • Python 3.10
  • MySQL 8.0
  • MySQL Deployment Strategy: Docker
  • DataJoint Python 0.13.8
@horsto horsto added the bug label May 11, 2023
@horsto
Copy link
Contributor Author

horsto commented May 11, 2023

Opened a new issue on the MinIO Python API repository: minio/minio-py#1280

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
Projects
None yet
Development

No branches or pull requests

1 participant