Skip to content

Commit

Permalink
All resources as externally hosted
Browse files (browse the repository at this point in the history)
  • Loading branch information
blagojabozinovski committed May 16, 2024
1 parent d15f28b commit 424fbc4
Showing 1 changed file with 27 additions and 21 deletions.
48 changes: 27 additions & 21 deletions ckanext/archiver/tasks.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -258,9 +258,10 @@ def _save(status_id, exception, resource, url_redirected_to=None,

if resource.get('url_type') == 'upload':
upload = uploader.get_resource_uploader(resource)
filepath = upload.get_path(resource['id'])
filepath = upload.get_bucket_path(resource['id'])

hosted_externally = not url.startswith(config['ckan.site_url']) or urlparse(filepath).scheme != ''
# hosted_externally = not url.startswith(config['ckan.site_url']) or urlparse(filepath).scheme != ''
hosted_externally = True
# if resource.get('resource_type') == 'file.upload' and not hosted_externally:
if not hosted_externally:
log.info("Won't attemp to archive resource uploaded locally: %s" % resource['url'])
Expand Down Expand Up @@ -589,18 +590,30 @@ def archive_resource(context, resource, log, result=None, url_timeout=30):
log.error('chmod failed %s: %s', saved_file, e)
raise
log.info('Archived resource as: %s', saved_file)

# upload the file to s3
s3_file_path = saved_file
s3_file_path = s3_file_path.replace(storage_path, "archiver")
blob = bucket.blob(s3_file_path)
blob.upload_from_filename(saved_file)

# delete the file from archive storage
if os.path.isfile(saved_file):
os.remove(saved_file)
# save in database for cachefile_path the path from S3 storage
saved_file = s3_file_path

if resource['url_type'] != 'upload':
# upload the file to s3
s3_file_path = saved_file
s3_file_path = s3_file_path.replace(storage_path, "archiver")
blob = bucket.blob(s3_file_path)
blob.upload_from_filename(saved_file)

# delete the file from archive storage
if os.path.isfile(saved_file):
os.remove(saved_file)
# save in database for cachefile_path the path from S3 storage
saved_file = s3_file_path

# calculate the cache_url for link resource
if not context.get('cache_url_root'):
log.warning('Not saved cache_url because no value for '
'ckanext-archiver.cache_url_root in config')
raise ArchiveError(_('No value for ckanext-archiver.cache_url_root in config'))
cache_url = urljoin(str(context['cache_url_root']),
'%s/%s' % (str(relative_archive_path), str(file_name)))
else:
# cache_url for uploaded resource is the same as download url
cache_url = resource['url']

else:
shutil.move(result['saved_file'], saved_file)
Expand All @@ -612,13 +625,6 @@ def archive_resource(context, resource, log, result=None, url_timeout=30):
raise
log.info('Archived resource as: %s', saved_file)

# calculate the cache_url
if not context.get('cache_url_root'):
log.warning('Not saved cache_url because no value for '
'ckanext-archiver.cache_url_root in config')
raise ArchiveError(_('No value for ckanext-archiver.cache_url_root in config'))
cache_url = urljoin(str(context['cache_url_root']),
'%s/%s' % (str(relative_archive_path), str(file_name)))
return {'cache_filepath': saved_file,
'cache_url': cache_url}

Expand Down

0 comments on commit 424fbc4

Please sign in to comment.