diff --git a/.gitignore b/.gitignore index c43f3e98..2397c28d 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,7 @@ *.swo *.swp .DS_Store +.env # Import artifacts *_import @@ -12,6 +13,9 @@ sfm_pc/management/commands/data/ country_data/ wwic_download.zip +data/wwic_download/countries/*.csv +data/wwic_download/countries/*.geojson +data/wwic_download/metadata/*.pdf /staticfiles bin/ diff --git a/README.md b/README.md index 6e00c486..5d3a78d9 100644 --- a/README.md +++ b/README.md @@ -322,7 +322,7 @@ docker-compose run --rm app ./manage.py update_composition_index --recreate ``` #### data_archive config -If you need to work with the `data_archive` make recipe, `cp .env. s3.example .env` and add your AWS access tokens. These tokens must be for an IAM user attached to the correct policy for accessing the `wwic-data-archive-staging` S3 bucket. This bucket is configured to work in local development and on the staging server. +If you need to work with the `data_archive` make recipe in your local development environment, `cp .env.s3.example .env` and add your AWS access tokens. These tokens must be for an IAM user attached to the correct policy for accessing the `wwic-data-archive-staging` S3 bucket. This bucket is configured to work in local development and on the staging server. To create an archive locally, run `docker-compose --env-file .env.s3 run --rm app make data_archive` so you can upload the zip archive to S3. Once this is done, the "download" link at the `localhost:8000/en/download/` should work. 
diff --git a/data/processors/blank_columns.py b/data/processors/blank_columns.py new file mode 100644 index 00000000..60c00686 --- /dev/null +++ b/data/processors/blank_columns.py @@ -0,0 +1,34 @@ +import csv +import sys +import argparse + +# init arg parser +parser = argparse.ArgumentParser() +parser.add_argument('--entity', type=str, required=True) +args = parser.parse_args() + +# init the incoming data as a dict reader +reader = csv.DictReader(sys.stdin) + +# write data to stdout +stdout_csv = csv.DictWriter(sys.stdout, fieldnames=reader.fieldnames) +stdout_csv.writeheader() + +for row in reader: + comment_key = f'{args.entity}:comments:admin' + comments = row.get(comment_key) + + if comments: + row.update({ + comment_key: '' + }) + + owner_key = f'{args.entity}:owner:admin' + owner = row.get(owner_key) + + if owner: + row.update({ + owner_key: '' + }) + + stdout_csv.writerow(row) diff --git a/data/wwic_download/metadata/README.md b/data/wwic_download/metadata/README.md new file mode 100644 index 00000000..9b56e8d8 --- /dev/null +++ b/data/wwic_download/metadata/README.md @@ -0,0 +1,3 @@ +# Who Was in Command data archive + +tk \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index 2642d316..ed835e58 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -27,6 +27,8 @@ services: - PG_HOST=${PG_HOST} - PG_USER=${PG_USER} - PG_PASSWORD=${PG_PASSWORD} + - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID} + - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY} command: python manage.py runserver 0.0.0.0:8000 migration: diff --git a/docket.mk b/docket.mk index 1ab2ac88..0c81dcb4 100644 --- a/docket.mk +++ b/docket.mk @@ -1,35 +1,69 @@ -.PHONY: sfm_pc/management/commands/country_data +.PHONY: sfm_pc/management/commands/country_data data/wwic_download/countries -%_import : %.csv sfm_pc/management/commands/country_data +DATA_ARCHIVE_BUCKET := $(shell cat configs/s3_config.json | jq -r '.data_archive_bucket') + +data_archive : wwic_download.zip + aws 
s3 cp $< s3://$(DATA_ARCHIVE_BUCKET)/ + +.PHONY: wwic_download.zip +wwic_download.zip : filtered_data data/wwic_download/metadata/sfm_research_handbook.pdf + cd data/wwic_download && zip -r ../../$@ . + +COUNTRY_NAMES=$(shell perl -pe "s/,/ /g" import_docket.csv | cut -d' ' -f5) +ENTITIES=units.csv persons.csv incidents.csv locations.csv locations.geojson sources.csv + +.PHONY : filtered_data +filtered_data: $(foreach country,$(COUNTRY_NAMES),$(patsubst %,data/wwic_download/countries/$(country)_%,$(ENTITIES))) + echo "filtered csvs for entities" + +define filter_entity_data + $(shell csvgrep --columns $(1):status:admin --match 3 $< | \ + python data/processors/blank_columns.py --entity $(1) > $@) +endef + +data/wwic_download/countries/%_units.csv : sfm_pc/management/commands/country_data/countries/%/units.csv + $(call filter_entity_data,unit) + +data/wwic_download/countries/%_persons.csv : sfm_pc/management/commands/country_data/countries/%/persons.csv + $(call filter_entity_data,person) + +data/wwic_download/countries/%_incidents.csv : sfm_pc/management/commands/country_data/countries/%/incidents.csv + $(call filter_entity_data,incident) + +data/wwic_download/countries/%_sources.csv : sfm_pc/management/commands/country_data/countries/%/sources.csv + $(call filter_entity_data,source) + +data/wwic_download/countries/%_locations.csv : sfm_pc/management/commands/country_data/countries/%/locations.csv + cp $< $@ + +data/wwic_download/countries/%_locations.geojson : sfm_pc/management/commands/country_data/countries/%/locations.geojson + cp $< $@ + +.PHONY : data/wwic_download/metadata/sfm_research_handbook.pdf +data/wwic_download/metadata/sfm_research_handbook.pdf : + curl -o $@ https://help.securityforcemonitor.org/_/downloads/en/latest/pdf/ + +%_import : %.csv sfm_pc/management/commands/country_data/countries perl -pe "s/,/ /g" $< | \ xargs -L1 bash -c ' \ echo "Loading data for country code $$3" && (\ python -u manage.py import_country_data \ --country_code $$3 \ - 
--country_path $(word 2, $^)/countries/$$3 \ + --country_path $(word 2, $^)/$$4 \ --sources_path $(word 2, $^)/sources.csv || \ exit 255 \ )' -DATA_ARCHIVE_BUCKET := $(shell cat configs/s3_config.json | jq -r '.data_archive_bucket') - -data_archive : wwic_download.zip - aws s3 cp $< s3://$(DATA_ARCHIVE_BUCKET)/ - -wwic_download.zip : sfm_pc/management/commands/country_data - # move into the target directory, zip to the root dir - cd $< && zip -r ../../../../$@ . - -sfm_pc/management/commands/country_data : import_docket.csv +sfm_pc/management/commands/country_data/countries : import_docket.csv perl -pe "s/,/ /g" $< | \ xargs -L1 bash -c ' \ - echo "Importing data for country code $$3" && (\ + echo "Importing data for country $$4" && (\ python -u manage.py download_country_data \ --sources_doc_id $$0 \ --location_doc_id $$1 \ --entity_doc_id $$2 \ - --country_code $$3 \ - --parent_directory $@ || \ + --country_name $$4 \ + --target_directory $@ || \ exit 255 \ )' diff --git a/fixtures/import_docket.csv b/fixtures/import_docket.csv index 021d511f..0d2f9437 100644 --- a/fixtures/import_docket.csv +++ b/fixtures/import_docket.csv @@ -1,26 +1,26 @@ -source_document_id,location_document_id,entitity_document_id,sfm:iso -1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1ztOfGaQT3WDrq-BOjT0x5VErzgrWQ0Ku,19Hk6OD5AYjWQCUaTWPzkGm9sLqp4e5v_n667M0CgaQQ,ae -1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1HpIjYaH_iMCRQD1jP159VGz-2NL4nB5p,1n9NZXDwr9gH6qT1k8-zNz34aq2MncG1RAiGHRfUHfAc,bd -1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1j8KgLnpjlnLy6bN4ozkwnBpkHUI6i3si,1IN8uZeR7WJbAmgPkY_QzzIir_cuba_Irzd_FtceUGyQ,bf -1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1mjTLMZ1la3zyiVQxLZ56sW497Sp8Lh5m,1tJXt33b5yNJajfTh2j6oXyWAduNaBRTy-XZON5MwspE,bh -1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1qZoQciglG1DOeEa3hh5iUvF7q4_bKOQl,1d0YaTdc1Esj0MVncM6cyhch6oT2VkWm7z_K4xEZELDs,eg -1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1eZHw6k7xM7Z_ZNMnF0Wc5cjJuVyuOop3,1KCiLecZiMCzD7JXY4r_cYgcjr54aG3CnMOOsrrGfMoQ,jo 
-1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1vnWgeTkq5TUyF7555F4renMJnl4WiFNy,1k9XBHbLM3mGDVpokgmV5H8T0anOSsuWVRX_5PeQ_1ik,kw -1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,13XnZOF7U0uwL3EP_QpdTVd1FUh3A3cwi,1CGvLuzRIBmuzKGknc3mUWISVLx9OwlpN22z6nFw3cLY,lr -1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1fnTq_ONVxzVBbCyQy_-s6ngmadA6st70,1swzBv27-BOlMEcW3bW0MbS0nK30ra0eZjKXv8hQAEpQ,ma -1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1syUOihIFKzk6QsLXd7XNUZIwzZZfAqEH,1sBW4eudBid8kOt-48XW6QQxNnR1NdXfoESudFyv0vMo,ml -1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1dU24WM8MAnqApFcBoYCiKPzPevebal6_,1RNAVrz9d4E8mP_k901aKBuKqcD2C-UxEk7-feQjyRgA,mm -1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,17Zqej6mrCT3BBBKcKj7949qHyRCa-9SJ,1yv5Gt4TYFplbNHLg7ZJsb7ZVPfCE2xD1GINm2e52Wgs,mr -1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1nVWV5_1kGDwyWJ3PPqExKfchs3sAlEuh,1GKJIanAuch2j64-HDqInvfG46dWhULswPQNYbCC97y0,mx -1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1qlHquI9EDz2lteBcjz_MheNLspg3mp_q,1KoVVqaPcuNz3Y7GLns8n3BnewwuNSxLK-qbY1dfMhG8,ne -1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1boFMPurqyxbfYBvfisRiROmzr8TuPI8j,1UgD7O9e5HSpj60tT4p73i0k2mMdoI8jtEZLwTUdF148,ng -1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1nMXXtFwJ3TqeynpKSW11uYAzihSMV8So,1cI_ahU1yEVQdHyhtrxUQ2LccaHNu9uOwwk_diclZgHc,np -1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,19o8a1zwxUEYFxvZkqs2AwCyIm0oe_CPF,1BZWTt_ukmo9HxVvf9atN-9g_W7j57K3Q_mms0xAmN4U,ph -1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1x4LjK_UWdxZm5EyNCupj7ikv7E-WMrkd,1uZtsbPTX9NVBrOCHLtdCum-2c2N9w4ALRGeo5miKCWE,qa -1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1wSzKP9bsXB9w4U8frj4Y5kHrfV3C6Vi8,1PpAqUl5ijUAM_cHgxTIalMwsDnauEf2xojGSLQhGb1U,rw -1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1FLls5iHQD71Omy4VEzXYQ4HacMubzg8v,1iVA0il4EkqB6HGuPhgjG_ZFZ9-51Em8LKG_BfrusaRo,sa -1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1tNS4yJYlh265zDy9rQnjAZSqSmBZxrRh,1pxX5spQL1oe7fcJHX-2jg-Q2bGe5T9HrOVUN528HEaI,sd 
-1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1l3NE8P8Xi-1qGwqZcdVdvV3Hn1h4Bwjv,1YC3W5Q2EGwq0dPOuZn7rUu9nMKr5j9MrThBv2T8TD6M,sl -1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1yPQVFwbQ4edUfBhgAbL2o9DAOljZigTF,1KiMws3gla6Jc82CiGqgOXYyEg0Rvl8AefWznF6V6IKc,td -1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1cyrCvMKVRHJtpQtcbTpoboJc9iNZ-oHy,1FZ3WqAlNpBNFv7zWOTFqUC45FXE_zqLvYQzMP3RCiL0,ug -1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1Ii31JX8y2InKt-FnHK-6kaqVK41XBOzY,1jBEXeS4Zz01afYLkm_NlL05dkW665K2UVCGYPQCsahY,ye \ No newline at end of file +source_document_id,location_document_id,entitity_document_id,sfm:iso,sfm:country_name +1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1ztOfGaQT3WDrq-BOjT0x5VErzgrWQ0Ku,1Ck11zLFVP6iJZFAR0_Xsq0UaeEJrmFl7ysbFX9mGu7c,ae,united-arab-emirates +1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1HpIjYaH_iMCRQD1jP159VGz-2NL4nB5p,1EqAi59wjE1v-bYX3cC1qdl6zkThpWJ8YcvSPUC-RGHc,bd,bangladesh +1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1j8KgLnpjlnLy6bN4ozkwnBpkHUI6i3si,1wBmSuTkoEhosDzfHtyvZqd9SKez-sWoPoJ9oPonWsSo,bf,burkina-faso +1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1mjTLMZ1la3zyiVQxLZ56sW497Sp8Lh5m,1c0O2XlwSpTAtB0AdhkkdgevWbsBUxvsmsETUwPPVIlk,bh,bahrain +1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1qZoQciglG1DOeEa3hh5iUvF7q4_bKOQl,1cZVy2PUAzeq2xOoLRLwL9z9mqbry32zv_XY7sjEih2c,eg,egypt +1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1eZHw6k7xM7Z_ZNMnF0Wc5cjJuVyuOop3,1CKkNsXwRdwXDiOldwT-6baw9DayXA2Vsn4ttpwP9SuM,jo,jordan +1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1vnWgeTkq5TUyF7555F4renMJnl4WiFNy,1Y6-9-9kai-YyK1pXvcv_W6fqUn9lORltUhuFc2YUu1I,kw,kuwait +1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,13XnZOF7U0uwL3EP_QpdTVd1FUh3A3cwi,1aGbMvFHzGn9ZlKKcFhiQ2c9egsoGDH11QBgyqmhS-IM,lr,liberia +1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1fnTq_ONVxzVBbCyQy_-s6ngmadA6st70,16962grIJlisFbh2Zp9kBAhv6jVnZz6bHgb6RGBUHd3o,ma,morocco 
+1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1syUOihIFKzk6QsLXd7XNUZIwzZZfAqEH,1UcgoJ_ytS-WSWl2_5OuV9h92wSCBWRFBoDtr4Ztqt14,ml,mali +1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1dU24WM8MAnqApFcBoYCiKPzPevebal6_,1vwb7ENaOeVRJIc5iCDBbF8K0Oql4SscENmLEdUT77Hg,mm,myanmar +1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,17Zqej6mrCT3BBBKcKj7949qHyRCa-9SJ,1cUtCEUuZRMqcxlRqFyoEM9eAdiDdWy2DUocroYivCx4,mr,mauritania +1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1nVWV5_1kGDwyWJ3PPqExKfchs3sAlEuh,168KuHwUr9565zWaQVZ5au3qtGOb-qyJx_WOwNzqt_Eo,mx,mexico +1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1qlHquI9EDz2lteBcjz_MheNLspg3mp_q,1_Pj5BryFXUPQPmMigII8G2HBUrpsnkK5V-Zu_9LCdGw,ne,niger +1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1boFMPurqyxbfYBvfisRiROmzr8TuPI8j,1f3W3qJATCzVjZGw239Wy3D25THs8ThnvoC24aUFaGZQ,ng,nigeria +1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1nMXXtFwJ3TqeynpKSW11uYAzihSMV8So,1Uc5eZswLB6mrwQLhd_OYQm7v7ThH99N0eb7RbTtD5iY,np,nepal +1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,19o8a1zwxUEYFxvZkqs2AwCyIm0oe_CPF,1h1a0S5aVv9Z3wucgKsYXmg5Z_CWzsKfjJSfJFcXxPSY,ph,philippines +1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1x4LjK_UWdxZm5EyNCupj7ikv7E-WMrkd,1UGOxjmJdJ9Dzj8cX3mZkgXAzT_ap_EMD2OqLjzDeGeE,qa,qatar +1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1wSzKP9bsXB9w4U8frj4Y5kHrfV3C6Vi8,1QAgVpj0bf_A0HGFzHgwxBbZqgIFurfH4h7u1MnfKzJc,rw,rwanda +1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1FLls5iHQD71Omy4VEzXYQ4HacMubzg8v,1a9XRXK5rG4_n0Afw7tIDkIbAmdydqKcU8J8zx5pLnVU,sa,saudi-arabia +1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1tNS4yJYlh265zDy9rQnjAZSqSmBZxrRh,11dEjFSe56YdmJfVeKhRZpQKSgRb6mfM1DWKoNFxYg9Y,sd,sudan +1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1l3NE8P8Xi-1qGwqZcdVdvV3Hn1h4Bwjv,1YxRrB39ItO_kEPTrMQ9FJlvMEp1Fjby0vchHiwW3C_I,sl,sierra-leone +1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1yPQVFwbQ4edUfBhgAbL2o9DAOljZigTF,15cnbBqIlp4LzEXrs2z2L4_RTnY5e1GMrGV150JV615Q,td,chad 
+1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1cyrCvMKVRHJtpQtcbTpoboJc9iNZ-oHy,1WlN4Hbv3JKE76hnNYkr80HU9oNJwjjOnj9nt7mm9ddw,ug,uganda +1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1Ii31JX8y2InKt-FnHK-6kaqVK41XBOzY,1r62axKA5xgvJAiSiHrKgHZSATwSkKB-K15fdmLbn3zo,ye,yemen \ No newline at end of file diff --git a/organization/views.py b/organization/views.py index d53a1c71..fb18f5a4 100644 --- a/organization/views.py +++ b/organization/views.py @@ -91,11 +91,6 @@ def get_context_data(self, **kwargs): # Determine if the user is logged in authenticated = self.request.user.is_authenticated - # Generate link to download a CSV of this record - params = '?download_etype=Organization&entity_id={0}'.format(str(context['organization'].uuid)) - - context['download_url'] = reverse('download') + params - # Commanders of this unit context['person_members'] = [] diff --git a/person/views.py b/person/views.py index c770d7ee..bdc4a8a2 100644 --- a/person/views.py +++ b/person/views.py @@ -47,11 +47,6 @@ def get_context_data(self, **kwargs): authenticated = self.request.user.is_authenticated - # Generate link to download a CSV of this record - params = '?download_etype=Person&entity_id={0}'.format(str(context['person'].uuid)) - - context['download_url'] = reverse('download') + params - if authenticated: affiliations = context['person'].memberships else: diff --git a/requirements.txt b/requirements.txt index 4ee565bc..1fcb01b4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,6 +13,7 @@ django-rosetta==0.9.8 django-queryset-csv==1.1.0 boto3==1.24.21 awscli==1.25.21 +csvkit==1.0.4 git+https://github.com/security-force-monitor/complex_fields.git diff --git a/sfm_pc/forms.py b/sfm_pc/forms.py index 6a5cb928..dcc4f3d0 100644 --- a/sfm_pc/forms.py +++ b/sfm_pc/forms.py @@ -548,24 +548,6 @@ def division_choices(): return [(r.value, country_name(r.value)) for r in division_ids] -def download_types(): - return [ - ('basic', _("Basic")), - ('parentage', _("Parentage")), - 
('memberships', _("Memberships")), - ('areas', _("Areas of operation")), - ('sites', _("Sites")), - ('personnel', _("Personnel")), - ('sources', _("Sources")), - ] - - -class DownloadForm(forms.Form): - download_type = forms.ChoiceField(label=gettext_lazy("Choose a download type"), choices=download_types) - division_id = forms.ChoiceField(label=gettext_lazy("Country"), choices=division_choices) - confidences = forms.BooleanField(label=gettext_lazy("Include confidence scores"), required=False) - - class ChangeLogForm(forms.Form): from_date = forms.DateTimeField(label=_("Start date"), required=False) to_date = forms.DateTimeField(label=_("End date"), required=False) diff --git a/sfm_pc/management/commands/download_country_data.py b/sfm_pc/management/commands/download_country_data.py index 06e37cd4..6fbc5497 100644 --- a/sfm_pc/management/commands/download_country_data.py +++ b/sfm_pc/management/commands/download_country_data.py @@ -35,24 +35,24 @@ def add_arguments(self, parser): ) parser.add_argument( - '--country_code', - dest='country_code', - help='Country code for the import' + '--country_name', + dest='country_name', + help='Slugified country name' ) parser.add_argument( - '--parent_directory', - dest='parent_directory' + '--target_directory', + dest='target_directory' ) def handle(self, *args, **kwargs): entity_doc_id = kwargs['entity_doc_id'] location_doc_id = kwargs['location_doc_id'] sources_doc_id = kwargs['sources_doc_id'] - country_code = kwargs['country_code'].rstrip() - parent_directory = kwargs['parent_directory'] + country_name = kwargs['country_name'].rstrip() + target_directory = kwargs['target_directory'] - country_subdirectory = f'{parent_directory}/countries/{country_code}' + country_subdirectory = f'{target_directory}/{country_name}' sheets_service = self._build_google_service( scopes=['https://www.googleapis.com/auth/spreadsheets.readonly'], @@ -64,7 +64,7 @@ def handle(self, *args, **kwargs): self._create_csv_files( 
sheets_service=sheets_service, doc_id=sources_doc_id, - output_directory=parent_directory, + output_directory=target_directory, key_func=lambda key: key == 'sources' ) diff --git a/sfm_pc/views.py b/sfm_pc/views.py index e5aaf58e..ab4db0d1 100644 --- a/sfm_pc/views.py +++ b/sfm_pc/views.py @@ -6,6 +6,7 @@ import csv import logging import os +import requests from django.conf import settings from django.views.generic.base import TemplateView @@ -247,17 +248,39 @@ class DownloadData(TemplateView): def get_context_data(self): context = super().get_context_data() - - download_url = self.get_presigned_url() - - if download_url: - context['download_url'] = download_url - + + download_url, head_object = self._get_s3_object_metadata() + + if download_url and head_object: + # megabytes = (bytes / 1024) / 1024 + file_size_mb = (head_object['ContentLength'] / 1024) / 1024 + context.update({ + 'download_url': download_url, + 'file_size': int(file_size_mb) + }) + return context - - def get_presigned_url(self): + + def _get_s3_object_metadata(self): s3_client = boto3.client('s3') + params = { + 'Bucket': DATA_ARCHIVE_BUCKET, + 'Key': 'wwic_download.zip' + } + + download_url = self._get_presigned_url(s3_client, params) + + if download_url: + # Need to do a HEAD request to get the object size + head_object = s3_client.head_object(**params) + + return download_url, head_object + + return None, None + + + def _get_presigned_url(self, s3_client, params): try: response = s3_client.generate_presigned_url( 'get_object', @@ -272,6 +295,10 @@ def get_presigned_url(self): except ClientError as e: logging.error(e) return None + + def _get_object_head(self, s3_client, params): + response = s3_client.head_object(**params) + return response class Echo: diff --git a/templates/base.html b/templates/base.html index a432f7f9..8bee7b54 100644 --- a/templates/base.html +++ b/templates/base.html @@ -92,6 +92,7 @@ {% endif %}
{% trans "Download the data that powers WhoWasInCommand to answer your own questions about the structure, behaviour and people in charge of security forces like the police and army." %}
+{% trans 'WhoWasInCommand.com is a free, public database of police, military and other security and defence forces. Click the "Download" button below to get a copy of all the data in the WhoWasInCommand.com database.' %}
{% trans "An error occurred fetching the data. This shouldn't happen. Please reload the page or contact us if it keeps happening..." %}
@@ -22,5 +20,25 @@The download file includes spreadsheets containing data for each country in the WhoWasInCommand.com database. Each spreadsheet includes:
+The download file also contains:
+The data published on WhoWasInCommand are created by Security Force Monitor, an investigative team based at the Human Rights Institute of Columbia Law School. We provide this data to assist journalists, human rights workers, litigators and others seeking accountability for human rights abuses perpetrated by security and defense forces.
+