diff --git a/.gitignore b/.gitignore index c43f3e98..2397c28d 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,7 @@ *.swo *.swp .DS_Store +.env # Import artifacts *_import @@ -12,6 +13,9 @@ sfm_pc/management/commands/data/ country_data/ wwic_download.zip +data/wwic_download/countries/*.csv +data/wwic_download/countries/*.geojson +data/wwic_download/metadata/*.pdf /staticfiles bin/ diff --git a/README.md b/README.md index 6e00c486..5d3a78d9 100644 --- a/README.md +++ b/README.md @@ -322,7 +322,7 @@ docker-compose run --rm app ./manage.py update_composition_index --recreate ``` #### data_archive config -If you need to work with the `data_archive` make recipe, `cp .env. s3.example .env` and add your AWS access tokens. These tokens must be for an IAM user attached to the correct policy for accessing the `wwic-data-archive-staging` S3 bucket. This bucket is configured to work in local development and on the staging server. +If you need to work with the `data_archive` make recipe in your local development environment, `cp .env.s3.example .env` and add your AWS access tokens. These tokens must be for an IAM user attached to the correct policy for accessing the `wwic-data-archive-staging` S3 bucket. This bucket is configured to work in local development and on the staging server. To create an archive locally, run `docker-compose --env-file .env.s3 run --rm app make data_archive` so you can upload the zip archive to S3. Once this is done, the "download" link at the `localhost:8000/en/download/` should work. 
diff --git a/data/processors/blank_columns.py b/data/processors/blank_columns.py new file mode 100644 index 00000000..60c00686 --- /dev/null +++ b/data/processors/blank_columns.py @@ -0,0 +1,34 @@ +import csv +import sys +import argparse + +# init arg parser +parser = argparse.ArgumentParser() +parser.add_argument('--entity', type=str, required=True) +args = parser.parse_args() + +# init the incoming data as a dict reader +reader = csv.DictReader(sys.stdin) + +# write data to stdout +stdout_csv = csv.DictWriter(sys.stdout, fieldnames=reader.fieldnames) +stdout_csv.writeheader() + +for row in reader: + comment_key = f'{args.entity}:comments:admin' + comments = row.get(comment_key) + + if comments: + row.update({ + comment_key: '' + }) + + owner_key = f'{args.entity}:owner:admin' + owner = row.get(owner_key) + + if owner: + row.update({ + owner_key: '' + }) + + stdout_csv.writerow(row) diff --git a/data/wwic_download/metadata/README.md b/data/wwic_download/metadata/README.md new file mode 100644 index 00000000..9b56e8d8 --- /dev/null +++ b/data/wwic_download/metadata/README.md @@ -0,0 +1,3 @@ +# Who Was in Command data archive + +tk \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index 2642d316..ed835e58 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -27,6 +27,8 @@ services: - PG_HOST=${PG_HOST} - PG_USER=${PG_USER} - PG_PASSWORD=${PG_PASSWORD} + - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID} + - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY} command: python manage.py runserver 0.0.0.0:8000 migration: diff --git a/docket.mk b/docket.mk index 1ab2ac88..0c81dcb4 100644 --- a/docket.mk +++ b/docket.mk @@ -1,35 +1,69 @@ -.PHONY: sfm_pc/management/commands/country_data +.PHONY: sfm_pc/management/commands/country_data data/wwic_download/countries -%_import : %.csv sfm_pc/management/commands/country_data +DATA_ARCHIVE_BUCKET := $(shell cat configs/s3_config.json | jq -r '.data_archive_bucket') + +data_archive : wwic_download.zip + aws 
s3 cp $< s3://$(DATA_ARCHIVE_BUCKET)/ + +.PHONY: wwic_download.zip +wwic_download.zip : filtered_data data/wwic_download/metadata/sfm_research_handbook.pdf + cd data/wwic_download && zip -r ../../$@ . + +COUNTRY_NAMES=$(shell perl -pe "s/,/ /g" import_docket.csv | cut -d' ' -f5) +ENTITIES=units.csv persons.csv incidents.csv locations.csv locations.geojson sources.csv + +.PHONY : filtered_data +filtered_data: $(foreach country,$(COUNTRY_NAMES),$(patsubst %,data/wwic_download/countries/$(country)_%,$(ENTITIES))) + echo "filtered csvs for entities" + +define filter_entity_data + $(shell csvgrep --columns $(1):status:admin --match 3 $< | \ + python data/processors/blank_columns.py --entity $(1) > $@) +endef + +data/wwic_download/countries/%_units.csv : sfm_pc/management/commands/country_data/countries/%/units.csv + $(call filter_entity_data,unit) + +data/wwic_download/countries/%_persons.csv : sfm_pc/management/commands/country_data/countries/%/persons.csv + $(call filter_entity_data,person) + +data/wwic_download/countries/%_incidents.csv : sfm_pc/management/commands/country_data/countries/%/incidents.csv + $(call filter_entity_data,incident) + +data/wwic_download/countries/%_sources.csv : sfm_pc/management/commands/country_data/countries/%/sources.csv + $(call filter_entity_data,source) + +data/wwic_download/countries/%_locations.csv : sfm_pc/management/commands/country_data/countries/%/locations.csv + cp $< $@ + +data/wwic_download/countries/%_locations.geojson : sfm_pc/management/commands/country_data/countries/%/locations.geojson + cp $< $@ + +.PHONY : data/wwic_download/metadata/sfm_research_handbook.pdf +data/wwic_download/metadata/sfm_research_handbook.pdf : + curl -o $@ https://help.securityforcemonitor.org/_/downloads/en/latest/pdf/ + +%_import : %.csv sfm_pc/management/commands/country_data/countries perl -pe "s/,/ /g" $< | \ xargs -L1 bash -c ' \ echo "Loading data for country code $$3" && (\ python -u manage.py import_country_data \ --country_code $$3 \ - 
--country_path $(word 2, $^)/countries/$$3 \ + --country_path $(word 2, $^)/$$4 \ --sources_path $(word 2, $^)/sources.csv || \ exit 255 \ )' -DATA_ARCHIVE_BUCKET := $(shell cat configs/s3_config.json | jq -r '.data_archive_bucket') - -data_archive : wwic_download.zip - aws s3 cp $< s3://$(DATA_ARCHIVE_BUCKET)/ - -wwic_download.zip : sfm_pc/management/commands/country_data - # move into the target directory, zip to the root dir - cd $< && zip -r ../../../../$@ . - -sfm_pc/management/commands/country_data : import_docket.csv +sfm_pc/management/commands/country_data/countries : import_docket.csv perl -pe "s/,/ /g" $< | \ xargs -L1 bash -c ' \ - echo "Importing data for country code $$3" && (\ + echo "Importing data for country $$4" && (\ python -u manage.py download_country_data \ --sources_doc_id $$0 \ --location_doc_id $$1 \ --entity_doc_id $$2 \ - --country_code $$3 \ - --parent_directory $@ || \ + --country_name $$4 \ + --target_directory $@ || \ exit 255 \ )' diff --git a/fixtures/import_docket.csv b/fixtures/import_docket.csv index 021d511f..0d2f9437 100644 --- a/fixtures/import_docket.csv +++ b/fixtures/import_docket.csv @@ -1,26 +1,26 @@ -source_document_id,location_document_id,entitity_document_id,sfm:iso -1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1ztOfGaQT3WDrq-BOjT0x5VErzgrWQ0Ku,19Hk6OD5AYjWQCUaTWPzkGm9sLqp4e5v_n667M0CgaQQ,ae -1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1HpIjYaH_iMCRQD1jP159VGz-2NL4nB5p,1n9NZXDwr9gH6qT1k8-zNz34aq2MncG1RAiGHRfUHfAc,bd -1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1j8KgLnpjlnLy6bN4ozkwnBpkHUI6i3si,1IN8uZeR7WJbAmgPkY_QzzIir_cuba_Irzd_FtceUGyQ,bf -1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1mjTLMZ1la3zyiVQxLZ56sW497Sp8Lh5m,1tJXt33b5yNJajfTh2j6oXyWAduNaBRTy-XZON5MwspE,bh -1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1qZoQciglG1DOeEa3hh5iUvF7q4_bKOQl,1d0YaTdc1Esj0MVncM6cyhch6oT2VkWm7z_K4xEZELDs,eg -1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1eZHw6k7xM7Z_ZNMnF0Wc5cjJuVyuOop3,1KCiLecZiMCzD7JXY4r_cYgcjr54aG3CnMOOsrrGfMoQ,jo 
-1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1vnWgeTkq5TUyF7555F4renMJnl4WiFNy,1k9XBHbLM3mGDVpokgmV5H8T0anOSsuWVRX_5PeQ_1ik,kw -1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,13XnZOF7U0uwL3EP_QpdTVd1FUh3A3cwi,1CGvLuzRIBmuzKGknc3mUWISVLx9OwlpN22z6nFw3cLY,lr -1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1fnTq_ONVxzVBbCyQy_-s6ngmadA6st70,1swzBv27-BOlMEcW3bW0MbS0nK30ra0eZjKXv8hQAEpQ,ma -1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1syUOihIFKzk6QsLXd7XNUZIwzZZfAqEH,1sBW4eudBid8kOt-48XW6QQxNnR1NdXfoESudFyv0vMo,ml -1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1dU24WM8MAnqApFcBoYCiKPzPevebal6_,1RNAVrz9d4E8mP_k901aKBuKqcD2C-UxEk7-feQjyRgA,mm -1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,17Zqej6mrCT3BBBKcKj7949qHyRCa-9SJ,1yv5Gt4TYFplbNHLg7ZJsb7ZVPfCE2xD1GINm2e52Wgs,mr -1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1nVWV5_1kGDwyWJ3PPqExKfchs3sAlEuh,1GKJIanAuch2j64-HDqInvfG46dWhULswPQNYbCC97y0,mx -1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1qlHquI9EDz2lteBcjz_MheNLspg3mp_q,1KoVVqaPcuNz3Y7GLns8n3BnewwuNSxLK-qbY1dfMhG8,ne -1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1boFMPurqyxbfYBvfisRiROmzr8TuPI8j,1UgD7O9e5HSpj60tT4p73i0k2mMdoI8jtEZLwTUdF148,ng -1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1nMXXtFwJ3TqeynpKSW11uYAzihSMV8So,1cI_ahU1yEVQdHyhtrxUQ2LccaHNu9uOwwk_diclZgHc,np -1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,19o8a1zwxUEYFxvZkqs2AwCyIm0oe_CPF,1BZWTt_ukmo9HxVvf9atN-9g_W7j57K3Q_mms0xAmN4U,ph -1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1x4LjK_UWdxZm5EyNCupj7ikv7E-WMrkd,1uZtsbPTX9NVBrOCHLtdCum-2c2N9w4ALRGeo5miKCWE,qa -1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1wSzKP9bsXB9w4U8frj4Y5kHrfV3C6Vi8,1PpAqUl5ijUAM_cHgxTIalMwsDnauEf2xojGSLQhGb1U,rw -1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1FLls5iHQD71Omy4VEzXYQ4HacMubzg8v,1iVA0il4EkqB6HGuPhgjG_ZFZ9-51Em8LKG_BfrusaRo,sa -1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1tNS4yJYlh265zDy9rQnjAZSqSmBZxrRh,1pxX5spQL1oe7fcJHX-2jg-Q2bGe5T9HrOVUN528HEaI,sd 
-1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1l3NE8P8Xi-1qGwqZcdVdvV3Hn1h4Bwjv,1YC3W5Q2EGwq0dPOuZn7rUu9nMKr5j9MrThBv2T8TD6M,sl -1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1yPQVFwbQ4edUfBhgAbL2o9DAOljZigTF,1KiMws3gla6Jc82CiGqgOXYyEg0Rvl8AefWznF6V6IKc,td -1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1cyrCvMKVRHJtpQtcbTpoboJc9iNZ-oHy,1FZ3WqAlNpBNFv7zWOTFqUC45FXE_zqLvYQzMP3RCiL0,ug -1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1Ii31JX8y2InKt-FnHK-6kaqVK41XBOzY,1jBEXeS4Zz01afYLkm_NlL05dkW665K2UVCGYPQCsahY,ye \ No newline at end of file +source_document_id,location_document_id,entitity_document_id,sfm:iso,sfm:country_name +1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1ztOfGaQT3WDrq-BOjT0x5VErzgrWQ0Ku,1Ck11zLFVP6iJZFAR0_Xsq0UaeEJrmFl7ysbFX9mGu7c,ae,united-arab-emirates +1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1HpIjYaH_iMCRQD1jP159VGz-2NL4nB5p,1EqAi59wjE1v-bYX3cC1qdl6zkThpWJ8YcvSPUC-RGHc,bd,bangladesh +1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1j8KgLnpjlnLy6bN4ozkwnBpkHUI6i3si,1wBmSuTkoEhosDzfHtyvZqd9SKez-sWoPoJ9oPonWsSo,bf,burkina-faso +1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1mjTLMZ1la3zyiVQxLZ56sW497Sp8Lh5m,1c0O2XlwSpTAtB0AdhkkdgevWbsBUxvsmsETUwPPVIlk,bh,bahrain +1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1qZoQciglG1DOeEa3hh5iUvF7q4_bKOQl,1cZVy2PUAzeq2xOoLRLwL9z9mqbry32zv_XY7sjEih2c,eg,egypt +1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1eZHw6k7xM7Z_ZNMnF0Wc5cjJuVyuOop3,1CKkNsXwRdwXDiOldwT-6baw9DayXA2Vsn4ttpwP9SuM,jo,jordan +1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1vnWgeTkq5TUyF7555F4renMJnl4WiFNy,1Y6-9-9kai-YyK1pXvcv_W6fqUn9lORltUhuFc2YUu1I,kw,kuwait +1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,13XnZOF7U0uwL3EP_QpdTVd1FUh3A3cwi,1aGbMvFHzGn9ZlKKcFhiQ2c9egsoGDH11QBgyqmhS-IM,lr,liberia +1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1fnTq_ONVxzVBbCyQy_-s6ngmadA6st70,16962grIJlisFbh2Zp9kBAhv6jVnZz6bHgb6RGBUHd3o,ma,morocco 
+1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1syUOihIFKzk6QsLXd7XNUZIwzZZfAqEH,1UcgoJ_ytS-WSWl2_5OuV9h92wSCBWRFBoDtr4Ztqt14,ml,mali +1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1dU24WM8MAnqApFcBoYCiKPzPevebal6_,1vwb7ENaOeVRJIc5iCDBbF8K0Oql4SscENmLEdUT77Hg,mm,myanmar +1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,17Zqej6mrCT3BBBKcKj7949qHyRCa-9SJ,1cUtCEUuZRMqcxlRqFyoEM9eAdiDdWy2DUocroYivCx4,mr,mauritania +1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1nVWV5_1kGDwyWJ3PPqExKfchs3sAlEuh,168KuHwUr9565zWaQVZ5au3qtGOb-qyJx_WOwNzqt_Eo,mx,mexico +1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1qlHquI9EDz2lteBcjz_MheNLspg3mp_q,1_Pj5BryFXUPQPmMigII8G2HBUrpsnkK5V-Zu_9LCdGw,ne,niger +1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1boFMPurqyxbfYBvfisRiROmzr8TuPI8j,1f3W3qJATCzVjZGw239Wy3D25THs8ThnvoC24aUFaGZQ,ng,nigeria +1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1nMXXtFwJ3TqeynpKSW11uYAzihSMV8So,1Uc5eZswLB6mrwQLhd_OYQm7v7ThH99N0eb7RbTtD5iY,np,nepal +1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,19o8a1zwxUEYFxvZkqs2AwCyIm0oe_CPF,1h1a0S5aVv9Z3wucgKsYXmg5Z_CWzsKfjJSfJFcXxPSY,ph,philippines +1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1x4LjK_UWdxZm5EyNCupj7ikv7E-WMrkd,1UGOxjmJdJ9Dzj8cX3mZkgXAzT_ap_EMD2OqLjzDeGeE,qa,qatar +1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1wSzKP9bsXB9w4U8frj4Y5kHrfV3C6Vi8,1QAgVpj0bf_A0HGFzHgwxBbZqgIFurfH4h7u1MnfKzJc,rw,rwanda +1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1FLls5iHQD71Omy4VEzXYQ4HacMubzg8v,1a9XRXK5rG4_n0Afw7tIDkIbAmdydqKcU8J8zx5pLnVU,sa,saudi-arabia +1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1tNS4yJYlh265zDy9rQnjAZSqSmBZxrRh,11dEjFSe56YdmJfVeKhRZpQKSgRb6mfM1DWKoNFxYg9Y,sd,sudan +1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1l3NE8P8Xi-1qGwqZcdVdvV3Hn1h4Bwjv,1YxRrB39ItO_kEPTrMQ9FJlvMEp1Fjby0vchHiwW3C_I,sl,sierra-leone +1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1yPQVFwbQ4edUfBhgAbL2o9DAOljZigTF,15cnbBqIlp4LzEXrs2z2L4_RTnY5e1GMrGV150JV615Q,td,chad 
+1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1cyrCvMKVRHJtpQtcbTpoboJc9iNZ-oHy,1WlN4Hbv3JKE76hnNYkr80HU9oNJwjjOnj9nt7mm9ddw,ug,uganda +1dkGS6Ocyc2YYQ5IopEdjz38t9kaT34XfIAab0iteGoY,1Ii31JX8y2InKt-FnHK-6kaqVK41XBOzY,1r62axKA5xgvJAiSiHrKgHZSATwSkKB-K15fdmLbn3zo,ye,yemen \ No newline at end of file diff --git a/organization/views.py b/organization/views.py index d53a1c71..fb18f5a4 100644 --- a/organization/views.py +++ b/organization/views.py @@ -91,11 +91,6 @@ def get_context_data(self, **kwargs): # Determine if the user is logged in authenticated = self.request.user.is_authenticated - # Generate link to download a CSV of this record - params = '?download_etype=Organization&entity_id={0}'.format(str(context['organization'].uuid)) - - context['download_url'] = reverse('download') + params - # Commanders of this unit context['person_members'] = [] diff --git a/person/views.py b/person/views.py index c770d7ee..bdc4a8a2 100644 --- a/person/views.py +++ b/person/views.py @@ -47,11 +47,6 @@ def get_context_data(self, **kwargs): authenticated = self.request.user.is_authenticated - # Generate link to download a CSV of this record - params = '?download_etype=Person&entity_id={0}'.format(str(context['person'].uuid)) - - context['download_url'] = reverse('download') + params - if authenticated: affiliations = context['person'].memberships else: diff --git a/requirements.txt b/requirements.txt index 4ee565bc..1fcb01b4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,6 +13,7 @@ django-rosetta==0.9.8 django-queryset-csv==1.1.0 boto3==1.24.21 awscli==1.25.21 +csvkit==1.0.4 git+https://github.com/security-force-monitor/complex_fields.git diff --git a/sfm_pc/forms.py b/sfm_pc/forms.py index 6a5cb928..dcc4f3d0 100644 --- a/sfm_pc/forms.py +++ b/sfm_pc/forms.py @@ -548,24 +548,6 @@ def division_choices(): return [(r.value, country_name(r.value)) for r in division_ids] -def download_types(): - return [ - ('basic', _("Basic")), - ('parentage', _("Parentage")), - 
('memberships', _("Memberships")), - ('areas', _("Areas of operation")), - ('sites', _("Sites")), - ('personnel', _("Personnel")), - ('sources', _("Sources")), - ] - - -class DownloadForm(forms.Form): - download_type = forms.ChoiceField(label=gettext_lazy("Choose a download type"), choices=download_types) - division_id = forms.ChoiceField(label=gettext_lazy("Country"), choices=division_choices) - confidences = forms.BooleanField(label=gettext_lazy("Include confidence scores"), required=False) - - class ChangeLogForm(forms.Form): from_date = forms.DateTimeField(label=_("Start date"), required=False) to_date = forms.DateTimeField(label=_("End date"), required=False) diff --git a/sfm_pc/management/commands/download_country_data.py b/sfm_pc/management/commands/download_country_data.py index 06e37cd4..6fbc5497 100644 --- a/sfm_pc/management/commands/download_country_data.py +++ b/sfm_pc/management/commands/download_country_data.py @@ -35,24 +35,24 @@ def add_arguments(self, parser): ) parser.add_argument( - '--country_code', - dest='country_code', - help='Country code for the import' + '--country_name', + dest='country_name', + help='Slugified country name' ) parser.add_argument( - '--parent_directory', - dest='parent_directory' + '--target_directory', + dest='target_directory' ) def handle(self, *args, **kwargs): entity_doc_id = kwargs['entity_doc_id'] location_doc_id = kwargs['location_doc_id'] sources_doc_id = kwargs['sources_doc_id'] - country_code = kwargs['country_code'].rstrip() - parent_directory = kwargs['parent_directory'] + country_name = kwargs['country_name'].rstrip() + target_directory = kwargs['target_directory'] - country_subdirectory = f'{parent_directory}/countries/{country_code}' + country_subdirectory = f'{target_directory}/{country_name}' sheets_service = self._build_google_service( scopes=['https://www.googleapis.com/auth/spreadsheets.readonly'], @@ -64,7 +64,7 @@ def handle(self, *args, **kwargs): self._create_csv_files( 
sheets_service=sheets_service, doc_id=sources_doc_id, - output_directory=parent_directory, + output_directory=target_directory, key_func=lambda key: key == 'sources' ) diff --git a/sfm_pc/views.py b/sfm_pc/views.py index e5aaf58e..ab4db0d1 100644 --- a/sfm_pc/views.py +++ b/sfm_pc/views.py @@ -6,6 +6,7 @@ import csv import logging import os +import requests from django.conf import settings from django.views.generic.base import TemplateView @@ -247,17 +248,39 @@ class DownloadData(TemplateView): def get_context_data(self): context = super().get_context_data() - - download_url = self.get_presigned_url() - - if download_url: - context['download_url'] = download_url - + + download_url, head_object = self._get_s3_object_metadata() + + if download_url and head_object: + # megabytes = (bytes / 1024) / 1024 + file_size_mb = (head_object['ContentLength'] / 1024) / 1024 + context.update({ + 'download_url': download_url, + 'file_size': int(file_size_mb) + }) + return context - - def get_presigned_url(self): + + def _get_s3_object_metadata(self): s3_client = boto3.client('s3') + params = { + 'Bucket': DATA_ARCHIVE_BUCKET, + 'Key': 'wwic_download.zip' + } + + download_url = self._get_presigned_url(s3_client, params) + + if download_url: + # Need to do a HEAD request to get the object size + head_object = s3_client.head_object(**params) + + return download_url, head_object + + return None, None + + + def _get_presigned_url(self, s3_client, params): try: response = s3_client.generate_presigned_url( 'get_object', @@ -272,6 +295,10 @@ def get_presigned_url(self): except ClientError as e: logging.error(e) return None + + def _get_object_head(self, s3_client, params): + response = s3_client.head_object(**params) + return response class Echo: diff --git a/templates/base.html b/templates/base.html index a432f7f9..8bee7b54 100644 --- a/templates/base.html +++ b/templates/base.html @@ -92,6 +92,7 @@ {% endif %}
  • {% trans "About" %}
  • +
  • {% trans "Download" %}
  • {% if user.is_authenticated %}
  • {% trans "Logout" %}
  • {% endif %} diff --git a/templates/download.html b/templates/download.html index f6cc95c9..1b92489c 100644 --- a/templates/download.html +++ b/templates/download.html @@ -1,18 +1,16 @@ {% extends "base.html" %} {% load i18n %} {% block content %} -
    -

    {% trans "Use Our Data." %}

    -

    {% trans "Hold Perpetrators Accountable." %}

    -
    -

    {% trans "Download the data that powers WhoWasInCommand to answer your own questions about the structure, behaviour and people in charge of security forces like the police and army." %}

    +
    +

    {% trans "Download data from WhoWasInCommand.com" %}

    +
    +

    {% trans 'WhoWasInCommand.com is a free, public database of police, military and other security and defence forces. Click the "Download" button below to get a copy of all the data in the WhoWasInCommand.com database.' %}

    -
    +
    {% if download_url %} {% else %}

    {% trans "An error occurred fetching the data. This shouldn't happen. Please reload the page or contact us if it keeps happening..." %}

    @@ -22,5 +20,25 @@

    {% trans "Hold Perpetrators Accountable." %}

    + +
    +

    What's in the download file?

    +

    The download file includes spreadsheets containing data for each country in the WhoWasInCommand.com database. Each spreadsheet includes:

    +
      +
    • the organizational structure and command chain of each branch of the security forces we have researched.
    • +
    • details on command personnel and their postings to different units over time.
    • +
    • the geographical footprint of different forces, including infrastructure and areas of operation.
    • +
    • a file of geospatial information used to represent this data as a map.
    • +
    • all the sources used to evidence each piece of data.
    • +
    + +

    The download file also contains:

    +
      +
    • the Security Force Monitor Research Handbook, which explains what each piece of data means and how it was created. You can also read the Research Handbook online.
    • +
    • copyright and licensing information explaining what you can do with this data, and what your obligations are should you use the data.
    • +
    + +

    The data published on WhoWasInCommand are created by Security Force Monitor, an investigative team based at the Human Rights Institute of Columbia Law School. We provide this data to assist journalists, human rights workers, litigators and others seeking accountability for human rights abuses perpetrated by security and defense forces.

    +
    {% endblock %} diff --git a/templates/organization/view.html b/templates/organization/view.html index f348bc42..65a6280e 100644 --- a/templates/organization/view.html +++ b/templates/organization/view.html @@ -25,7 +25,6 @@

    {% help href='unitrec.html#unit-record-title-area' %} {% cite organization.name.get_value %} - {% trans "Download as CSV" %} {% trans "Print this page" %} diff --git a/templates/partials/organization_search_results.html b/templates/partials/organization_search_results.html index a5a4cfe9..6abbc5f9 100644 --- a/templates/partials/organization_search_results.html +++ b/templates/partials/organization_search_results.html @@ -4,7 +4,6 @@

    {% trans "Units" %} {{ hit_count|intcomma }} {% trans "results" %} - {% trans "Download results" %}

    {% with merge='False' object_list=objects sortable='True' %} diff --git a/templates/partials/person_search_results.html b/templates/partials/person_search_results.html index 26f677b2..ebe884ab 100644 --- a/templates/partials/person_search_results.html +++ b/templates/partials/person_search_results.html @@ -4,7 +4,6 @@

    {% trans "Personnel" %} {{ hit_count|intcomma }} {% trans "results" %} - {% trans "Download results" %}

    {% with merge='False' object_list=objects sortable='True' %} diff --git a/templates/partials/source_search_results.html b/templates/partials/source_search_results.html index 0f1cd996..2c30324d 100644 --- a/templates/partials/source_search_results.html +++ b/templates/partials/source_search_results.html @@ -4,7 +4,6 @@

    {% trans "Sources" %} {{ hit_count|intcomma }} {% trans "results" %} - {% trans "Download results" %}

    {% with object_list=objects sortable='True' %} diff --git a/templates/partials/violation_search_results.html b/templates/partials/violation_search_results.html index 46f228b2..7b081a09 100644 --- a/templates/partials/violation_search_results.html +++ b/templates/partials/violation_search_results.html @@ -4,7 +4,6 @@

    {% trans "Incidents" %} {{ hit_count|intcomma }} {% trans "results" %} - {% trans "Download results" %}

    {% with object_list=objects sortable='True' %} diff --git a/templates/person/view.html b/templates/person/view.html index e138e22b..1db13961 100644 --- a/templates/person/view.html +++ b/templates/person/view.html @@ -26,7 +26,6 @@

    {% help href='personsrec.html#person-record-title-area' %} {% cite person.name.get_value %} - {% trans "Download as CSV" %} {% trans "Print this page" %} diff --git a/templates/violation/view.html b/templates/violation/view.html index bf61ba1b..ff65c83e 100644 --- a/templates/violation/view.html +++ b/templates/violation/view.html @@ -26,7 +26,6 @@

    {# All violation info uses the same source, so we can cite at the end #} {% cite violation.description.get_value %} - {% trans "Download as CSV" %} {% trans "Print this page" %} diff --git a/violation/views.py b/violation/views.py index 5da86ccd..5ece2767 100644 --- a/violation/views.py +++ b/violation/views.py @@ -53,11 +53,6 @@ def get_context_data(self, **kwargs): authenticated = self.request.user.is_authenticated - # Generate link to download a CSV of this record - params = '?download_etype=Violation&entity_id={0}'.format(str(context['violation'].uuid)) - - context['download_url'] = reverse('download') + params - context['location'] = None if context['violation'].location.get_value():