diff --git a/html/modules/custom/reliefweb_import/config/schema/reliefweb_import.schema.yml b/html/modules/custom/reliefweb_import/config/schema/reliefweb_import.schema.yml index 6933a2a6c..bde12ba6e 100644 --- a/html/modules/custom/reliefweb_import/config/schema/reliefweb_import.schema.yml +++ b/html/modules/custom/reliefweb_import/config/schema/reliefweb_import.schema.yml @@ -3,3 +3,31 @@ reliefweb_import.plugin.importer: type: mapping label: 'ReliefWeb importer plugin base settings.' mapping: + enabled: + type: boolean + label: 'Whether the plugin is enabled or not.' + +# UNHCR Data importer plugin settings. +reliefweb_import.plugin.importer.unhcr_data: + type: reliefweb_import.plugin.importer + label: 'UNHCR Data importer plugin base settings.' + mapping: + api_url: + type: string + label: 'API URL.' + api_key: + type: string + label: 'API key.' + list_endpoint: + type: string + label: 'Endpoint path to get a list of documents.' + document_endpoint: + type: string + label: 'Endpoint path to get a document.' + timeout: + type: integer + label: 'Connection and request timeout.' + provider_uuid: + type: string + label: 'UUID of the Post API provider associated with this importer plugin.' 
+ diff --git a/html/modules/custom/reliefweb_import/drush.services.yml b/html/modules/custom/reliefweb_import/drush.services.yml index 1ef1cc5bf..407a518dc 100644 --- a/html/modules/custom/reliefweb_import/drush.services.yml +++ b/html/modules/custom/reliefweb_import/drush.services.yml @@ -1,6 +1,6 @@ services: reliefweb_import.commands: class: \Drupal\reliefweb_import\Drush\Commands\ReliefwebImport - arguments: ['@reliefweb_import.job_feeds_importer'] + arguments: ['@config.factory', '@reliefweb_import.job_feeds_importer', '@plugin.manager.reliefweb_import.reliefweb_importer'] tags: - { name: drush.command } diff --git a/html/modules/custom/reliefweb_import/reliefweb_import.info.yml b/html/modules/custom/reliefweb_import/reliefweb_import.info.yml index 2e11f11f2..9259abe91 100644 --- a/html/modules/custom/reliefweb_import/reliefweb_import.info.yml +++ b/html/modules/custom/reliefweb_import/reliefweb_import.info.yml @@ -6,4 +6,5 @@ core_version_requirement: ^10 dependencies: - drupal:reliefweb_api - drupal:reliefweb_entities + - drupal:reliefweb_post_api - drupal:reliefweb_utility diff --git a/html/modules/custom/reliefweb_import/reliefweb_import.services.yml b/html/modules/custom/reliefweb_import/reliefweb_import.services.yml index 963eb03d8..f8c8dd64a 100644 --- a/html/modules/custom/reliefweb_import/reliefweb_import.services.yml +++ b/html/modules/custom/reliefweb_import/reliefweb_import.services.yml @@ -3,6 +3,6 @@ services: class: \Drupal\reliefweb_import\Service\JobFeedsImporter arguments: ['@database', '@entity_type.manager', '@account_switcher', '@http_client', '@logger.factory', '@state'] - plugin.manager.reliefweb_import.importer: - class: Drupal\reliefweb_import\Plugin\ImporterPluginManager + plugin.manager.reliefweb_import.reliefweb_importer: + class: Drupal\reliefweb_import\Plugin\ReliefWebImporterPluginManager parent: default_plugin_manager diff --git a/html/modules/custom/reliefweb_import/src/Drush/Commands/ReliefwebImport.php 
b/html/modules/custom/reliefweb_import/src/Drush/Commands/ReliefwebImport.php index 792ed63ca..f2dd18fef 100644 --- a/html/modules/custom/reliefweb_import/src/Drush/Commands/ReliefwebImport.php +++ b/html/modules/custom/reliefweb_import/src/Drush/Commands/ReliefwebImport.php @@ -5,6 +5,8 @@ use Consolidation\SiteAlias\SiteAliasManagerAwareInterface; use Consolidation\SiteAlias\SiteAliasManagerAwareTrait; use Consolidation\SiteProcess\ProcessManagerAwareTrait; +use Drupal\Core\Config\ConfigFactoryInterface; +use Drupal\reliefweb_import\Plugin\ReliefwebImporterPluginManagerInterface; use Drupal\reliefweb_import\Service\JobFeedsImporterInterface; use Drush\Commands\DrushCommands; @@ -21,23 +23,83 @@ class ReliefwebImport extends DrushCommands implements SiteAliasManagerAwareInte * {@inheritdoc} */ public function __construct( + protected ConfigFactoryInterface $configFactory, protected JobFeedsImporterInterface $jobImporter, + protected ReliefwebImporterPluginManagerInterface $importerPluginManager, ) {} /** * Import jobs. * * @param int $limit - * Max number of items to send. + * Max number of items to import. * * @command reliefweb_import:jobs + * * @usage reliefweb_import:jobs - * Send emails. + * Import jobs. + * * @validate-module-enabled reliefweb_import * @aliases reliefweb-import-jobs */ - public function jobs(int $limit = 50): void { + public function importJobs(int $limit = 50): void { $this->jobImporter->importJobs($limit); } + /** + * Import content. + * + * @param string $plugin_id + * ID of the importer plugin to use. + * @param int $limit + * Max number of items to import. + * + * @command reliefweb_import:content + * + * @usage reliefweb_import:content test 10 + * Import 10 documents from the 'test' importer plugin. + * + * @validate-module-enabled reliefweb_import + * @aliases reliefweb-import-content + * + * @todo allow passing 'all' to import content from all the enabled plugins. 
+ */ + public function import(string $plugin_id, int $limit = 50): bool { + if (!$this->importerPluginManager->hasDefinition($plugin_id)) { + $this->logger()->error(strtr('Unknown importer plugin: @plugin_id.', [ + '@plugin_id' => $plugin_id, + ])); + return FALSE; + } + + // The plugin settings are stored per plugin ID in the module config. + $settings = $this->configFactory + ->get('reliefweb_import') + ->get('plugin.reliefweb_import.importer.' . $plugin_id); + + if (empty($settings)) { + $this->logger()->error(strtr('No settings for importer plugin: @plugin_id.', [ + '@plugin_id' => $plugin_id, + ])); + return FALSE; + } + + /** @var \Drupal\reliefweb_import\Plugin\ReliefWebImporterPluginInterface|null $plugin */ + $plugin = $this->importerPluginManager->createInstance($plugin_id, $settings); + if (empty($plugin)) { + $this->logger()->error(strtr('Unable to create importer plugin: @plugin_id.', [ + '@plugin_id' => $plugin_id, + ])); + return FALSE; + } + + // A disabled plugin is not an error: report it and exit successfully. + if (!$plugin->enabled()) { + $this->logger()->notice(strtr('Importer plugin: @plugin_id not enabled.', [ + '@plugin_id' => $plugin_id, + ])); + return TRUE; + } + + return $plugin->importContent($limit); + } + } diff --git a/html/modules/custom/reliefweb_import/src/Exception/ExceptionInterface.php b/html/modules/custom/reliefweb_import/src/Exception/ExceptionInterface.php new file mode 100644 index 000000000..9ad1a6133 --- /dev/null +++ b/html/modules/custom/reliefweb_import/src/Exception/ExceptionInterface.php @@ -0,0 +1,10 @@ + + */ + protected array $countryMapping = [ + // Afghanistan. + 13 => 575, + // Aland Islands (Finland). + 14 => NULL, + // Albania. + 15 => 576, + // Algeria. + 16 => 769, + // American Samoa. + 17 => 778, + // Andorra. + 18 => 577, + // Angola. + 19 => 578, + // Anguilla. + 20 => 770, + // Antigua and Barbuda. + 21 => 579, + // Argentina. + 22 => 580, + // Armenia. + 23 => 581, + // Aruba (The Netherlands). + 24 => 11773, + // Australia. + 25 => 582, + // Austria. + 26 => 583, + // Azerbaijan. + 27 => 584, + // Azores Islands (Portugal). + 28 => NULL, + // Bahamas. 
+ 29 => 592, + // Bahrain. + 30 => 585, + // Bangladesh. + 31 => 591, + // Barbados. + 32 => 586, + // Belarus. + 33 => 595, + // Belgium. + 34 => 588, + // Belize. + 35 => 602, + // Benin. + 36 => 589, + // Bermuda. + 37 => 590, + // Bhutan. + 38 => 593, + // Bolivia (Plurinational State of). + 39 => 596, + // Bonaire, Saint Eustatius and Saba (The Netherlands). + 14894 => NULL, + // Bosnia and Herzegovina. + 40 => 600, + // Botswana. + 41 => 597, + // Brazil. + 42 => 598, + // British Virgin Islands. + 43 => 771, + // Brunei Darussalam. + 44 => 599, + // Bulgaria. + 45 => 601, + // Burkina Faso. + 46 => 594, + // Burundi. + 47 => 587, + // Cabo Verde. + 52 => 615, + // Cambodia. + 48 => 603, + // Cameroon. + 49 => 349, + // Canada. + 50 => 604, + // Canary Islands (Spain). + 51 => 11076, + // Cayman Islands. + 53 => 605, + // Central African Republic. + 54 => 399, + // Chad. + 55 => 410, + // Channel Islands. + 56 => NULL, + // Chile. + 57 => 607, + // China. + 58 => 606, + // China - Hong Kong (Special Administrative Region). + 59 => 646, + // China - Macau (Special Administrative Region). + 60 => 674, + // China - Taiwan Province. + 61 => NULL, + // Christmas Island (Australia). + 62 => NULL, + // Cocos (Keeling) Islands (Australia). + 63 => NULL, + // Colombia. + 64 => 612, + // Comoros. + 65 => 610, + // Congo. + 66 => 476, + // Cook Islands. + 67 => 611, + // Costa Rica. + 68 => 613, + // Côte d'Ivoire. + 69 => 509, + // Croatia. + 70 => 648, + // Cuba. + 71 => 614, + // Curaçao (The Netherlands). + 14893 => 11774, + // Cyprus. + 72 => 616, + // Czechia. + 73 => 617, + // Democratic People's Republic of Korea. + 74 => 663, + // Democratic Republic of the Congo. + 75 => 486, + // Denmark. + 76 => 618, + // Djibouti. + 77 => 12122, + // Dominica. + 78 => 619, + // Dominican Republic. + 79 => 620, + // Easter Island (Chile). + 80 => NULL, + // Ecuador. + 81 => 621, + // Egypt. + 82 => 1, + // El Salvador. + 83 => 720, + // Equatorial Guinea. 
+ 84 => 622, + // Eritrea. + 85 => 157, + // Estonia. + 86 => 623, + // Eswatini. + 223 => 736, + // Ethiopia. + 87 => 160, + // Falkland Islands (Malvinas). + 88 => 772, + // Faroe Islands (Denmark). + 89 => 630, + // Fiji. + 90 => 625, + // Finland. + 91 => 626, + // France. + 92 => 629, + // French Guiana (France). + 93 => 624, + // French Polynesia (France). + 94 => 628, + // Gabon. + 96 => 632, + // Galapagos Islands (Ecuador). + 97 => 12041, + // Gambia. + 98 => 633, + // Georgia. + 100 => 635, + // Germany. + 101 => 636, + // Ghana. + 102 => 637, + // Gibraltar. + 103 => 638, + // Greece. + 104 => 640, + // Greenland (Denmark). + 105 => 785, + // Grenada. + 106 => 641, + // Guadeloupe (France). + 107 => NULL, + // Guam. + 108 => 775, + // Guatemala. + 109 => 12151, + // Guinea. + 110 => 643, + // Guinea-Bissau. + 111 => 639, + // Guyana. + 112 => 644, + // Haiti. + 113 => 645, + // Heard Island and McDonald Islands (Australia). + 114 => NULL, + // Holy See. + 115 => 756, + // Honduras. + 116 => 647, + // Hungary. + 117 => 649, + // Iceland. + 118 => 650, + // India. + 119 => 651, + // Indonesia. + 120 => 652, + // Iran (Islamic Republic of). + 121 => 654, + // Iraq. + 122 => 5, + // Ireland. + 123 => 653, + // Isle of Man (The United Kingdom of Great Britain and Northern Ireland). + 124 => NULL, + // Israel. + 125 => 655, + // Italy. + 126 => 656, + // Jamaica. + 127 => 657, + // Japan. + 128 => 658, + // Jordan. + 129 => 70, + // Kazakhstan. + 130 => 659, + // Kenya. + 131 => 178, + // Kiribati. + 132 => 661, + // Kuwait. + 133 => 664, + // Kyrgyzstan. + 134 => 660, + // Lao People's Democratic Republic (the). + 135 => 665, + // Latvia. + 136 => 673, + // Lebanon. + 137 => 71, + // Lesotho. + 138 => 668, + // Liberia. + 139 => 535, + // Libya. + 140 => 666, + // Liechtenstein. + 141 => 669, + // Lithuania. + 142 => 671, + // Luxembourg. + 143 => 672, + // Madagascar. + 144 => 675, + // Madeira (Portugal). + 145 => NULL, + // Malawi. 
+ 146 => 686, + // Malaysia. + 147 => 685, + // Maldives. + 148 => 681, + // Mali. + 149 => 684, + // Malta. + 150 => 690, + // Marshall Islands. + 151 => 683, + // Martinique (France). + 152 => 676, + // Mauritania. + 153 => 677, + // Mauritius. + 154 => 692, + // Mayotte (France). + 155 => 768, + // Mexico. + 156 => 682, + // Micronesia (Federated States of). + 157 => 631, + // Moldova. + 158 => 680, + // Monaco. + 159 => 679, + // Mongolia. + 160 => 687, + // Montenegro. + 161 => 691, + // Montserrat. + 162 => 773, + // Morocco. + 163 => 688, + // Mozambique. + 164 => 689, + // Myanmar. + 165 => 693, + // Namibia. + 166 => 694, + // Nauru. + 167 => 702, + // Nepal. + 168 => 695, + // Netherlands. + 169 => 696, + // Netherlands Antilles (The Netherlands). + 170 => NULL, + // New Caledonia (France). + 171 => 627, + // New Zealand. + 172 => 703, + // Nicaragua. + 173 => 698, + // Niger. + 174 => 1621, + // Nigeria. + 175 => 699, + // Niue (New Zealand). + 176 => 700, + // Norfolk Island (Australia). + 177 => 779, + // Northern Mariana Islands (The United States of America). + 178 => 776, + // Norway. + 179 => 701, + // Occupied Palestinian territory. + 180 => NULL, + // Oman. + 181 => 704, + // Pakistan. + 182 => 705, + // Palau. + 183 => 710, + // Panama. + 184 => 706, + // Papua New Guinea. + 185 => 711, + // Paraguay. + 186 => 707, + // Peru. + 187 => 708, + // Philippines. + 188 => 709, + // Pitcairn Islands. + 189 => 784, + // Poland. + 190 => 712, + // Portugal. + 191 => 713, + // Puerto Rico (The United States of America). + 192 => 714, + // Qatar. + 193 => 715, + // Republic of Korea. + 194 => 662, + // Réunion (France). + 195 => NULL, + // Romania. + 196 => 716, + // Russian Federation. + 197 => 12065, + // Rwanda. + 198 => 719, + // Saint Barthélemy (France). + 14890 => NULL, + // Saint Helena. + 199 => 783, + // Saint Kitts and Nevis. + 200 => 731, + // Saint Lucia. + 201 => 667, + // Saint Martin (France). 
+ 14891 => NULL, + // Saint Pierre and Miquelon (France). + 202 => 781, + // Saint Vincent and the Grenadines. + 203 => 757, + // Samoa. + 204 => 759, + // San Marino. + 205 => 727, + // Sao Tome and Principe. + 206 => 732, + // Saudi Arabia. + 207 => 721, + // Senegal. + 208 => 723, + // Serbia. + 209 => 722, + // Seychelles. + 210 => 724, + // Sierra Leone. + 211 => 726, + // Singapore. + 212 => 725, + // Sint Maarten (The Netherlands). + 14892 => 12170, + // Slovakia. + 213 => 734, + // Slovenia. + 214 => 735, + // Solomon Islands. + 215 => 728, + // Somalia. + 216 => 192, + // South Africa. + 217 => 717, + // South Sudan. + 8657 => 259, + // Spain. + 218 => 729, + // Sri Lanka. + 219 => 670, + // Sudan. + 220 => 295, + // Suriname. + 221 => 733, + // Svalbard and Jan Mayen Islands. + 222 => 780, + // Sweden. + 224 => 737, + // Switzerland. + 225 => 738, + // Syrian Arab Republic. + 226 => 112, + // Tajikistan. + 227 => 742, + // Thailand. + 228 => 741, + // The Republic of North Macedonia. + 229 => 678, + // Timor-Leste. + 230 => 744, + // Togo. + 231 => 745, + // Tokelau. + 232 => 777, + // Tonga. + 233 => 746, + // Trinidad and Tobago. + 234 => 747, + // Tunisia. + 235 => 748, + // Türkiye. + 236 => 113, + // Turkmenistan. + 237 => 743, + // Turks and Caicos Islands. + 238 => 740, + // Tuvalu. + 239 => 749, + // Uganda. + 240 => 220, + // Ukraine. + 241 => 751, + // United Arab Emirates. + 242 => 750, + // United Kingdom of Great Britain and Northern Ireland. + 243 => 634, + // United Republic of Tanzania. + 244 => 217, + // United States of America. + 245 => 753, + // United States Virgin Islands. + 246 => 774, + // Uruguay. + 247 => 752, + // Uzbekistan. + 248 => 754, + // Vanuatu. + 249 => 755, + // Venezuela (Bolivarian Republic of). + 250 => 758, + // Viet Nam. + 251 => 730, + // Wallis and Futuna (France). + 252 => 782, + // Western Sahara. + 253 => 760, + // World. + 254 => 9999, + // Yemen. + 255 => 225, + // Zambia. + 256 => 761, + // Zimbabwe. 
+ 257 => 762, + ]; + + /** + * Language mapping. + * + * @var array + */ + protected array $languageMapping = [ + 'Arabic' => 6876, + 'English' => 267, + 'French' => 268, + 'Russian' => 10906, + 'Spanish' => 269, + ]; + + /** + * Content format mapping. + * + * @var array + */ + protected array $formatMapping = [ + // Analysis. + '3RP Documents' => 3, + 'Assessments' => 3, + 'Policy Papers' => 3, + 'Population Profiling' => 3, + 'Reports' => 3, + 'Reports and Assessments' => 3, + 'Reports and Policy Papers' => 3, + // Appeal. + 'National Refugee Response Plans' => 4, + 'Regional Response Plans' => 4, + 'Regional RRP Documents' => 4, + // Assessment. + 'CORE' => 5, + // Evaluation and Lessons Learned. + 'Promising Practices and Case Studies' => 6, + // Manual and Guideline. + 'Accountability and Inclusion' => 7, + 'Communication with Communities' => 7, + 'Countering Violent Extremism (CVE)' => 7, + 'CRRF' => 7, + 'Guidance' => 7, + 'Training Materials' => 7, + // News and Press Release. + 'Flash Update' => 8, + 'Media Reports' => 8, + 'Press Releases' => 8, + 'Protection Brief' => 8, + 'Updates' => 8, + // Other. + '3W' => 9, + 'Contact List' => 9, + 'COVID-19' => 9, + 'Funding' => 9, + 'Meeting Minutes' => 9, + 'Operations Cell' => 9, + 'Site Profiles' => 9, + 'Strategy Documents' => 9, + 'Terms of Reference (TOR)' => 9, + 'Webinars' => 9, + 'Who What Where' => 9, + // Situation Report. + 'Situation Reports' => 10, + 'Situation Reports / Updates' => 10, + 'Situation Updates' => 10, + // Map. + 'Maps' => 12, + // Infographic. + 'Dashboards & Factsheets' => 12570, + 'Data & Statistics' => 12570, + 'Statistics' => 12570, + ]; + + /** + * Theme mapping. + * + * @var array + */ + protected array $themeMapping = [ + // Shelter and Non-Food Items. + 'Basic Needs' => 4603, + // No good match. + 'Bureau' => NULL, + // Camp Coordination and Camp Management. + 'Camp Coordination and Management' => 49458, + // Humanitarian Financing. 
+ 'Cash Assistance' => 4597, + // No good match. + 'Country Operation' => NULL, + // Recovery and Reconstruction. + 'Early Recovery' => 4601, + // Education. + 'Education' => 4592, + // Shelter and Non-Food Items. + 'Emergency Shelter and NFI' => 4603, + // Logistics and Telecommunications. + 'Emergency Telecommunications' => 4598, + // Food and Nutrition. + 'Food Security' => 4593, + // Health. + 'Health' => 4595, + // Protection and Human Rights. + 'Human Trafficking' => 4600, + // Logistics and Telecommunications. + 'Logistics' => 4598, + // No good match. + 'Other' => NULL, + // Protection and Human Rights. + 'Protection' => 4600, + // Water Sanitation Hygiene. + 'Water Sanitation Hygiene' => 4604, + ]; + + /** + * {@inheritdoc} + */ + public function buildConfigurationForm(array $form, FormStateInterface $form_state): array { + $form = parent::buildConfigurationForm($form, $form_state); + + return $form; + } + + /** + * {@inheritdoc} + */ + public function importContent(int $limit = 50): bool { + // Get list of documents. + try { + $timeout = $this->getPluginSetting('timeout') ?? 5; + $api_url = $this->getPluginSetting('api_url'); + $api_key = $this->getPluginSetting('api_key'); + $list_endpoint = $this->getPluginSetting('list_endpoint'); + $provider_uuid = $this->getPluginSetting('provider_uuid'); + + // Retrieve the POST API content processor plugin. + $plugin = $this->contentProcessorPluginManager->getPluginByResource('reports'); + + // Ensure the provider is valid. + $plugin->getProvider($provider_uuid); + + // Query the UNHCR API. + $query = http_build_query([ + 'API_KEY' => $api_key, + 'order' => ['created' => 'desc'], + 'limit' => $limit, + ]); + + $url = rtrim($api_url, '/') . '/' . trim($list_endpoint, '?/') . '?' . $query; + + $response = $this->httpClient->get($url, [ + 'connect_timeout' => $timeout, + 'timeout' => $timeout, + ]); + + if ($response->getStatusCode() !== 200) { + // @todo try to retrieve the error message. 
+ throw new \Exception('Failure with response code: ' . $response->getStatusCode()); + } + + $content = $response->getBody()->getContents(); + + if (!empty($content)) { + $documents = json_decode($content, TRUE, flags: \JSON_THROW_ON_ERROR); + } + else { + $this->getLogger()->notice('No documents'); + return TRUE; + } + } + catch (\Exception $exception) { + $this->getLogger()->error($exception->getMessage()); + return FALSE; + } + + $processed = $this->processDocuments($documents, $provider_uuid, $plugin); + + // @todo check if we want to return TRUE only if there was no errors or if + // return TRUE for partial success is fine enough. + return $processed > 0; + } + + /** + * Process the documents retrieved from the UNHCR API. + * + * Documents without an ID or a document URL are logged and ignored. + * + * @param array $documents + * UNHCR documents. + * @param string $provider_uuid + * The provider UUID. + * @param \Drupal\reliefweb_post_api\Plugin\ContentProcessorPluginInterface $plugin + * The Post API content plugin processor used to import the documents. + * + * @return int + * The number of documents that were imported successfully or skipped + * because a node with the same UUID already exists. + */ + protected function processDocuments(array $documents, string $provider_uuid, ContentProcessorPluginInterface $plugin): int { + $schema = $this->getJsonSchema('report'); + + // Source: UNHCR. + $source = [2868]; + + // @todo we use that currently to allow the submissions because the body + // field is mandatory but this should be replaced with something else or + // allow an empty body. + $default_description = 'Please refer to the attachment.'; + + // The original mapping is ReliefWeb country ID to UNHCR country code for + // convenience in keeping it up to date. We need to flip it to easily look + // up the ID from the UNHCR code below. + $country_mapping = array_flip(array_filter($this->countryMapping)); + + // Prepare the documents and submit them. + $processed = 0; + foreach ($documents as $document) { + // Retrieve the document ID. 
+ if (!isset($document['id'])) { + $this->getLogger()->notice('Undefined document ID, skipping document import.'); + continue; + } + + // Retrieve the document URL. + if (!isset($document['documentLink'])) { + $this->getLogger()->notice(strtr('Undefined document URL for document ID: @id, skipping document import.', [ + '@id' => $document['id'], + ])); + continue; + } + $url = $document['documentLink']; + + // Generate the UUID for the document. + $uuid = $this->generateUuid($url); + + // Skip if there is already an entity with the same UUID. There is no + // good data in the UNHCR API that we can use to determine if the document + // has changed so we assume it hasn't and skip its processing. + // + // @todo maybe we can store the `updated` date of the document somewhere + // and use that to determine if the document has been updated. + $node = $this->entityRepository->loadEntityByUuid('node', $uuid); + if (isset($node)) { + $processed++; + continue; + } + + // Retrieve the title and clean it. + $title = trim($document['title'] ?? ''); + + // Retrieve the description and use the default one if empty. + $body = trim($document['description'] ?? '') ?: $default_description; + + // Retrieve the publication date. + $published = $document['publishDate'] ?? $document['created'] ?? NULL; + + // Retrieve the document languages and default to English if none of the + // supported languages were found. + $languages = []; + foreach ($document['languageName'] ?? [] as $language) { + // Note: UNHCR language items have a 'name' property. + if (isset($this->languageMapping[$language['name']])) { + $languages[$language['name']] = $this->languageMapping[$language['name']]; + } + } + if (empty($languages)) { + $languages['English'] = $this->languageMapping['English']; + } + + // Retrieve the content format and map it to 'Other' if there is no match. + $formats = [9]; + foreach ($document['docTypeName'] ?? [] as $type) { + // Note: UNHCR doc type items are name strings directly. 
+ if (isset($this->formatMapping[$type])) { + $formats = [$this->formatMapping[$type]]; + break; + } + } + + // Retrieve the countries. Consider the first one as the primary country. + $countries = []; + foreach ($document['location'] ?? [] as $location) { + // Note: UNHCR location items have a 'code' property. + if (isset($country_mapping[$location['code']])) { + $countries[$location['code']] = $country_mapping[$location['code']]; + } + } + + // Retrieve the themes. + $themes = []; + foreach ($document['sectorName'] ?? [] as $sector) { + // Note: UNHCR sector items are name strings directly. + if (isset($this->themeMapping[$sector])) { + $themes[$sector] = $this->themeMapping[$sector]; + } + } + + // Retrieve the data for the attachment if any. + $files = []; + if (isset($document['downloadLink'])) { + $info = $this->getRemoteFileInfo($document['downloadLink']); + if (!empty($info)) { + $file_url = $document['downloadLink']; + $file_uuid = $this->generateUuid($file_url, $uuid); + $files[] = [ + 'url' => $file_url, + 'uuid' => $file_uuid, + ] + $info; + } + } + + // Submission data. + $data = array_filter([ + 'provider' => $provider_uuid, + 'bundle' => 'report', + 'url' => $url, + 'uuid' => $uuid, + 'title' => $title, + 'body' => $body, + 'source' => $source, + 'published' => $published, + 'origin' => $url, + 'language' => array_values($languages), + 'country' => array_values($countries), + 'format' => array_values($formats), + 'theme' => array_values($themes), + 'file' => array_values($files), + ]); + + // Submit the document directly, no need to go through the queue. + try { + $plugin->process($data, $schema); + $processed++; + } + catch (\Exception $exception) { + $this->getLogger()->error(strtr('Unable to process document @id: @exception', [ + '@id' => $document['id'], + '@exception' => $exception->getMessage(), + ])); + } + } + + return $processed; + } + + /** + * Get the checksum and filename of a remote file. + * + * @param string $url + * Remote file URL. 
+ * @param string $max_size + * Maximum file size (ex: 2MB). Defaults to the max size allowed for the + * report attachments. + * + * @return array + * Checksum and filename of the remote file or an empty array if the file + * could not be retrieved or is not valid. + * + * @throws \Exception + * If there is no allowed max size or no allowed file extensions. + */ + protected function getRemoteFileInfo(string $url, string $max_size = ''): array { + // Honor the given max size if any, otherwise fall back to the max size + // allowed for the report attachments. + $max_bytes = $max_size !== '' ? (int) \Drupal\Component\Utility\Bytes::toNumber($max_size) : $this->getReportAttachmentAllowedMaxSize(); + if (empty($max_bytes)) { + throw new \Exception('No allowed file max size.'); + } + + $allowed_extensions = $this->getReportAttachmentAllowedExtensions(); + if (empty($allowed_extensions)) { + throw new \Exception('No allowed file extensions.'); + } + + try { + $response = $this->httpClient->get($url, [ + 'stream' => TRUE, + // @todo retrieve that from the configuration. + 'connect_timeout' => 30, + 'timeout' => 600, + ]); + + // Fail early if the announced size is too large. The header value is a + // string so it is cast to compare numbers. + if ($max_bytes > 0 && (int) $response->getHeaderLine('Content-Length') > $max_bytes) { + throw new \Exception('File is too large.'); + } + + // Retrieve the filename. + $content_disposition = $response->getHeaderLine('Content-Disposition'); + if (preg_match('/filename="?([^"]+)"?/i', $content_disposition, $matches) !== 1) { + throw new \Exception('Unable to retrieve file name.'); + } + + // Sanitize the file name. 
+ $filename = $this->sanitizeFileName(urldecode($matches[1]), $allowed_extensions); + if (empty($filename)) { + throw new \Exception(strtr('Invalid filename: @filename.', [ + '@filename' => $matches[1], + ])); + } + + $body = $response->getBody(); + + // Hash the content chunk by chunk so the whole file is never held in + // memory. The size is enforced while reading because the Content-Length + // header may be missing or wrong. The stream is closed in the finally + // block below. + $hash = hash_init('sha256'); + $size = 0; + while (!$body->eof()) { + $chunk = $body->read(1024); + $size += strlen($chunk); + if ($max_bytes > 0 && $size > $max_bytes) { + throw new \Exception('File is too large.'); + } + hash_update($hash, $chunk); + } + + $checksum = hash_final($hash); + } + catch (\Exception $exception) { + $this->getLogger()->notice(strtr('Unable to retrieve file information for @url: @exception', [ + '@url' => $url, + '@exception' => $exception->getMessage(), + ])); + return []; + } + finally { + if (isset($body)) { + $body->close(); + } + } + + return [ + 'checksum' => $checksum, + 'filename' => $filename, + ]; + } + + /** + * Sanitize a file name. + * + * @param string $filename + * File name to sanitize. + * @param array $allowed_extensions + * Allowed file name extensions. + * + * @return string + * Sanitized file name. + * + * @see \Drupal\system\EventSubscriber\SecurityFileUploadEventSubscriber::sanitizeName() + */ + protected function sanitizeFileName(string $filename, array $allowed_extensions = []): string { + if (empty($allowed_extensions)) { + return ''; + } + + // Always rename dot files. + $filename = trim($filename, '.'); + // Remove any null bytes. + // @see https://php.net/manual/security.filesystem.nullbytes.php + $filename = str_replace(chr(0), '', $filename); + // Split up the filename by periods. The first part becomes the basename, + // the last part the final extension. + $filename_parts = explode('.', $filename); + // Remove file basename. + $basename = array_shift($filename_parts); + // Remove final extension. 
+ $extension = strtolower((string) array_pop($filename_parts)); + + // Ensure there is an extension and that it is allowed. A file without + // extension is rejected even if the allowed list contains an empty string + // and the strict comparison avoids loose numeric string matches. + if ($extension === '' || !in_array($extension, $allowed_extensions, TRUE)) { + return ''; + } + return $basename . '.' . $extension; + } + + /** + * {@inheritdoc} + */ + public function getJsonSchema(string $bundle): string { + $schema = parent::getJsonSchema($bundle); + $decoded = Json::decode($schema); + if ($decoded) { + // Allow attachment URLs without a PDF extension. + unset($decoded['properties']['file']['items']['properties']['url']['pattern']); + $schema = Json::encode($decoded); + } + return $schema; + } + +} diff --git a/html/modules/custom/reliefweb_import/src/Plugin/ImporterPluginBase.php b/html/modules/custom/reliefweb_import/src/Plugin/ReliefWebImporterPluginBase.php similarity index 52% rename from html/modules/custom/reliefweb_import/src/Plugin/ImporterPluginBase.php rename to html/modules/custom/reliefweb_import/src/Plugin/ReliefWebImporterPluginBase.php index ebe24bb41..79e5a1a5a 100644 --- a/html/modules/custom/reliefweb_import/src/Plugin/ImporterPluginBase.php +++ b/html/modules/custom/reliefweb_import/src/Plugin/ReliefWebImporterPluginBase.php @@ -5,21 +5,30 @@ namespace Drupal\reliefweb_import\Plugin; use Drupal\Component\Plugin\ConfigurableInterface; +use Drupal\Component\Utility\Bytes; +use Drupal\Component\Utility\Environment; use Drupal\Component\Utility\NestedArray; use Drupal\Core\Config\ConfigFactoryInterface; +use Drupal\Core\Entity\EntityFieldManagerInterface; +use Drupal\Core\Entity\EntityRepositoryInterface; +use Drupal\Core\Extension\ExtensionPathResolver; use Drupal\Core\Form\FormStateInterface; use Drupal\Core\Logger\LoggerChannelFactoryInterface; use Drupal\Core\Plugin\ContainerFactoryPluginInterface; use Drupal\Core\Plugin\PluginBase; use Drupal\Core\Plugin\PluginFormInterface; use Drupal\reliefweb_import\Exception\InvalidConfigurationException; +use Drupal\reliefweb_post_api\Plugin\ContentProcessorPluginManagerInterface; +use 
GuzzleHttp\ClientInterface; use Psr\Log\LoggerInterface; use Symfony\Component\DependencyInjection\ContainerInterface; +use Symfony\Component\Mime\MimeTypeGuesserInterface; +use Symfony\Component\Uid\Uuid; /** * Base importer plugin class. */ -abstract class ImporterPluginBase extends PluginBase implements ImporterPluginInterface, ContainerFactoryPluginInterface, PluginFormInterface, ConfigurableInterface { +abstract class ReliefWebImporterPluginBase extends PluginBase implements ReliefWebImporterPluginInterface, ContainerFactoryPluginInterface, PluginFormInterface, ConfigurableInterface { /** * Logger. @@ -41,6 +50,18 @@ abstract class ImporterPluginBase extends PluginBase implements ImporterPluginIn * The config factory service. * @param \Drupal\Core\Logger\LoggerChannelFactoryInterface $loggerFactory * The logger factory service. + * @param \GuzzleHttp\ClientInterface $httpClient + * The HTTP client. + * @param \Symfony\Component\Mime\MimeTypeGuesserInterface $mimeTypeGuesser + * The mime type guesser. + * @param \Drupal\Core\Entity\EntityFieldManagerInterface $entityFieldManager + * The entity field manager. + * @param \Drupal\Core\Entity\EntityRepositoryInterface $entityRepository + * The entity repository. + * @param \Drupal\reliefweb_post_api\Plugin\ContentProcessorPluginManagerInterface $contentProcessorPluginManager + * The Post API content processor plugin manager. + * @param \Drupal\Core\Extension\ExtensionPathResolver $pathResolver + * The path resolver service. 
*/ public function __construct( array $configuration, @@ -48,6 +69,12 @@ public function __construct( $plugin_definition, protected ConfigFactoryInterface $configFactory, protected LoggerChannelFactoryInterface $loggerFactory, + protected ClientInterface $httpClient, + protected MimeTypeGuesserInterface $mimeTypeGuesser, + protected EntityFieldManagerInterface $entityFieldManager, + protected EntityRepositoryInterface $entityRepository, + protected ContentProcessorPluginManagerInterface $contentProcessorPluginManager, + protected ExtensionPathResolver $pathResolver, ) { parent::__construct( $configuration, @@ -66,6 +93,12 @@ public static function create(ContainerInterface $container, array $configuratio $plugin_definition, $container->get('config.factory'), $container->get('logger.factory'), + $container->get('http_client'), + $container->get('file.mime_type.guesser.extension'), + $container->get('entity_field.manager'), + $container->get('entity.repository'), + $container->get('plugin.manager.reliefweb_post_api.content_processor'), + $container->get('extension.path.resolver'), ); } @@ -84,6 +117,13 @@ public function getPluginType(): string { return 'reliefweb_importer'; } + /** + * {@inheritdoc} + */ + public function enabled(): bool { + return $this->getPluginSetting('enabled', FALSE, FALSE); + } + /** * {@inheritdoc} */ @@ -168,4 +208,53 @@ public function defaultConfiguration(): array { return []; } + /** + * {@inheritdoc} + */ + public function getReportAttachmentAllowedExtensions(): array { + $definitions = $this->entityFieldManager->getFieldDefinitions('node', 'report'); + if (isset($definitions['field_file'])) { + $extensions = $definitions['field_file']->getSetting('file_extensions') ?? 
''; + return explode(' ', $extensions); + } + return []; + } + + /** + * {@inheritdoc} + */ + public function getReportAttachmentAllowedMaxSize(): int { + $definitions = $this->entityFieldManager->getFieldDefinitions('node', 'report'); + if (isset($definitions['field_file'])) { + $max_size = $definitions['field_file']->getSetting('max_filesize') ?? ''; + $max_size = !empty($max_size) ? Bytes::toNumber($max_size) : Environment::getUploadMaxSize(); + return (int) $max_size; + } + return 0; + } + + /** + * {@inheritdoc} + */ + public function getJsonSchema(string $bundle): string { + $path = $this->pathResolver->getPath('module', 'reliefweb_post_api'); + $schema = @file_get_contents($path . '/schemas/v2/' . $bundle . '.json'); + if ($schema === FALSE) { + throw new ContentProcessorException(strtr('Missing @bundle JSON schema.', [ + '@bundle' => $bundle, + ])); + } + return $schema; + } + + /** + * {@inheritdoc} + */ + public function generateUuid(string $string, ?string $namespace = NULL): string { + /* The default namespace is the UUID generated with + * Uuid::v5(Uuid::fromString(Uuid::NAMESPACE_DNS), 'reliefweb.int')->toRfc4122(); */ + $namespace = $namespace ?? '8e27a998-c362-5d1f-b152-d474e1d36af2'; + return Uuid::v5(Uuid::fromString($namespace), $string)->toRfc4122(); + } + } diff --git a/html/modules/custom/reliefweb_import/src/Plugin/ImporterPluginInterface.php b/html/modules/custom/reliefweb_import/src/Plugin/ReliefWebImporterPluginInterface.php similarity index 58% rename from html/modules/custom/reliefweb_import/src/Plugin/ImporterPluginInterface.php rename to html/modules/custom/reliefweb_import/src/Plugin/ReliefWebImporterPluginInterface.php index 19e1434af..8ac6d3304 100644 --- a/html/modules/custom/reliefweb_import/src/Plugin/ImporterPluginInterface.php +++ b/html/modules/custom/reliefweb_import/src/Plugin/ReliefWebImporterPluginInterface.php @@ -9,7 +9,7 @@ /** * Interface for the importer plugins. 
*/ -interface ImporterPluginInterface { +interface ReliefWebImporterPluginInterface { /** * Get the plugin label. @@ -27,6 +27,14 @@ public function getPluginLabel(): string; */ public function getPluginType(): string; + /** + * Check if the plugin is enabled. + * + * @return bool + * TRUE if enabled. + */ + public function enabled(): bool; + /** * Get the plugin logger. * @@ -65,4 +73,44 @@ public function getPluginSetting(string $key, mixed $default = NULL, bool $throw */ public function importContent(int $limit = 50): bool; + /** + * Get the list of allowed extensions for the report attachments. + * + * @return array + * List of allowed extensions. + */ + public function getReportAttachmentAllowedExtensions(): array; + + /** + * Get the allowed max size of the report attachments. + * + * @return int + * Allowed max size in bytes. + */ + public function getReportAttachmentAllowedMaxSize(): int; + + /** + * Retrieve a Post API schema. + * + * @param string $bundle + * Resource bundle. + * + * @return string + * Schema. + */ + public function getJsonSchema(string $bundle): string; + + /** + * Generate a UUID for a string (ex: URL). + * + * @param string $string + * String for which to generate a UUID. + * @param string|null $namespace + * Optional namespace. Defaults to the UUID v5 of `reliefweb.int`. + * + * @return string + * UUID. 
+ */ + public function generateUuid(string $string, ?string $namespace = NULL): string; + } diff --git a/html/modules/custom/reliefweb_import/src/Plugin/ImporterPluginManager.php b/html/modules/custom/reliefweb_import/src/Plugin/ReliefWebImporterPluginManager.php similarity index 68% rename from html/modules/custom/reliefweb_import/src/Plugin/ImporterPluginManager.php rename to html/modules/custom/reliefweb_import/src/Plugin/ReliefWebImporterPluginManager.php index c5a1d7d98..89579ff96 100644 --- a/html/modules/custom/reliefweb_import/src/Plugin/ImporterPluginManager.php +++ b/html/modules/custom/reliefweb_import/src/Plugin/ReliefWebImporterPluginManager.php @@ -7,12 +7,12 @@ use Drupal\Core\Cache\CacheBackendInterface; use Drupal\Core\Extension\ModuleHandlerInterface; use Drupal\Core\Plugin\DefaultPluginManager; -use Drupal\reliefweb_import\Attribute\Importer; +use Drupal\reliefweb_import\Attribute\ReliefWebImporter; /** - * Plugin manager for the importer plugins. + * Plugin manager for the ReliefWeb importer plugins. 
*/ -class ImporterPluginManager extends DefaultPluginManager implements ImporterPluginManagerInterface { +class ReliefWebImporterPluginManager extends DefaultPluginManager implements ReliefWebImporterPluginManagerInterface { /** * {@inheritdoc} @@ -26,8 +26,8 @@ public function __construct( 'Plugin/ReliefWebImporter', $namespaces, $module_handler, - ImporterPluginInterface::class, - Importer::class + ReliefWebImporterPluginInterface::class, + ReliefWebImporter::class ); $this->setCacheBackend($cache_backend, 'reliefweb_import_reliefweb_importer_plugins'); diff --git a/html/modules/custom/reliefweb_import/src/Plugin/ImporterPluginManagerInterface.php b/html/modules/custom/reliefweb_import/src/Plugin/ReliefWebImporterPluginManagerInterface.php similarity index 71% rename from html/modules/custom/reliefweb_import/src/Plugin/ImporterPluginManagerInterface.php rename to html/modules/custom/reliefweb_import/src/Plugin/ReliefWebImporterPluginManagerInterface.php index 4dd7c5345..d1afeb7d6 100644 --- a/html/modules/custom/reliefweb_import/src/Plugin/ImporterPluginManagerInterface.php +++ b/html/modules/custom/reliefweb_import/src/Plugin/ReliefWebImporterPluginManagerInterface.php @@ -7,4 +7,4 @@ /** * Interface for the importer plugin manager. */ -interface ImporterPluginManagerInterface {} +interface ReliefWebImporterPluginManagerInterface {}