From fbb4b0650934ec55a4d93c22928ea15aa9339af0 Mon Sep 17 00:00:00 2001 From: "Peter Droogmans (attiks)" Date: Wed, 13 Sep 2023 15:20:52 +0200 Subject: [PATCH 01/18] chore: permissions --- README.md | 10 ++ config/core.extension.yml | 1 - config/system.site.yml | 2 +- config/user.role.authenticated.yml | 12 ++ config/views.view.frontpage.yml | 260 ++++++++++++++++++++++------- 5 files changed, 225 insertions(+), 60 deletions(-) diff --git a/README.md b/README.md index ae626c4..44be894 100644 --- a/README.md +++ b/README.md @@ -4,3 +4,13 @@ Uses AI to summarize PDF files +## Flow + +1. User creates a new *Summary* node providing a title and a PDF file +2. A queue item is created to extract the text +3. `drush queue:process ocha_ai_summarize_extract_text` +4. The node is updated and the extracted text is added +5. A queue item is created to summarize the text +6. `drush queue:process ocha_ai_summarize_summarize` +7. The node is updated and the summary is added +8. User can proof-read and publish the node diff --git a/config/core.extension.yml b/config/core.extension.yml index f234c4c..fded26b 100644 --- a/config/core.extension.yml +++ b/config/core.extension.yml @@ -15,7 +15,6 @@ module: ctools: 0 datetime: 0 datetime_range: 0 - devel_php: 0 dynamic_page_cache: 0 editor: 0 entity_reference_revisions: 0 diff --git a/config/system.site.yml b/config/system.site.yml index aa655a2..430bca7 100644 --- a/config/system.site.yml +++ b/config/system.site.yml @@ -8,7 +8,7 @@ slogan: '' page: 403: '' 404: '' - front: /user/login + front: /node admin_compact_mode: false weight_select_max: 100 default_langcode: en diff --git a/config/user.role.authenticated.yml b/config/user.role.authenticated.yml index 80b2849..7c6b3a6 100644 --- a/config/user.role.authenticated.yml +++ b/config/user.role.authenticated.yml @@ -2,8 +2,13 @@ uuid: a0dde013-3f0e-4c0a-9dbc-7689935d777e langcode: en status: true dependencies: + config: + - node.type.summary + - workflows.workflow.summarize module: + - 
content_moderation - media + - node - system _core: default_config_hash: dJ0L2DNSj5q6XVZAGsuVDpJTh5UeYkIPwKrUOOpr8YI @@ -13,4 +18,11 @@ weight: 1 is_admin: false permissions: - 'access content' + - 'create summary content' + - 'delete own summary content' + - 'edit own summary content' + - 'use summarize transition archive' + - 'use summarize transition pdf_uploaded' + - 'use summarize transition published' + - 'view any unpublished content' - 'view media' diff --git a/config/views.view.frontpage.yml b/config/views.view.frontpage.yml index 142734e..abe9ba8 100644 --- a/config/views.view.frontpage.yml +++ b/config/views.view.frontpage.yml @@ -6,6 +6,7 @@ dependencies: - core.entity_view_mode.node.rss - core.entity_view_mode.node.teaser module: + - content_moderation - node - user _core: @@ -25,7 +26,202 @@ display: position: 0 display_options: title: '' - fields: { } + fields: + title: + id: title + table: node_field_data + field: title + relationship: none + group_type: group + admin_label: '' + entity_type: node + entity_field: title + plugin_id: field + label: Title + exclude: false + alter: + alter_text: false + text: '' + make_link: false + path: '' + absolute: false + external: false + replace_spaces: false + path_case: none + trim_whitespace: false + alt: '' + rel: '' + link_class: '' + prefix: '' + suffix: '' + target: '' + nl2br: false + max_length: 0 + word_boundary: true + ellipsis: true + more_link: false + more_link_text: '' + more_link_path: '' + strip_tags: false + trim: false + preserve_tags: '' + html: false + element_type: '' + element_class: '' + element_label_type: '' + element_label_class: '' + element_label_colon: true + element_wrapper_type: '' + element_wrapper_class: '' + element_default_classes: true + empty: '' + hide_empty: false + empty_zero: false + hide_alter_empty: true + click_sort_column: value + type: string + settings: + link_to_entity: true + group_column: value + group_columns: { } + group_rows: true + delta_limit: 0 + 
delta_offset: 0 + delta_reversed: false + delta_first_last: false + multi_type: separator + separator: ', ' + field_api_classes: false + status: + id: status + table: node_field_data + field: status + relationship: none + group_type: group + admin_label: '' + entity_type: node + entity_field: status + plugin_id: field + label: Published + exclude: false + alter: + alter_text: false + text: '' + make_link: false + path: '' + absolute: false + external: false + replace_spaces: false + path_case: none + trim_whitespace: false + alt: '' + rel: '' + link_class: '' + prefix: '' + suffix: '' + target: '' + nl2br: false + max_length: 0 + word_boundary: true + ellipsis: true + more_link: false + more_link_text: '' + more_link_path: '' + strip_tags: false + trim: false + preserve_tags: '' + html: false + element_type: '' + element_class: '' + element_label_type: '' + element_label_class: '' + element_label_colon: true + element_wrapper_type: '' + element_wrapper_class: '' + element_default_classes: true + empty: '' + hide_empty: false + empty_zero: false + hide_alter_empty: true + click_sort_column: value + type: boolean + settings: + format: default + format_custom_false: '' + format_custom_true: '' + group_column: value + group_columns: { } + group_rows: true + delta_limit: 0 + delta_offset: 0 + delta_reversed: false + delta_first_last: false + multi_type: separator + separator: ', ' + field_api_classes: false + moderation_state: + id: moderation_state + table: node_field_data + field: moderation_state + relationship: none + group_type: group + admin_label: '' + entity_type: node + plugin_id: moderation_state_field + label: 'Moderation state' + exclude: false + alter: + alter_text: false + text: '' + make_link: false + path: '' + absolute: false + external: false + replace_spaces: false + path_case: none + trim_whitespace: false + alt: '' + rel: '' + link_class: '' + prefix: '' + suffix: '' + target: '' + nl2br: false + max_length: 0 + word_boundary: true + ellipsis: true 
+ more_link: false + more_link_text: '' + more_link_path: '' + strip_tags: false + trim: false + preserve_tags: '' + html: false + element_type: '' + element_class: '' + element_label_type: '' + element_label_class: '' + element_label_colon: true + element_wrapper_type: '' + element_wrapper_class: '' + element_default_classes: true + empty: '' + hide_empty: false + empty_zero: false + hide_alter_empty: true + click_sort_column: value + type: content_moderation_state + settings: { } + group_column: value + group_columns: { } + group_rows: true + delta_limit: 0 + delta_offset: 0 + delta_reversed: false + delta_first_last: false + multi_type: separator + separator: ', ' + field_api_classes: false pager: type: full options: @@ -133,59 +329,6 @@ display: granularity: second arguments: { } filters: - promote: - id: promote - table: node_field_data - field: promote - relationship: none - group_type: group - admin_label: '' - entity_type: node - entity_field: promote - plugin_id: boolean - operator: '=' - value: '1' - group: 1 - exposed: false - expose: - operator_id: '' - label: '' - description: '' - use_operator: false - operator: '' - operator_limit_selection: false - operator_list: { } - identifier: '' - required: false - remember: false - multiple: false - remember_roles: - authenticated: authenticated - is_grouped: false - group_info: - label: '' - description: '' - identifier: '' - optional: true - widget: select - multiple: false - remember: false - default_group: All - default_group_multiple: { } - group_items: { } - status: - id: status - table: node_field_data - field: status - entity_type: node - entity_field: status - plugin_id: boolean - value: '1' - group: 1 - expose: - operator: '' - operator_limit_selection: false - operator_list: { } langcode: id: langcode table: node_field_data @@ -229,12 +372,10 @@ display: default_group_multiple: { } group_items: { } style: - type: default + type: table options: - grouping: { } row_class: '' - default_row_class: true 
- uses_fields: false + default_row_class: false row: type: 'entity:node' options: @@ -254,6 +395,7 @@ display: cache_metadata: max-age: -1 contexts: + - 'languages:language_content' - 'languages:language_interface' - url.query_args - 'user.node_grants:view' @@ -290,6 +432,7 @@ display: cache_metadata: max-age: -1 contexts: + - 'languages:language_content' - 'languages:language_interface' - 'user.node_grants:view' - user.permissions @@ -305,6 +448,7 @@ display: cache_metadata: max-age: -1 contexts: + - 'languages:language_content' - 'languages:language_interface' - url.query_args - 'user.node_grants:view' From 647df5930f7936eff2e3dcb258d0de4efee22a01 Mon Sep 17 00:00:00 2001 From: "Peter Droogmans (attiks)" Date: Wed, 13 Sep 2023 15:55:42 +0200 Subject: [PATCH 02/18] chore: Add pdf tools --- docker/Dockerfile | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 475c4d3..3a1046c 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,5 +1,5 @@ # Build the code. -FROM public.ecr.aws/unocha/unified-builder:8.1-stable as builder +FROM public.ecr.aws/unocha/php-k8s:8.1-stable as builder ARG BRANCH_ENVIRONMENT @@ -10,6 +10,10 @@ COPY . /srv/www WORKDIR /srv/www +# Add pdfinfo and pdftotext +RUN apk add -U poppler-utils && \ + rm -rf /var/cache/apk/* + # Clean up previous composer installation and run new one. RUN rm -rf ./vendor && composer install --no-interaction --no-dev From b1eed1ecd853a5f9a843b61561f77cce7653de43 Mon Sep 17 00:00:00 2001 From: "Peter Droogmans (attiks)" Date: Wed, 13 Sep 2023 16:00:19 +0200 Subject: [PATCH 03/18] chore: Add pdf tools --- docker/Dockerfile | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 3a1046c..4311423 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -10,9 +10,8 @@ COPY . 
/srv/www WORKDIR /srv/www -# Add pdfinfo and pdftotext -RUN apk add -U poppler-utils && \ - rm -rf /var/cache/apk/* +# Add pdfinfo and pdftotext. +RUN apk add -U --no-cache poppler-utils # Clean up previous composer installation and run new one. RUN rm -rf ./vendor && composer install --no-interaction --no-dev From b30126c280c2bfbc67780f6d20f265e6d4c65287 Mon Sep 17 00:00:00 2001 From: "Peter Droogmans (attiks)" Date: Thu, 14 Sep 2023 11:32:19 +0200 Subject: [PATCH 04/18] feat: Choose brain, automatic mode --- README.md | 19 +- ...tity_form_display.node.summary.default.yml | 55 +++--- ...tity_view_display.node.summary.default.yml | 32 +++- ...ntity_view_display.node.summary.teaser.yml | 4 + ...ield.field.node.summary.field_ai_brain.yml | 23 +++ ...ield.node.summary.field_automatic_mode.yml | 23 +++ config/field.storage.node.field_ai_brain.yml | 27 +++ ...ield.storage.node.field_automatic_mode.yml | 18 ++ config/user.role.authenticated.yml | 4 + config/workflows.workflow.summarize.yml | 58 +++++-- .../ocha_ai_summarize.module | 164 ++++++++++++++++-- .../OchaAiSummarizeExtractText.php | 19 +- .../QueueWorker/OchaAiSummarizeSummarize.php | 68 +++++--- 13 files changed, 418 insertions(+), 96 deletions(-) create mode 100644 config/field.field.node.summary.field_ai_brain.yml create mode 100644 config/field.field.node.summary.field_automatic_mode.yml create mode 100644 config/field.storage.node.field_ai_brain.yml create mode 100644 config/field.storage.node.field_automatic_mode.yml diff --git a/README.md b/README.md index 44be894..7dd3b9e 100644 --- a/README.md +++ b/README.md @@ -4,9 +4,26 @@ Uses AI to summarize PDF files +## Settings + +```php +$config['ocha_ai_summarize.settings']['openai_token'] = 'xxx'; +$config['ocha_ai_summarize.settings']['azure_endpoint'] = 'https://tst003.openai.azure.com/openai/deployments/tst003/chat/completions?api-version=2023-03-15-preview'; +$config['ocha_ai_summarize.settings']['azure_apikey'] = 'yyy'; +``` + +## Cron + +```bash +drush 
queue:process ocha_ai_summarize_extract_text +drush queue:process ocha_ai_summarize_summarize +``` + +We can either use cron to run the queues or run them separately + ## Flow -1. User creates a new *Summary* node providing a title and a PDF file +1. User creates a new *Summary* node providing a title and a PDF file and which brain to use 2. A queue item is created to extract the text 3. `drush queue:process ocha_ai_summarize_extract_text` 4. The node is updated and the extracted text is added diff --git a/config/core.entity_form_display.node.summary.default.yml b/config/core.entity_form_display.node.summary.default.yml index 0adcdaa..c156116 100644 --- a/config/core.entity_form_display.node.summary.default.yml +++ b/config/core.entity_form_display.node.summary.default.yml @@ -3,6 +3,8 @@ langcode: en status: true dependencies: config: + - field.field.node.summary.field_ai_brain + - field.field.node.summary.field_automatic_mode - field.field.node.summary.field_pdf - field.field.node.summary.field_pdf_text - field.field.node.summary.field_summary @@ -19,20 +21,33 @@ mode: default content: created: type: datetime_timestamp - weight: 10 + weight: 11 + region: content + settings: { } + third_party_settings: { } + field_ai_brain: + type: options_select + weight: 2 region: content settings: { } third_party_settings: { } + field_automatic_mode: + type: boolean_checkbox + weight: 3 + region: content + settings: + display_label: true + third_party_settings: { } field_pdf: type: file_generic - weight: 122 + weight: 4 region: content settings: progress_indicator: throbber third_party_settings: { } field_pdf_text: type: string_textarea - weight: 123 + weight: 5 region: content settings: rows: 5 @@ -40,7 +55,7 @@ content: third_party_settings: { } field_summary: type: string_textarea - weight: 124 + weight: 6 region: content settings: rows: 5 @@ -48,47 +63,33 @@ content: third_party_settings: { } langcode: type: language_select - weight: 2 + weight: 1 region: content settings: 
include_locked: true third_party_settings: { } moderation_state: type: moderation_state_default - weight: 100 + weight: 7 region: content settings: { } third_party_settings: { } path: type: path - weight: 30 + weight: 8 region: content settings: { } third_party_settings: { } - promote: - type: boolean_checkbox - weight: 15 - region: content - settings: - display_label: true - third_party_settings: { } status: type: boolean_checkbox - weight: 120 - region: content - settings: - display_label: true - third_party_settings: { } - sticky: - type: boolean_checkbox - weight: 16 + weight: 10 region: content settings: display_label: true third_party_settings: { } title: type: string_textfield - weight: -5 + weight: 0 region: content settings: size: 60 @@ -96,7 +97,7 @@ content: third_party_settings: { } uid: type: entity_reference_autocomplete - weight: 5 + weight: 12 region: content settings: match_operator: CONTAINS @@ -105,8 +106,10 @@ content: placeholder: '' third_party_settings: { } url_redirects: - weight: 50 + weight: 9 region: content settings: { } third_party_settings: { } -hidden: { } +hidden: + promote: true + sticky: true diff --git a/config/core.entity_view_display.node.summary.default.yml b/config/core.entity_view_display.node.summary.default.yml index 3d4cb9b..760fda0 100644 --- a/config/core.entity_view_display.node.summary.default.yml +++ b/config/core.entity_view_display.node.summary.default.yml @@ -3,39 +3,63 @@ langcode: en status: true dependencies: config: + - field.field.node.summary.field_ai_brain + - field.field.node.summary.field_automatic_mode - field.field.node.summary.field_pdf - field.field.node.summary.field_pdf_text - field.field.node.summary.field_summary - node.type.summary module: - file + - options - user id: node.summary.default targetEntityType: node bundle: summary mode: default content: + content_moderation_control: + settings: { } + third_party_settings: { } + weight: 4 + region: content + field_ai_brain: + type: list_default + 
label: inline + settings: { } + third_party_settings: { } + weight: 1 + region: content + field_automatic_mode: + type: boolean + label: inline + settings: + format: default + format_custom_false: '' + format_custom_true: '' + third_party_settings: { } + weight: 0 + region: content field_pdf: type: file_default label: inline settings: use_description_as_link_text: true third_party_settings: { } - weight: 0 + weight: 2 region: content field_summary: type: basic_string label: hidden settings: { } third_party_settings: { } - weight: 1 + weight: 3 region: content links: settings: { } third_party_settings: { } - weight: 2 + weight: 5 region: content hidden: - content_moderation_control: true field_pdf_text: true langcode: true diff --git a/config/core.entity_view_display.node.summary.teaser.yml b/config/core.entity_view_display.node.summary.teaser.yml index 0b3ac80..83b2c99 100644 --- a/config/core.entity_view_display.node.summary.teaser.yml +++ b/config/core.entity_view_display.node.summary.teaser.yml @@ -4,6 +4,8 @@ status: true dependencies: config: - core.entity_view_mode.node.teaser + - field.field.node.summary.field_ai_brain + - field.field.node.summary.field_automatic_mode - field.field.node.summary.field_pdf - field.field.node.summary.field_pdf_text - field.field.node.summary.field_summary @@ -26,6 +28,8 @@ content: weight: 100 region: content hidden: + field_ai_brain: true + field_automatic_mode: true field_pdf: true field_pdf_text: true field_summary: true diff --git a/config/field.field.node.summary.field_ai_brain.yml b/config/field.field.node.summary.field_ai_brain.yml new file mode 100644 index 0000000..2af4a7c --- /dev/null +++ b/config/field.field.node.summary.field_ai_brain.yml @@ -0,0 +1,23 @@ +uuid: 6d6ea603-35ad-402e-a8a8-8b6394f74b47 +langcode: en +status: true +dependencies: + config: + - field.storage.node.field_ai_brain + - node.type.summary + module: + - options +id: node.summary.field_ai_brain +field_name: field_ai_brain +entity_type: node 
+bundle: summary +label: 'AI Brain' +description: '' +required: false +translatable: false +default_value: + - + value: openai +default_value_callback: '' +settings: { } +field_type: list_string diff --git a/config/field.field.node.summary.field_automatic_mode.yml b/config/field.field.node.summary.field_automatic_mode.yml new file mode 100644 index 0000000..b539e42 --- /dev/null +++ b/config/field.field.node.summary.field_automatic_mode.yml @@ -0,0 +1,23 @@ +uuid: 2a2707bf-8013-4683-972b-0ad787b17223 +langcode: en +status: true +dependencies: + config: + - field.storage.node.field_automatic_mode + - node.type.summary +id: node.summary.field_automatic_mode +field_name: field_automatic_mode +entity_type: node +bundle: summary +label: 'Automatic mode' +description: 'When enabled will automatically run the whole flow without any user interaction needed.' +required: false +translatable: false +default_value: + - + value: 1 +default_value_callback: '' +settings: + on_label: 'On' + off_label: 'Off' +field_type: boolean diff --git a/config/field.storage.node.field_ai_brain.yml b/config/field.storage.node.field_ai_brain.yml new file mode 100644 index 0000000..b782346 --- /dev/null +++ b/config/field.storage.node.field_ai_brain.yml @@ -0,0 +1,27 @@ +uuid: ff29ae84-1cdf-49a6-ae9c-c71f8dfd7f0d +langcode: en +status: true +dependencies: + module: + - node + - options +id: node.field_ai_brain +field_name: field_ai_brain +entity_type: node +type: list_string +settings: + allowed_values: + - + value: openai + label: 'Open AI (chat-gpt-3.5)' + - + value: azure_trained + label: 'Trained Azure AI' + allowed_values_function: '' +module: options +locked: false +cardinality: 1 +translatable: true +indexes: { } +persist_with_no_fields: false +custom_storage: false diff --git a/config/field.storage.node.field_automatic_mode.yml b/config/field.storage.node.field_automatic_mode.yml new file mode 100644 index 0000000..4711735 --- /dev/null +++ 
b/config/field.storage.node.field_automatic_mode.yml @@ -0,0 +1,18 @@ +uuid: ef0e4fe9-eab6-47f4-97a0-6d1dd7373811 +langcode: en +status: true +dependencies: + module: + - node +id: node.field_automatic_mode +field_name: field_automatic_mode +entity_type: node +type: boolean +settings: { } +module: core +locked: false +cardinality: 1 +translatable: true +indexes: { } +persist_with_no_fields: false +custom_storage: false diff --git a/config/user.role.authenticated.yml b/config/user.role.authenticated.yml index 7c6b3a6..8c0c44e 100644 --- a/config/user.role.authenticated.yml +++ b/config/user.role.authenticated.yml @@ -22,7 +22,11 @@ permissions: - 'delete own summary content' - 'edit own summary content' - 'use summarize transition archive' + - 'use summarize transition extrat_text' - 'use summarize transition pdf_uploaded' - 'use summarize transition published' + - 'use summarize transition summarize' + - 'use summarize transition summarized_update' + - 'use summarize transition text_extracted_update' - 'view any unpublished content' - 'view media' diff --git a/config/workflows.workflow.summarize.yml b/config/workflows.workflow.summarize.yml index 14ea942..fba5559 100644 --- a/config/workflows.workflow.summarize.yml +++ b/config/workflows.workflow.summarize.yml @@ -13,27 +13,37 @@ type_settings: states: archived: label: Archived - weight: 2 + weight: 3 published: false default_revision: false draft: label: Draft - weight: -3 + weight: -4 + published: false + default_revision: false + extract_text: + label: 'Extract text' + weight: -2 published: false default_revision: false pdf_uploaded: label: 'PDF uploaded' - weight: -2 + weight: -3 published: false default_revision: false published: label: Published - weight: 1 + weight: 2 published: true default_revision: true + summarize: + label: Summarize + weight: 0 + published: false + default_revision: false summarized: label: Summarized - weight: 0 + weight: 1 published: false default_revision: false text_extracted: @@ 
-47,35 +57,59 @@ type_settings: from: - published to: archived - weight: 1 + weight: 2 archived_published: label: Restore from: - archived to: published - weight: 2 + weight: 3 + extrat_text: + label: 'Extrat text' + from: + - pdf_uploaded + to: extract_text + weight: -4 pdf_uploaded: label: 'PDF uploaded' from: - draft to: pdf_uploaded - weight: -3 + weight: -5 published: label: Published from: - summarized to: published - weight: 0 + weight: 1 + summarize: + label: Summarize + from: + - text_extracted + to: summarize + weight: -1 summarized: label: Summarized from: - - text_extracted + - summarize to: summarized - weight: -1 + weight: 0 + summarized_update: + label: 'Summarized (update)' + from: + - summarized + to: summarized + weight: 4 text_extracted: label: 'Text extracted' from: - - pdf_uploaded + - extract_text + to: text_extracted + weight: -3 + text_extracted_update: + label: 'Text extracted (update)' + from: + - text_extracted to: text_extracted weight: -2 entity_types: diff --git a/html/modules/custom/ocha_ai_summarize/ocha_ai_summarize.module b/html/modules/custom/ocha_ai_summarize/ocha_ai_summarize.module index 092566b..16e9e08 100644 --- a/html/modules/custom/ocha_ai_summarize/ocha_ai_summarize.module +++ b/html/modules/custom/ocha_ai_summarize/ocha_ai_summarize.module @@ -8,6 +8,7 @@ use Drupal\content_moderation\Entity\ContentModerationState; use Drupal\Core\Entity\Display\EntityViewDisplayInterface; use Drupal\Core\Entity\EntityInterface; +use Drupal\Core\Form\FormStateInterface; use Symfony\Component\Process\Process; /** @@ -25,21 +26,46 @@ function ocha_ai_summarize_node_update(EntityInterface $entity) { return; } - $queue_name = ''; - $content_moderation_state = ContentModerationState::loadFromModeratedEntity($entity); - if (!$content_moderation_state) { - $queue_name = 'ocha_ai_summarize_extract_text'; + /** @var \Drupal\content_moderation\ModerationInformationInterface $moderation_info */ + $moderation_info = 
Drupal::service('content_moderation.moderation_information'); + + // Check to see if the content is moderated or not. + $is_moderated = $moderation_info->isModeratedEntity($entity); + + if (!$is_moderated) { + return; } - else { - switch ($content_moderation_state->get('moderation_state')->value) { - case 'pdf_uploaded': - $queue_name = 'ocha_ai_summarize_extract_text'; - break; - - case 'text_extracted': - $queue_name = 'ocha_ai_summarize_summarize'; - break; - } + + $current_state = $entity->moderation_state->value; + $automatic_mode = $entity->field_automatic_mode->value; + + if (!$current_state) { + return; + } + + $queue_name = ''; + switch ($current_state) { + case 'pdf_uploaded': + if ($automatic_mode) { + $entity->set('moderation_state', 'extract_text'); + $entity->save(); + } + break; + + case 'extract_text': + $queue_name = 'ocha_ai_summarize_extract_text'; + break; + + case 'text_extracted': + if ($automatic_mode) { + $entity->set('moderation_state', 'summarize'); + $entity->save(); + } + break; + + case 'summarize': + $queue_name = 'ocha_ai_summarize_summarize'; + break; } if (empty($queue_name)) { @@ -49,9 +75,75 @@ function ocha_ai_summarize_node_update(EntityInterface $entity) { $queue = \Drupal::service('queue')->get($queue_name); $item = new \stdClass(); $item->nid = $entity->id(); + $item->brain = $entity->get('field_ai_brain')->value; $queue->createItem($item); } +/** + * Implements hook_form_FORM_ID_alter(). + */ +function ocha_ai_summarize_form_node_summary_form_alter(&$form, FormStateInterface $form_state, $form_id) { + ocha_ai_summarize_form_node_summary_alter($form, $form_state, $form_id); +} + +/** + * Implements hook_form_FORM_ID_alter(). + */ +function ocha_ai_summarize_form_node_summary_edit_form_alter(&$form, FormStateInterface $form_state, $form_id) { + ocha_ai_summarize_form_node_summary_alter($form, $form_state, $form_id); +} + +/** + * Alter summary form based on state. 
+ */ +function ocha_ai_summarize_form_node_summary_alter(&$form, FormStateInterface $form_state, $form_id) { + if ($form_id === 'node_summary_form') { + $form['field_pdf_text']['#access'] = FALSE; + $form['field_summary']['#access'] = FALSE; + $form['moderation_state']['#access'] = FALSE; + $form['moderation_state']['widget'][0]['state']['#default_value'] = 'pdf_uploaded'; + return; + } + + $node = $form_state->getFormObject()->getEntity(); + $current_state = $node->moderation_state->value; + if (!$current_state) { + return; + } + + switch ($current_state) { + case 'draft': + case 'pdf_uploaded': + case 'extract_text': + $form['field_pdf_text']['#access'] = FALSE; + $form['field_summary']['#access'] = FALSE; + $form['moderation_state']['#access'] = FALSE; + break; + + case 'text_extracted': + $form['field_summary']['#access'] = FALSE; + break; + + case 'summarize': + $form['field_pdf_text']['#disabled'] = TRUE; + unset($form['field_pdf_text']['widget'][$form['field_pdf_text']['widget']['#max_delta']]); + unset($form['field_pdf_text']['widget']['add_more']); + + $form['field_summary']['#access'] = FALSE; + $form['moderation_state']['#access'] = FALSE; + break; + + case 'summarized': + $form['field_pdf_text']['#disabled'] = TRUE; + unset($form['field_pdf_text']['widget'][$form['field_pdf_text']['widget']['#max_delta']]); + unset($form['field_pdf_text']['widget']['add_more']); + + $form['field_summary']['#disabled'] = TRUE; + break; + + } +} + /** * Implements hook_ENTITY_TYPE_view(). 
*/ @@ -79,12 +171,24 @@ function ocha_ai_summarize_node_view(array &$build, EntityInterface $entity, Ent 'class' => [], ], ], + 'extract_text' => [ + '#markup' => 'Extract text', + '#wrapper_attributes' => [ + 'class' => [], + ], + ], 'text_extracted' => [ '#markup' => 'Text extracted', '#wrapper_attributes' => [ 'class' => [], ], ], + 'summarize' => [ + '#markup' => 'Summarize', + '#wrapper_attributes' => [ + 'class' => [], + ], + ], 'summarized' => [ '#markup' => 'Summarized', '#wrapper_attributes' => [ @@ -209,9 +313,9 @@ function ocha_ai_summarize_extract_pages($filename) { } /** - * Make chat call. + * Make chat call to OpenAi. */ -function ocha_ai_summarize_http_call_chat($query) { +function ocha_ai_summarize_http_call_openai($query) { $config = \Drupal::config('ocha_ai_summarize.settings'); $http_client = \Drupal::httpClient(); @@ -234,3 +338,31 @@ function ocha_ai_summarize_http_call_chat($query) { $body = $response->getBody() . ''; return json_decode($body, TRUE); } + +/** + * Make chat call to Azure. + */ +function ocha_ai_summarize_http_call_azure($query) { + $config = \Drupal::config('ocha_ai_summarize.settings'); + $endpoint = $config->get('azure_endpoint'); + $key = $config->get('azure_apikey'); + + $http_client = \Drupal::httpClient(); + + $headers = [ + 'Content-Type' => 'application/json', + 'API-KEY' => $key, + ]; + + $response = $http_client->request( + 'POST', + $endpoint, + [ + 'headers' => $headers, + 'json' => $query, + ], + ); + + $body = $response->getBody() . 
''; + return json_decode($body, TRUE); +} diff --git a/html/modules/custom/ocha_ai_summarize/src/Plugin/QueueWorker/OchaAiSummarizeExtractText.php b/html/modules/custom/ocha_ai_summarize/src/Plugin/QueueWorker/OchaAiSummarizeExtractText.php index 31d0e81..a7f32df 100644 --- a/html/modules/custom/ocha_ai_summarize/src/Plugin/QueueWorker/OchaAiSummarizeExtractText.php +++ b/html/modules/custom/ocha_ai_summarize/src/Plugin/QueueWorker/OchaAiSummarizeExtractText.php @@ -6,7 +6,6 @@ use Drupal\Core\Entity\EntityTypeManagerInterface; use Drupal\Core\File\FileSystem; use Drupal\Core\Plugin\ContainerFactoryPluginInterface; -use Drupal\Core\Queue\QueueFactory; use Drupal\Core\Queue\QueueWorkerBase; use Symfony\Component\DependencyInjection\ContainerInterface; @@ -28,13 +27,6 @@ class OchaAiSummarizeExtractText extends QueueWorkerBase implements ContainerFac */ protected $entityTypeManager; - /** - * Queue. - * - * @var \Drupal\Core\Queue\QueueFactory - */ - protected $queue; - /** * Queue. * @@ -45,10 +37,9 @@ class OchaAiSummarizeExtractText extends QueueWorkerBase implements ContainerFac /** * {@inheritdoc} */ - public function __construct(array $configuration, $plugin_id, $plugin_definition, EntityTypeManagerInterface $entity_type_manager, QueueFactory $queue, FileSystem $file_system) { + public function __construct(array $configuration, $plugin_id, $plugin_definition, EntityTypeManagerInterface $entity_type_manager, FileSystem $file_system) { parent::__construct($configuration, $plugin_id, $plugin_definition); $this->entityTypeManager = $entity_type_manager; - $this->queue = $queue; $this->fileSystem = $file_system; } @@ -61,7 +52,6 @@ public static function create(ContainerInterface $container, array $configuratio $plugin_id, $plugin_definition, $container->get('entity_type.manager'), - $container->get('queue'), $container->get('file_system'), ); } @@ -87,7 +77,7 @@ public function processItem($data) { return; } - if 
($content_moderation_state->get('moderation_state')->value !== 'pdf_uploaded') { + if ($content_moderation_state->get('moderation_state')->value !== 'extract_text') { return; } @@ -113,11 +103,6 @@ public function processItem($data) { $node->set('field_pdf_text', $text); $node->set('moderation_state', 'text_extracted'); $node->save(); - - $queue = $this->queue->get('ocha_ai_summarize_summarize'); - $item = new \stdClass(); - $item->nid = $node->id(); - $queue->createItem($item); } } diff --git a/html/modules/custom/ocha_ai_summarize/src/Plugin/QueueWorker/OchaAiSummarizeSummarize.php b/html/modules/custom/ocha_ai_summarize/src/Plugin/QueueWorker/OchaAiSummarizeSummarize.php index 414da2d..9c4d69e 100644 --- a/html/modules/custom/ocha_ai_summarize/src/Plugin/QueueWorker/OchaAiSummarizeSummarize.php +++ b/html/modules/custom/ocha_ai_summarize/src/Plugin/QueueWorker/OchaAiSummarizeSummarize.php @@ -50,6 +50,7 @@ public static function create(ContainerInterface $container, array $configuratio * {@inheritdoc} */ public function processItem($data) { + $bot = $data->brain ?? 'openai'; $nid = $data->nid; if (empty($nid)) { return; @@ -68,7 +69,7 @@ public function processItem($data) { return; } - if ($content_moderation_state->get('moderation_state')->value !== 'text_extracted') { + if ($content_moderation_state->get('moderation_state')->value !== 'summarize') { return; } @@ -85,29 +86,38 @@ public function processItem($data) { continue; } - $results[] = ocha_ai_summarize_http_call_chat( - [ - 'model' => 'gpt-3.5-turbo-16k', - 'messages' => [ - [ - 'role' => 'user', - 'content' => "Summerize the following text:\n\n" . $text, - ], - ], - 'temperature' => .2, - 'max_tokens' => 300, - ], - ); + if ($bot == 'openai') { + $results[] = $this->sendToOpenAi("Summerize the following text in 3 paragraphs:\n\n" . $text); + } + else { + $results[] = $this->sendToAzureAi("Summerize the following text in 3 paragraphs:\n\n" . $text); + } } // Summarize the summaries. 
$text = ''; foreach ($results as $row) { - $text .= $row['choices'][0]['message']['content'] ?? ''; + $text .= $row; $text .= "\n"; } - $result = ocha_ai_summarize_http_call_chat( + if ($bot == 'openai') { + $summary = $this->sendToOpenAi("Summerize the following text in 3 paragraphs:\n\n" . $text); + } + else { + $summary = $this->sendToAzureAi("Summerize the following text in 3 paragraphs:\n\n" . $text); + } + + $node->set('field_summary', $summary); + $node->set('moderation_state', 'summarized'); + $node->save(); + } + + /** + * Send query to OpenAi. + */ + protected function sendToOpenAi($text) : string { + $result = ocha_ai_summarize_http_call_openai( [ 'model' => 'gpt-3.5-turbo-16k', 'messages' => [ @@ -121,11 +131,29 @@ public function processItem($data) { ], ); - $summary = $result['choices'][0]['message']['content']; + return $result['choices'][0]['message']['content'] ?? ''; + } - $node->set('field_summary', $summary); - $node->set('moderation_state', 'summarized'); - $node->save(); + /** + * Send query to Azure AI. + */ + protected function sendToAzureAi($text) : string { + $result = ocha_ai_summarize_http_call_azure( + [ + 'messages' => [ + [ + 'role' => 'system', + 'content' => 'You are an AI assistant that summarizes information.', + ], + [ + 'role' => 'user', + 'content' => $text, + ], + ], + ], + ); + + return $result['choices'][0]['message']['content'] ?? 
''; } } From 6b908ad40aa391167ce79decb4cdbc826f76d264 Mon Sep 17 00:00:00 2001 From: "Peter Droogmans (attiks)" Date: Fri, 15 Sep 2023 11:48:52 +0200 Subject: [PATCH 05/18] feat: BedRock support --- composer.json | 1 + composer.lock | 217 +++++++++++++++++- .../ocha_ai_summarize.module | 50 ++++ .../QueueWorker/OchaAiSummarizeSummarize.php | 42 +++- 4 files changed, 299 insertions(+), 11 deletions(-) diff --git a/composer.json b/composer.json index d80f63c..54b6fd4 100644 --- a/composer.json +++ b/composer.json @@ -21,6 +21,7 @@ ], "require": { "php": ">=8.1", + "aws/aws-sdk-php": "^3.281", "composer/installers": "^1.12", "cweagans/composer-patches": "^1.7", "drupal/admin_denied": "^2.0", diff --git a/composer.lock b/composer.lock index 5a9604f..3c7b6ae 100644 --- a/composer.lock +++ b/composer.lock @@ -4,7 +4,7 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", "This file is @generated automatically" ], - "content-hash": "8b0f34b5d0c05c4dd4f591c50038c532", + "content-hash": "8b1d00cbee5645cc0d6f55f4b45bfaf1", "packages": [ { "name": "asm89/stack-cors", @@ -62,6 +62,155 @@ }, "time": "2022-01-18T09:12:03+00:00" }, + { + "name": "aws/aws-crt-php", + "version": "v1.2.2", + "source": { + "type": "git", + "url": "https://github.com/awslabs/aws-crt-php.git", + "reference": "2f1dc7b7eda080498be96a4a6d683a41583030e9" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/awslabs/aws-crt-php/zipball/2f1dc7b7eda080498be96a4a6d683a41583030e9", + "reference": "2f1dc7b7eda080498be96a4a6d683a41583030e9", + "shasum": "" + }, + "require": { + "php": ">=5.5" + }, + "require-dev": { + "phpunit/phpunit": "^4.8.35||^5.6.3||^9.5", + "yoast/phpunit-polyfills": "^1.0" + }, + "suggest": { + "ext-awscrt": "Make sure you install awscrt native extension to use any of the functionality." 
+ }, + "type": "library", + "autoload": { + "classmap": [ + "src/" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "Apache-2.0" + ], + "authors": [ + { + "name": "AWS SDK Common Runtime Team", + "email": "aws-sdk-common-runtime@amazon.com" + } + ], + "description": "AWS Common Runtime for PHP", + "homepage": "https://github.com/awslabs/aws-crt-php", + "keywords": [ + "amazon", + "aws", + "crt", + "sdk" + ], + "support": { + "issues": "https://github.com/awslabs/aws-crt-php/issues", + "source": "https://github.com/awslabs/aws-crt-php/tree/v1.2.2" + }, + "time": "2023-07-20T16:49:55+00:00" + }, + { + "name": "aws/aws-sdk-php", + "version": "3.281.7", + "source": { + "type": "git", + "url": "https://github.com/aws/aws-sdk-php.git", + "reference": "926cea9a41a545ca9801ac304f2a9ffd23ac68c9" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/aws/aws-sdk-php/zipball/926cea9a41a545ca9801ac304f2a9ffd23ac68c9", + "reference": "926cea9a41a545ca9801ac304f2a9ffd23ac68c9", + "shasum": "" + }, + "require": { + "aws/aws-crt-php": "^1.0.4", + "ext-json": "*", + "ext-pcre": "*", + "ext-simplexml": "*", + "guzzlehttp/guzzle": "^6.5.8 || ^7.4.5", + "guzzlehttp/promises": "^1.4.0 || ^2.0", + "guzzlehttp/psr7": "^1.9.1 || ^2.4.5", + "mtdowling/jmespath.php": "^2.6", + "php": ">=7.2.5", + "psr/http-message": "^1.0 || ^2.0" + }, + "require-dev": { + "andrewsville/php-token-reflection": "^1.4", + "aws/aws-php-sns-message-validator": "~1.0", + "behat/behat": "~3.0", + "composer/composer": "^1.10.22", + "dms/phpunit-arraysubset-asserts": "^0.4.0", + "doctrine/cache": "~1.4", + "ext-dom": "*", + "ext-openssl": "*", + "ext-pcntl": "*", + "ext-sockets": "*", + "nette/neon": "^2.3", + "paragonie/random_compat": ">= 2", + "phpunit/phpunit": "^5.6.3 || ^8.5 || ^9.5", + "psr/cache": "^1.0", + "psr/simple-cache": "^1.0", + "sebastian/comparator": "^1.2.3 || ^4.0", + "yoast/phpunit-polyfills": "^1.0" + }, + "suggest": { + 
"aws/aws-php-sns-message-validator": "To validate incoming SNS notifications", + "doctrine/cache": "To use the DoctrineCacheAdapter", + "ext-curl": "To send requests using cURL", + "ext-openssl": "Allows working with CloudFront private distributions and verifying received SNS messages", + "ext-sockets": "To use client-side monitoring" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "3.0-dev" + } + }, + "autoload": { + "files": [ + "src/functions.php" + ], + "psr-4": { + "Aws\\": "src/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "Apache-2.0" + ], + "authors": [ + { + "name": "Amazon Web Services", + "homepage": "http://aws.amazon.com" + } + ], + "description": "AWS SDK for PHP - Use Amazon Web Services in your PHP project", + "homepage": "http://aws.amazon.com/sdkforphp", + "keywords": [ + "amazon", + "aws", + "cloud", + "dynamodb", + "ec2", + "glacier", + "s3", + "sdk" + ], + "support": { + "forum": "https://forums.aws.amazon.com/forum.jspa?forumID=80", + "issues": "https://github.com/aws/aws-sdk-php/issues", + "source": "https://github.com/aws/aws-sdk-php/tree/3.281.7" + }, + "time": "2023-09-14T18:05:11+00:00" + }, { "name": "behat/mink", "version": "v1.10.0", @@ -5840,6 +5989,72 @@ }, "time": "2022-02-23T02:02:42+00:00" }, + { + "name": "mtdowling/jmespath.php", + "version": "2.7.0", + "source": { + "type": "git", + "url": "https://github.com/jmespath/jmespath.php.git", + "reference": "bbb69a935c2cbb0c03d7f481a238027430f6440b" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/jmespath/jmespath.php/zipball/bbb69a935c2cbb0c03d7f481a238027430f6440b", + "reference": "bbb69a935c2cbb0c03d7f481a238027430f6440b", + "shasum": "" + }, + "require": { + "php": "^7.2.5 || ^8.0", + "symfony/polyfill-mbstring": "^1.17" + }, + "require-dev": { + "composer/xdebug-handler": "^3.0.3", + "phpunit/phpunit": "^8.5.33" + }, + "bin": [ + "bin/jp.php" + ], + "type": "library", + "extra": { + 
"branch-alias": { + "dev-master": "2.7-dev" + } + }, + "autoload": { + "files": [ + "src/JmesPath.php" + ], + "psr-4": { + "JmesPath\\": "src/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Graham Campbell", + "email": "hello@gjcampbell.co.uk", + "homepage": "https://github.com/GrahamCampbell" + }, + { + "name": "Michael Dowling", + "email": "mtdowling@gmail.com", + "homepage": "https://github.com/mtdowling" + } + ], + "description": "Declaratively specify how to extract elements from a JSON document", + "keywords": [ + "json", + "jsonpath" + ], + "support": { + "issues": "https://github.com/jmespath/jmespath.php/issues", + "source": "https://github.com/jmespath/jmespath.php/tree/2.7.0" + }, + "time": "2023-08-25T10:54:48+00:00" + }, { "name": "myclabs/deep-copy", "version": "1.11.1", diff --git a/html/modules/custom/ocha_ai_summarize/ocha_ai_summarize.module b/html/modules/custom/ocha_ai_summarize/ocha_ai_summarize.module index 16e9e08..e7ad595 100644 --- a/html/modules/custom/ocha_ai_summarize/ocha_ai_summarize.module +++ b/html/modules/custom/ocha_ai_summarize/ocha_ai_summarize.module @@ -5,10 +5,14 @@ * Use AI to summarize PDF files. */ +use Aws\Credentials\Credentials; +use Aws\Signature\SignatureV4; use Drupal\content_moderation\Entity\ContentModerationState; use Drupal\Core\Entity\Display\EntityViewDisplayInterface; use Drupal\Core\Entity\EntityInterface; use Drupal\Core\Form\FormStateInterface; +use GuzzleHttp\Client; +use GuzzleHttp\Psr7\Request; use Symfony\Component\Process\Process; /** @@ -366,3 +370,49 @@ function ocha_ai_summarize_http_call_azure($query) { $body = $response->getBody() . ''; return json_decode($body, TRUE); } + +/** + * Make chat call to BedRock. 
+ */ +function ocha_ai_summarize_http_call_bedrock($prompt) { + $config = \Drupal::config('ocha_ai_summarize.settings'); + $endpoint = $config->get('bedrock_endpoint'); + $access_key = $config->get('bedrock_access_key'); + $secret_key = $config->get('bedrock_secret_key'); + $model = $config->get('bedrock_model'); + + $headers = [ + 'Content-Type' => 'application/json', + 'Accept' => 'application/json', + 'modelId' => $model, + ]; + + $version = '1.1'; + + $body = [ + 'inputText' => $prompt, + 'textGenerationConfig' => [ + 'maxTokenCount' => 4000, + 'stopSequences' => [], + 'temperature' => 0, + 'topP' => 1, + ], + ]; + + $client = new Client([ + 'timeout' => 30, + ]); + + $request = new Request('POST', $endpoint, $headers, json_encode($body), $version); + + $region = 'us-east-1'; + $service = 'bedrock'; + $signature = new SignatureV4($service, $region); + $credentials = new Credentials($access_key, $secret_key); + + $request = $signature->signRequest($request, $credentials); + $response = $client->send($request); + + $body = $response->getBody() . ''; + return json_decode($body, TRUE); +} diff --git a/html/modules/custom/ocha_ai_summarize/src/Plugin/QueueWorker/OchaAiSummarizeSummarize.php b/html/modules/custom/ocha_ai_summarize/src/Plugin/QueueWorker/OchaAiSummarizeSummarize.php index 9c4d69e..3e0be90 100644 --- a/html/modules/custom/ocha_ai_summarize/src/Plugin/QueueWorker/OchaAiSummarizeSummarize.php +++ b/html/modules/custom/ocha_ai_summarize/src/Plugin/QueueWorker/OchaAiSummarizeSummarize.php @@ -86,11 +86,18 @@ public function processItem($data) { continue; } - if ($bot == 'openai') { - $results[] = $this->sendToOpenAi("Summerize the following text in 3 paragraphs:\n\n" . $text); - } - else { - $results[] = $this->sendToAzureAi("Summerize the following text in 3 paragraphs:\n\n" . $text); + switch ($bot) { + case 'openai': + $results[] = $this->sendToOpenAi("Summerize the following text in 3 paragraphs:\n\n" . 
$text); + break; + + case 'azure_trained': + $results[] = $this->sendToAzureAi("Summerize the following text in 3 paragraphs:\n\n" . $text); + break; + + case 'bedrock': + $results[] = $this->sendToBedRock("Summerize the following text in 3 paragraphs:\n\n" . $text); + break; } } @@ -101,11 +108,18 @@ public function processItem($data) { $text .= "\n"; } - if ($bot == 'openai') { - $summary = $this->sendToOpenAi("Summerize the following text in 3 paragraphs:\n\n" . $text); - } - else { - $summary = $this->sendToAzureAi("Summerize the following text in 3 paragraphs:\n\n" . $text); + switch ($bot) { + case 'openai': + $summary = $this->sendToOpenAi("Summerize the following text in 5 paragraphs:\n\n" . $text); + break; + + case 'azure_trained': + $summary = $this->sendToAzureAi("Summerize the following text in 5 paragraphs:\n\n" . $text); + break; + + case 'bedrock': + $summary = $this->sendToBedRock("Summerize the following text in 5 paragraphs:\n\n" . $text); + break; } $node->set('field_summary', $summary); @@ -156,4 +170,12 @@ protected function sendToAzureAi($text) : string { return $result['choices'][0]['message']['content'] ?? ''; } + /** + * Send query to BedRock. + */ + protected function sendToBedRock($text) : string { + $result = ocha_ai_summarize_http_call_bedrock($text); + return $result['results'][0]['outputText'] ?? 
''; + } + } From 3a75699b99767d226849a387494f3ed5d575fa43 Mon Sep 17 00:00:00 2001 From: "Peter Droogmans (attiks)" Date: Fri, 15 Sep 2023 12:00:33 +0200 Subject: [PATCH 06/18] feat: BedRock support --- README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.md b/README.md index 7dd3b9e..80e28bd 100644 --- a/README.md +++ b/README.md @@ -8,8 +8,14 @@ Uses AI to summarize PDF files ```php $config['ocha_ai_summarize.settings']['openai_token'] = 'xxx'; + $config['ocha_ai_summarize.settings']['azure_endpoint'] = 'https://tst003.openai.azure.com/openai/deployments/tst003/chat/completions?api-version=2023-03-15-preview'; $config['ocha_ai_summarize.settings']['azure_apikey'] = 'yyy'; + +$config['ocha_ai_summarize.settings']['bedrock_endpoint'] = 'https://bedrock.us-east-1.amazonaws.com/model/amazon.titan-tg1-large/invoke'; +$config['ocha_ai_summarize.settings']['bedrock_model'] = 'amazon.titan-tg1-large'; +$config['ocha_ai_summarize.settings']['bedrock_access_key'] = 'x1'; +$config['ocha_ai_summarize.settings']['bedrock_secret_key'] = 'x2'; ``` ## Cron From f6530f96edda38e9c0061ecd3df230bce8ac92b5 Mon Sep 17 00:00:00 2001 From: "Peter Droogmans (attiks)" Date: Fri, 15 Sep 2023 16:42:03 +0200 Subject: [PATCH 07/18] feat: Use AWS Textract --- .../ocha_ai_summarize.module | 126 ++++++++++++++++++ .../QueueWorker/OchaAiSummarizeTextract.php | 109 +++++++++++++++ 2 files changed, 235 insertions(+) create mode 100644 html/modules/custom/ocha_ai_summarize/src/Plugin/QueueWorker/OchaAiSummarizeTextract.php diff --git a/html/modules/custom/ocha_ai_summarize/ocha_ai_summarize.module b/html/modules/custom/ocha_ai_summarize/ocha_ai_summarize.module index e7ad595..570aae0 100644 --- a/html/modules/custom/ocha_ai_summarize/ocha_ai_summarize.module +++ b/html/modules/custom/ocha_ai_summarize/ocha_ai_summarize.module @@ -6,7 +6,9 @@ */ use Aws\Credentials\Credentials; +use Aws\S3\S3Client; use Aws\Signature\SignatureV4; +use Aws\Textract\TextractClient; use 
Drupal\content_moderation\Entity\ContentModerationState; use Drupal\Core\Entity\Display\EntityViewDisplayInterface; use Drupal\Core\Entity\EntityInterface; @@ -316,6 +318,130 @@ function ocha_ai_summarize_extract_pages($filename) { return $pages; } +/** + * Upload file to S3. + */ +function ocha_ai_summarize_upload_to_s3($file_name) { + $config = \Drupal::config('ocha_ai_summarize.settings'); + $access_key = $config->get('bedrock_access_key'); + $secret_key = $config->get('bedrock_secret_key'); + + $client = new S3Client([ + 'credentials' => [ + 'key' => $access_key, + 'secret' => $secret_key, + ], + 'region' => 'us-east-1', + ]); + + $bucket_name = 'ai-summarize-pdfs'; + $key_name = basename($file_name); + + $client->putObject([ + 'Bucket' => $bucket_name, + 'Key' => $key_name, + 'SourceFile' => $file_name, + ]); +} + +/** + * Extract text using AWS Textract. + */ +function ocha_ai_summarize_texttract($file_name) { + $config = \Drupal::config('ocha_ai_summarize.settings'); + $access_key = $config->get('bedrock_access_key'); + $secret_key = $config->get('bedrock_secret_key'); + + $client = new TextractClient([ + 'region' => 'us-east-1', + 'credentials' => [ + 'key' => $access_key, + 'secret' => $secret_key, + ], + ]); + + $bucket_name = 'ai-summarize-pdfs'; + $key_name = basename($file_name); + + $options = [ + 'DocumentLocation' => [ + 'S3Object' => [ + 'Bucket' => $bucket_name, + 'Name' => $key_name, + ], + ], + 'FeatureTypes' => [], + ]; + + $result = $client->startDocumentTextDetection($options); + $job_id = $result->get('JobId'); + return $job_id; +} + +/** + * Get extracted text using AWS Textract. 
+ */ +function ocha_ai_summarize_texttract_get_text($job_id) { + $config = \Drupal::config('ocha_ai_summarize.settings'); + $access_key = $config->get('bedrock_access_key'); + $secret_key = $config->get('bedrock_secret_key'); + + $client = new TextractClient([ + 'region' => 'us-east-1', + 'credentials' => [ + 'key' => $access_key, + 'secret' => $secret_key, + ], + ]); + + $options = [ + 'JobId' => $job_id, + ]; + + $result = $client->GetDocumentTextDetection($options); + $blocks = $result->get('Blocks'); + $status = $result->get('JobStatus'); + + if ($status == 'SUCCEEDED') { + $output = ''; + + // Concatenate. + foreach ($blocks as $value) { + if (isset($value['BlockType']) && $value['BlockType']) { + $blockType = $value['BlockType']; + if (isset($value['Text']) && $value['Text']) { + $text = $value['Text']; + if ($blockType == 'WORD') { + $output .= ' ' . $text; + } + elseif ($blockType == 'LINE') { + $output .= "\n" . $text; + } + } + } + } + + $output .= "\n"; + return $output; + } + + return ''; +} + +/** + * Test run for AWS Textract. + */ +function ocha_ai_summarize_testit() { + $file_name = '/var/www/private_files/2023-09/2018_OCHA_Aide Memoire.pdf'; + ocha_ai_summarize_upload_to_s3($file_name); + $job_id = ocha_ai_summarize_texttract($file_name); + + sleep(5); + + $text = ocha_ai_summarize_texttract_get_text($job_id); + return $text; +} + /** * Make chat call to OpenAi. 
*/ diff --git a/html/modules/custom/ocha_ai_summarize/src/Plugin/QueueWorker/OchaAiSummarizeTextract.php b/html/modules/custom/ocha_ai_summarize/src/Plugin/QueueWorker/OchaAiSummarizeTextract.php new file mode 100644 index 0000000..bed579d --- /dev/null +++ b/html/modules/custom/ocha_ai_summarize/src/Plugin/QueueWorker/OchaAiSummarizeTextract.php @@ -0,0 +1,109 @@ +entityTypeManager = $entity_type_manager; + $this->fileSystem = $file_system; + } + + /** + * {@inheritdoc} + */ + public static function create(ContainerInterface $container, array $configuration, $plugin_id, $plugin_definition) { + return new static( + $configuration, + $plugin_id, + $plugin_definition, + $container->get('entity_type.manager'), + $container->get('file_system'), + ); + } + + /** + * {@inheritdoc} + */ + public function processItem($data) { + $nid = $data->nid; + if (empty($nid)) { + return; + } + + /** @var \Drupal\node\Entity\Node $node */ + $node = $this->entityTypeManager->getStorage('node')->load($nid); + + if (!$node || $node->bundle() !== 'summary') { + return; + } + + $content_moderation_state = ContentModerationState::loadFromModeratedEntity($node); + if (!$content_moderation_state) { + return; + } + + if ($content_moderation_state->get('moderation_state')->value !== 'extract_text') { + return; + } + + if (!$node->field_pdf_text->isEmpty()) { + return; + } + + /** @var \Drupal\file\Plugin\Field\FieldType\FileItem $file_item */ + $file_item = $node->get('field_pdf')->first() ?? NULL; + if (!$file_item) { + return; + } + + /** @var \Drupal\file\Entity\File $file */ + $file = $this->entityTypeManager->getStorage('file')->load($file_item->getValue()['target_id']); + if (!$file) { + return; + } + + $absolute_path = $this->fileSystem->realpath($file->getFileUri()); + + $text = ocha_ai_summarize_texttract($absolute_path); + // Split in BLOB of 3000 characters. 
+ $node->set('field_pdf_text', $text); + $node->set('moderation_state', 'text_extracted'); + $node->save(); + } + +} From 3a1508add350c0ff157c73a527fc1e846ffbea31 Mon Sep 17 00:00:00 2001 From: "Peter Droogmans (attiks)" Date: Tue, 19 Sep 2023 10:05:45 +0200 Subject: [PATCH 08/18] chore: Add roles and permissions --- composer.json | 1 + composer.lock | 54 ++++++++++++++++++- config/core.extension.yml | 1 + config/field.storage.node.field_ai_brain.yml | 3 ++ ...tion.user_add_role_action.pdf_analyzer.yml | 14 +++++ ...tion.user_add_role_action.user_manager.yml | 14 +++++ ...n.user_remove_role_action.pdf_analyzer.yml | 14 +++++ ...n.user_remove_role_action.user_manager.yml | 14 +++++ config/user.role.administrator.yml | 2 +- config/user.role.anonymous.yml | 7 +-- config/user.role.authenticated.yml | 21 ++------ config/user.role.editor.yml | 12 ++++- config/user.role.pdf_analyzer.yml | 31 +++++++++++ config/user.role.user_manager.yml | 22 ++++++++ 14 files changed, 187 insertions(+), 23 deletions(-) create mode 100644 config/system.action.user_add_role_action.pdf_analyzer.yml create mode 100644 config/system.action.user_add_role_action.user_manager.yml create mode 100644 config/system.action.user_remove_role_action.pdf_analyzer.yml create mode 100644 config/system.action.user_remove_role_action.user_manager.yml create mode 100644 config/user.role.pdf_analyzer.yml create mode 100644 config/user.role.user_manager.yml diff --git a/composer.json b/composer.json index 54b6fd4..fcf5085 100644 --- a/composer.json +++ b/composer.json @@ -40,6 +40,7 @@ "drupal/maintenance200": "^1.1", "drupal/memcache": "^2.5", "drupal/metatag": "^1.22", + "drupal/node_view_permissions": "^1.6", "drupal/paragraphs": "^1.15", "drupal/pathauto": "^1.8", "drupal/queue_ui": "^3.1", diff --git a/composer.lock b/composer.lock index 3c7b6ae..9327892 100644 --- a/composer.lock +++ b/composer.lock @@ -4,7 +4,7 @@ "Read more about it at 
https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", "This file is @generated automatically" ], - "content-hash": "8b1d00cbee5645cc0d6f55f4b45bfaf1", + "content-hash": "25626d5695fd25fdeff207152ec8edb0", "packages": [ { "name": "asm89/stack-cors", @@ -3755,6 +3755,58 @@ "docs": "https://www.drupal.org/docs/8/modules/metatag" } }, + { + "name": "drupal/node_view_permissions", + "version": "1.6.0", + "source": { + "type": "git", + "url": "https://git.drupalcode.org/project/node_view_permissions.git", + "reference": "8.x-1.6" + }, + "dist": { + "type": "zip", + "url": "https://ftp.drupal.org/files/projects/node_view_permissions-8.x-1.6.zip", + "reference": "8.x-1.6", + "shasum": "f56e0d43f41f384c73d93e18abfbe38809533d5e" + }, + "require": { + "drupal/core": "^8 || ^9 || ^10" + }, + "type": "drupal-module", + "extra": { + "drupal": { + "version": "8.x-1.6", + "datestamp": "1686297669", + "security-coverage": { + "status": "covered", + "message": "Covered by Drupal's security advisory policy" + } + } + }, + "notification-url": "https://packages.drupal.org/8/downloads", + "license": [ + "GPL-2.0-or-later" + ], + "authors": [ + { + "name": "adcillc", + "homepage": "https://www.drupal.org/user/366450" + }, + { + "name": "adci_contributor", + "homepage": "https://www.drupal.org/user/1830536" + }, + { + "name": "hxdef", + "homepage": "https://www.drupal.org/user/2622115" + } + ], + "description": "enables permissions 'View own content' and 'View any content' for each content type on permissions page", + "homepage": "http://drupal.org/project/node_view_permissions", + "support": { + "source": "https://git.drupalcode.org/project/node_view_permissions" + } + }, { "name": "drupal/paragraphs", "version": "1.16.0", diff --git a/config/core.extension.yml b/config/core.extension.yml index fded26b..e820f05 100644 --- a/config/core.extension.yml +++ b/config/core.extension.yml @@ -44,6 +44,7 @@ module: menu_ui: 0 mysql: 0 node: 0 + node_view_permissions: 0 
ocha_ai_summarize: 0 options: 0 page_cache: 0 diff --git a/config/field.storage.node.field_ai_brain.yml b/config/field.storage.node.field_ai_brain.yml index b782346..8833700 100644 --- a/config/field.storage.node.field_ai_brain.yml +++ b/config/field.storage.node.field_ai_brain.yml @@ -17,6 +17,9 @@ settings: - value: azure_trained label: 'Trained Azure AI' + - + value: bedrock + label: 'BedRock (Titan)' allowed_values_function: '' module: options locked: false diff --git a/config/system.action.user_add_role_action.pdf_analyzer.yml b/config/system.action.user_add_role_action.pdf_analyzer.yml new file mode 100644 index 0000000..48fd9a3 --- /dev/null +++ b/config/system.action.user_add_role_action.pdf_analyzer.yml @@ -0,0 +1,14 @@ +uuid: 46fae3d7-ce2c-407c-9d81-c2b8b16a3a57 +langcode: en +status: true +dependencies: + config: + - user.role.pdf_analyzer + module: + - user +id: user_add_role_action.pdf_analyzer +label: 'Add the PDF Analyzer role to the selected user(s)' +type: user +plugin: user_add_role_action +configuration: + rid: pdf_analyzer diff --git a/config/system.action.user_add_role_action.user_manager.yml b/config/system.action.user_add_role_action.user_manager.yml new file mode 100644 index 0000000..c481006 --- /dev/null +++ b/config/system.action.user_add_role_action.user_manager.yml @@ -0,0 +1,14 @@ +uuid: dcf2aae6-e71d-4098-a877-9c705836a79e +langcode: en +status: true +dependencies: + config: + - user.role.user_manager + module: + - user +id: user_add_role_action.user_manager +label: 'Add the User manager role to the selected user(s)' +type: user +plugin: user_add_role_action +configuration: + rid: user_manager diff --git a/config/system.action.user_remove_role_action.pdf_analyzer.yml b/config/system.action.user_remove_role_action.pdf_analyzer.yml new file mode 100644 index 0000000..1a251b0 --- /dev/null +++ b/config/system.action.user_remove_role_action.pdf_analyzer.yml @@ -0,0 +1,14 @@ +uuid: b4c53584-6d8b-4277-aa80-dcfa59ec5b5f +langcode: en 
+status: true +dependencies: + config: + - user.role.pdf_analyzer + module: + - user +id: user_remove_role_action.pdf_analyzer +label: 'Remove the PDF Analyzer role from the selected user(s)' +type: user +plugin: user_remove_role_action +configuration: + rid: pdf_analyzer diff --git a/config/system.action.user_remove_role_action.user_manager.yml b/config/system.action.user_remove_role_action.user_manager.yml new file mode 100644 index 0000000..b561cc9 --- /dev/null +++ b/config/system.action.user_remove_role_action.user_manager.yml @@ -0,0 +1,14 @@ +uuid: 6d0ddae9-c892-4dd3-a8bc-52f8782a3c55 +langcode: en +status: true +dependencies: + config: + - user.role.user_manager + module: + - user +id: user_remove_role_action.user_manager +label: 'Remove the User manager role from the selected user(s)' +type: user +plugin: user_remove_role_action +configuration: + rid: user_manager diff --git a/config/user.role.administrator.yml b/config/user.role.administrator.yml index e359668..6add0e1 100644 --- a/config/user.role.administrator.yml +++ b/config/user.role.administrator.yml @@ -23,7 +23,7 @@ dependencies: - user_expire id: administrator label: Administrator -weight: 2 +weight: -8 is_admin: true permissions: - 'access administration pages' diff --git a/config/user.role.anonymous.yml b/config/user.role.anonymous.yml index 420f1ca..c8ddb92 100644 --- a/config/user.role.anonymous.yml +++ b/config/user.role.anonymous.yml @@ -3,14 +3,15 @@ langcode: en status: true dependencies: module: - - media + - node_view_permissions - system _core: default_config_hash: j5zLMOdJBqC0bMvSdth5UebkprJB8g_2FXHqhfpJzow id: anonymous label: 'Anonymous user' -weight: 0 +weight: -10 is_admin: false permissions: - 'access content' - - 'view media' + - 'view any page content' + - 'view own page content' diff --git a/config/user.role.authenticated.yml b/config/user.role.authenticated.yml index 8c0c44e..5b15d7c 100644 --- a/config/user.role.authenticated.yml +++ b/config/user.role.authenticated.yml @@ 
-2,31 +2,18 @@ uuid: a0dde013-3f0e-4c0a-9dbc-7689935d777e langcode: en status: true dependencies: - config: - - node.type.summary - - workflows.workflow.summarize module: - - content_moderation - media - - node + - node_view_permissions - system _core: default_config_hash: dJ0L2DNSj5q6XVZAGsuVDpJTh5UeYkIPwKrUOOpr8YI id: authenticated label: 'Authenticated user' -weight: 1 +weight: -9 is_admin: false permissions: - 'access content' - - 'create summary content' - - 'delete own summary content' - - 'edit own summary content' - - 'use summarize transition archive' - - 'use summarize transition extrat_text' - - 'use summarize transition pdf_uploaded' - - 'use summarize transition published' - - 'use summarize transition summarize' - - 'use summarize transition summarized_update' - - 'use summarize transition text_extracted_update' - - 'view any unpublished content' + - 'view any page content' - 'view media' + - 'view own page content' diff --git a/config/user.role.editor.yml b/config/user.role.editor.yml index 8ea771f..1cdcb73 100644 --- a/config/user.role.editor.yml +++ b/config/user.role.editor.yml @@ -4,6 +4,7 @@ status: true dependencies: config: - filter.format.text_editor_simple + - node.type.page module: - content_translation - file @@ -13,13 +14,15 @@ dependencies: - node - paragraphs - path + - system - taxonomy - toolbar id: editor label: Editor -weight: 3 +weight: -5 is_admin: null permissions: + - 'access content' - 'access content overview' - 'access files overview' - 'access media overview' @@ -28,11 +31,17 @@ permissions: - 'administer taxonomy' - 'create content translations' - 'create media' + - 'create page content' - 'create url aliases' - 'delete any media' + - 'delete any page content' - 'delete content translations' - 'delete media' + - 'delete own page content' + - 'edit any page content' + - 'edit own page content' - 'revert all revisions' + - 'revert page revisions' - 'translate any entity' - 'translate editable entities' - 'translate 
menu_link_content' @@ -44,4 +53,5 @@ permissions: - 'view all revisions' - 'view own unpublished content' - 'view own unpublished media' + - 'view page revisions' - 'view unpublished paragraphs' diff --git a/config/user.role.pdf_analyzer.yml b/config/user.role.pdf_analyzer.yml new file mode 100644 index 0000000..3c15d48 --- /dev/null +++ b/config/user.role.pdf_analyzer.yml @@ -0,0 +1,31 @@ +uuid: 0536c9bf-2876-4201-adab-959ff24e25ba +langcode: en +status: true +dependencies: + config: + - node.type.summary + - workflows.workflow.summarize + module: + - content_moderation + - node + - node_view_permissions + - system +id: pdf_analyzer +label: 'PDF Analyzer' +weight: -6 +is_admin: null +permissions: + - 'access content' + - 'create summary content' + - 'delete own summary content' + - 'edit own summary content' + - 'use summarize transition archive' + - 'use summarize transition extrat_text' + - 'use summarize transition pdf_uploaded' + - 'use summarize transition published' + - 'use summarize transition summarize' + - 'use summarize transition summarized_update' + - 'use summarize transition text_extracted_update' + - 'view own summary content' + - 'view own unpublished content' + - 'view summary revisions' diff --git a/config/user.role.user_manager.yml b/config/user.role.user_manager.yml new file mode 100644 index 0000000..ac2c56a --- /dev/null +++ b/config/user.role.user_manager.yml @@ -0,0 +1,22 @@ +uuid: d75c3c5c-e577-49a9-92a8-612fe743565d +langcode: en +status: true +dependencies: + config: + - node.type.summary + module: + - content_moderation + - node + - node_view_permissions +id: user_manager +label: 'User manager' +weight: -7 +is_admin: null +permissions: + - 'access user profiles' + - 'administer users' + - 'edit any summary content' + - 'view any summary content' + - 'view any unpublished content' + - 'view latest version' + - 'view user email addresses' From 71ee67f58fd64e295a308b24b3a182281b3edbfd Mon Sep 17 00:00:00 2001 From: "Peter Droogmans 
(attiks)" Date: Tue, 19 Sep 2023 15:59:56 +0200 Subject: [PATCH 09/18] Add num paragraphsd, add overview --- ...tity_form_display.node.summary.default.yml | 25 +- ...tity_view_display.node.summary.default.yml | 14 +- ...ntity_view_display.node.summary.teaser.yml | 2 + ...ode.summary.field_number_of_paragraphs.yml | 23 + config/field.field.node.summary.field_pdf.yml | 2 +- ...torage.node.field_number_of_paragraphs.yml | 51 ++ config/views.view.my_documents.yml | 457 ++++++++++++++++++ .../ocha_ai_summarize.module | 3 + .../OchaAiSummarizeExtractText.php | 1 + .../QueueWorker/OchaAiSummarizeSummarize.php | 17 +- 10 files changed, 575 insertions(+), 20 deletions(-) create mode 100644 config/field.field.node.summary.field_number_of_paragraphs.yml create mode 100644 config/field.storage.node.field_number_of_paragraphs.yml create mode 100644 config/views.view.my_documents.yml diff --git a/config/core.entity_form_display.node.summary.default.yml b/config/core.entity_form_display.node.summary.default.yml index c156116..836a872 100644 --- a/config/core.entity_form_display.node.summary.default.yml +++ b/config/core.entity_form_display.node.summary.default.yml @@ -5,6 +5,7 @@ dependencies: config: - field.field.node.summary.field_ai_brain - field.field.node.summary.field_automatic_mode + - field.field.node.summary.field_number_of_paragraphs - field.field.node.summary.field_pdf - field.field.node.summary.field_pdf_text - field.field.node.summary.field_summary @@ -21,7 +22,7 @@ mode: default content: created: type: datetime_timestamp - weight: 11 + weight: 12 region: content settings: { } third_party_settings: { } @@ -38,16 +39,22 @@ content: settings: display_label: true third_party_settings: { } + field_number_of_paragraphs: + type: options_select + weight: 4 + region: content + settings: { } + third_party_settings: { } field_pdf: type: file_generic - weight: 4 + weight: 5 region: content settings: progress_indicator: throbber third_party_settings: { } field_pdf_text: type: 
string_textarea - weight: 5 + weight: 6 region: content settings: rows: 5 @@ -55,7 +62,7 @@ content: third_party_settings: { } field_summary: type: string_textarea - weight: 6 + weight: 7 region: content settings: rows: 5 @@ -70,19 +77,19 @@ content: third_party_settings: { } moderation_state: type: moderation_state_default - weight: 7 + weight: 8 region: content settings: { } third_party_settings: { } path: type: path - weight: 8 + weight: 9 region: content settings: { } third_party_settings: { } status: type: boolean_checkbox - weight: 10 + weight: 11 region: content settings: display_label: true @@ -97,7 +104,7 @@ content: third_party_settings: { } uid: type: entity_reference_autocomplete - weight: 12 + weight: 13 region: content settings: match_operator: CONTAINS @@ -106,7 +113,7 @@ content: placeholder: '' third_party_settings: { } url_redirects: - weight: 9 + weight: 10 region: content settings: { } third_party_settings: { } diff --git a/config/core.entity_view_display.node.summary.default.yml b/config/core.entity_view_display.node.summary.default.yml index 760fda0..fec87f3 100644 --- a/config/core.entity_view_display.node.summary.default.yml +++ b/config/core.entity_view_display.node.summary.default.yml @@ -5,6 +5,7 @@ dependencies: config: - field.field.node.summary.field_ai_brain - field.field.node.summary.field_automatic_mode + - field.field.node.summary.field_number_of_paragraphs - field.field.node.summary.field_pdf - field.field.node.summary.field_pdf_text - field.field.node.summary.field_summary @@ -21,7 +22,7 @@ content: content_moderation_control: settings: { } third_party_settings: { } - weight: 4 + weight: 5 region: content field_ai_brain: type: list_default @@ -40,6 +41,13 @@ content: third_party_settings: { } weight: 0 region: content + field_number_of_paragraphs: + type: list_default + label: inline + settings: { } + third_party_settings: { } + weight: 3 + region: content field_pdf: type: file_default label: inline @@ -53,12 +61,12 @@ content: 
label: hidden settings: { } third_party_settings: { } - weight: 3 + weight: 4 region: content links: settings: { } third_party_settings: { } - weight: 5 + weight: 6 region: content hidden: field_pdf_text: true diff --git a/config/core.entity_view_display.node.summary.teaser.yml b/config/core.entity_view_display.node.summary.teaser.yml index 83b2c99..ca8e08e 100644 --- a/config/core.entity_view_display.node.summary.teaser.yml +++ b/config/core.entity_view_display.node.summary.teaser.yml @@ -6,6 +6,7 @@ dependencies: - core.entity_view_mode.node.teaser - field.field.node.summary.field_ai_brain - field.field.node.summary.field_automatic_mode + - field.field.node.summary.field_number_of_paragraphs - field.field.node.summary.field_pdf - field.field.node.summary.field_pdf_text - field.field.node.summary.field_summary @@ -30,6 +31,7 @@ content: hidden: field_ai_brain: true field_automatic_mode: true + field_number_of_paragraphs: true field_pdf: true field_pdf_text: true field_summary: true diff --git a/config/field.field.node.summary.field_number_of_paragraphs.yml b/config/field.field.node.summary.field_number_of_paragraphs.yml new file mode 100644 index 0000000..1a19268 --- /dev/null +++ b/config/field.field.node.summary.field_number_of_paragraphs.yml @@ -0,0 +1,23 @@ +uuid: 887881a1-a220-4d1d-b23f-95f8c51fea83 +langcode: en +status: true +dependencies: + config: + - field.storage.node.field_number_of_paragraphs + - node.type.summary + module: + - options +id: node.summary.field_number_of_paragraphs +field_name: field_number_of_paragraphs +entity_type: node +bundle: summary +label: 'Number of paragraphs' +description: '' +required: true +translatable: false +default_value: + - + value: 5 +default_value_callback: '' +settings: { } +field_type: list_integer diff --git a/config/field.field.node.summary.field_pdf.yml b/config/field.field.node.summary.field_pdf.yml index fd145f0..87784cd 100644 --- a/config/field.field.node.summary.field_pdf.yml +++ 
b/config/field.field.node.summary.field_pdf.yml @@ -21,7 +21,7 @@ settings: handler: 'default:file' handler_settings: { } file_directory: '[date:custom:Y]-[date:custom:m]' - file_extensions: pdf + file_extensions: 'pdf docx odf doc' max_filesize: '' description_field: false field_type: file diff --git a/config/field.storage.node.field_number_of_paragraphs.yml b/config/field.storage.node.field_number_of_paragraphs.yml new file mode 100644 index 0000000..556234c --- /dev/null +++ b/config/field.storage.node.field_number_of_paragraphs.yml @@ -0,0 +1,51 @@ +uuid: 10e0c86c-5148-45c4-b644-d50e4f4f2266 +langcode: en +status: true +dependencies: + module: + - node + - options +id: node.field_number_of_paragraphs +field_name: field_number_of_paragraphs +entity_type: node +type: list_integer +settings: + allowed_values: + - + value: 1 + label: '1' + - + value: 2 + label: '2' + - + value: 3 + label: '3' + - + value: 4 + label: '4' + - + value: 5 + label: '5' + - + value: 6 + label: '6' + - + value: 7 + label: '7' + - + value: 8 + label: '8' + - + value: 9 + label: '9' + - + value: 10 + label: '10' + allowed_values_function: '' +module: options +locked: false +cardinality: 1 +translatable: true +indexes: { } +persist_with_no_fields: false +custom_storage: false diff --git a/config/views.view.my_documents.yml b/config/views.view.my_documents.yml new file mode 100644 index 0000000..752c9cf --- /dev/null +++ b/config/views.view.my_documents.yml @@ -0,0 +1,457 @@ +uuid: f11aa09f-ffec-4dbb-8d48-465c65f520f5 +langcode: en +status: true +dependencies: + config: + - core.entity_view_mode.node.teaser + - node.type.page + - system.menu.main + module: + - content_moderation + - node + - user +_core: + default_config_hash: 6eeliKIydPjqyv5V__QqTfahvJMWkHjOVUUuUIdB1ik +id: my_documents +label: 'My documents' +module: node +description: 'All content promoted to the front page.' 
+tag: default +base_table: node_field_data +base_field: nid +display: + default: + id: default + display_title: Default + display_plugin: default + position: 0 + display_options: + title: 'My documents' + fields: + title: + id: title + table: node_field_data + field: title + relationship: none + group_type: group + admin_label: '' + entity_type: node + entity_field: title + plugin_id: field + label: Title + exclude: false + alter: + alter_text: false + text: '' + make_link: false + path: '' + absolute: false + external: false + replace_spaces: false + path_case: none + trim_whitespace: false + alt: '' + rel: '' + link_class: '' + prefix: '' + suffix: '' + target: '' + nl2br: false + max_length: 0 + word_boundary: true + ellipsis: true + more_link: false + more_link_text: '' + more_link_path: '' + strip_tags: false + trim: false + preserve_tags: '' + html: false + element_type: '' + element_class: '' + element_label_type: '' + element_label_class: '' + element_label_colon: true + element_wrapper_type: '' + element_wrapper_class: '' + element_default_classes: true + empty: '' + hide_empty: false + empty_zero: false + hide_alter_empty: true + click_sort_column: value + type: string + settings: + link_to_entity: true + group_column: value + group_columns: { } + group_rows: true + delta_limit: 0 + delta_offset: 0 + delta_reversed: false + delta_first_last: false + multi_type: separator + separator: ', ' + field_api_classes: false + status: + id: status + table: node_field_data + field: status + relationship: none + group_type: group + admin_label: '' + entity_type: node + entity_field: status + plugin_id: field + label: Published + exclude: false + alter: + alter_text: false + text: '' + make_link: false + path: '' + absolute: false + external: false + replace_spaces: false + path_case: none + trim_whitespace: false + alt: '' + rel: '' + link_class: '' + prefix: '' + suffix: '' + target: '' + nl2br: false + max_length: 0 + word_boundary: true + ellipsis: true + 
more_link: false + more_link_text: '' + more_link_path: '' + strip_tags: false + trim: false + preserve_tags: '' + html: false + element_type: '' + element_class: '' + element_label_type: '' + element_label_class: '' + element_label_colon: true + element_wrapper_type: '' + element_wrapper_class: '' + element_default_classes: true + empty: '' + hide_empty: false + empty_zero: false + hide_alter_empty: true + click_sort_column: value + type: boolean + settings: + format: default + format_custom_false: '' + format_custom_true: '' + group_column: value + group_columns: { } + group_rows: true + delta_limit: 0 + delta_offset: 0 + delta_reversed: false + delta_first_last: false + multi_type: separator + separator: ', ' + field_api_classes: false + moderation_state: + id: moderation_state + table: node_field_data + field: moderation_state + relationship: none + group_type: group + admin_label: '' + entity_type: node + plugin_id: moderation_state_field + label: 'Moderation state' + exclude: false + alter: + alter_text: false + text: '' + make_link: false + path: '' + absolute: false + external: false + replace_spaces: false + path_case: none + trim_whitespace: false + alt: '' + rel: '' + link_class: '' + prefix: '' + suffix: '' + target: '' + nl2br: false + max_length: 0 + word_boundary: true + ellipsis: true + more_link: false + more_link_text: '' + more_link_path: '' + strip_tags: false + trim: false + preserve_tags: '' + html: false + element_type: '' + element_class: '' + element_label_type: '' + element_label_class: '' + element_label_colon: true + element_wrapper_type: '' + element_wrapper_class: '' + element_default_classes: true + empty: '' + hide_empty: false + empty_zero: false + hide_alter_empty: true + click_sort_column: value + type: content_moderation_state + settings: { } + group_column: value + group_columns: { } + group_rows: true + delta_limit: 0 + delta_offset: 0 + delta_reversed: false + delta_first_last: false + multi_type: separator + separator: ', ' + 
field_api_classes: false + pager: + type: full + options: + offset: 0 + items_per_page: 10 + total_pages: 0 + id: 0 + tags: + next: 'Next ›' + previous: '‹ Previous' + first: '« First' + last: 'Last »' + expose: + items_per_page: false + items_per_page_label: 'Items per page' + items_per_page_options: '5, 10, 25, 50' + items_per_page_options_all: false + items_per_page_options_all_label: '- All -' + offset: false + offset_label: Offset + quantity: 9 + exposed_form: + type: basic + options: + submit_button: Apply + reset_button: false + reset_button_label: Reset + exposed_sorts_label: 'Sort by' + expose_sort_order: true + sort_asc_label: Asc + sort_desc_label: Desc + access: + type: perm + options: + perm: 'access content' + cache: + type: tag + options: { } + empty: + area_text_custom: + id: area_text_custom + table: views + field: area_text_custom + relationship: none + group_type: group + admin_label: '' + plugin_id: text_custom + label: '' + empty: true + content: 'No front page content has been created yet.
Follow the User Guide to start building your site.' + tokenize: false + node_listing_empty: + id: node_listing_empty + table: node + field: node_listing_empty + relationship: none + group_type: group + admin_label: '' + entity_type: node + plugin_id: node_listing_empty + label: '' + empty: true + title: + id: title + table: views + field: title + relationship: none + group_type: group + admin_label: '' + plugin_id: title + label: '' + empty: true + title: Welcome! + sorts: + created: + id: created + table: node_field_data + field: created + relationship: none + group_type: group + admin_label: '' + entity_type: node + entity_field: created + plugin_id: date + order: DESC + expose: + label: '' + field_identifier: created + exposed: false + granularity: second + arguments: { } + filters: + langcode: + id: langcode + table: node_field_data + field: langcode + relationship: none + group_type: group + admin_label: '' + entity_type: node + entity_field: langcode + plugin_id: language + operator: in + value: + '***LANGUAGE_language_content***': '***LANGUAGE_language_content***' + group: 1 + exposed: false + expose: + operator_id: '' + label: '' + description: '' + use_operator: false + operator: '' + operator_limit_selection: false + operator_list: { } + identifier: '' + required: false + remember: false + multiple: false + remember_roles: + authenticated: authenticated + reduce: false + is_grouped: false + group_info: + label: '' + description: '' + identifier: '' + optional: true + widget: select + multiple: false + remember: false + default_group: All + default_group_multiple: { } + group_items: { } + type: + id: type + table: node_field_data + field: type + relationship: none + group_type: group + admin_label: '' + entity_type: node + entity_field: type + plugin_id: bundle + operator: 'not in' + value: + page: page + group: 1 + exposed: false + expose: + operator_id: '' + label: '' + description: '' + use_operator: false + operator: '' + operator_limit_selection: 
false + operator_list: { } + identifier: '' + required: false + remember: false + multiple: false + remember_roles: + authenticated: authenticated + reduce: false + is_grouped: false + group_info: + label: '' + description: '' + identifier: '' + optional: true + widget: select + multiple: false + remember: false + default_group: All + default_group_multiple: { } + group_items: { } + style: + type: table + options: + row_class: '' + default_row_class: false + row: + type: 'entity:node' + options: + view_mode: teaser + query: + type: views_query + options: + query_comment: '' + disable_sql_rewrite: false + distinct: false + replica: false + query_tags: { } + relationships: { } + header: { } + footer: { } + display_extenders: { } + cache_metadata: + max-age: -1 + contexts: + - 'languages:language_content' + - 'languages:language_interface' + - url.query_args + - 'user.node_grants:view' + - user.permissions + tags: { } + page_1: + id: page_1 + display_title: Page + display_plugin: page + position: 1 + display_options: + display_extenders: { } + path: my-documents + menu: + type: normal + title: 'My documents' + description: '' + weight: 0 + expanded: false + menu_name: main + parent: '' + context: '0' + cache_metadata: + max-age: -1 + contexts: + - 'languages:language_content' + - 'languages:language_interface' + - url.query_args + - 'user.node_grants:view' + - user.permissions + tags: { } diff --git a/html/modules/custom/ocha_ai_summarize/ocha_ai_summarize.module b/html/modules/custom/ocha_ai_summarize/ocha_ai_summarize.module index 570aae0..4999e95 100644 --- a/html/modules/custom/ocha_ai_summarize/ocha_ai_summarize.module +++ b/html/modules/custom/ocha_ai_summarize/ocha_ai_summarize.module @@ -82,6 +82,7 @@ function ocha_ai_summarize_node_update(EntityInterface $entity) { $item = new \stdClass(); $item->nid = $entity->id(); $item->brain = $entity->get('field_ai_brain')->value; + $item->num_paragraphs = $entity->get('field_number_of_paragraphs')->value ?? 
3; $queue->createItem($item); } @@ -131,6 +132,7 @@ function ocha_ai_summarize_form_node_summary_alter(&$form, FormStateInterface $f break; case 'summarize': + $form['field_number_of_paragraphs']['#disabled'] = TRUE; $form['field_pdf_text']['#disabled'] = TRUE; unset($form['field_pdf_text']['widget'][$form['field_pdf_text']['widget']['#max_delta']]); unset($form['field_pdf_text']['widget']['add_more']); @@ -140,6 +142,7 @@ function ocha_ai_summarize_form_node_summary_alter(&$form, FormStateInterface $f break; case 'summarized': + $form['field_number_of_paragraphs']['#disabled'] = TRUE; $form['field_pdf_text']['#disabled'] = TRUE; unset($form['field_pdf_text']['widget'][$form['field_pdf_text']['widget']['#max_delta']]); unset($form['field_pdf_text']['widget']['add_more']); diff --git a/html/modules/custom/ocha_ai_summarize/src/Plugin/QueueWorker/OchaAiSummarizeExtractText.php b/html/modules/custom/ocha_ai_summarize/src/Plugin/QueueWorker/OchaAiSummarizeExtractText.php index a7f32df..51926c7 100644 --- a/html/modules/custom/ocha_ai_summarize/src/Plugin/QueueWorker/OchaAiSummarizeExtractText.php +++ b/html/modules/custom/ocha_ai_summarize/src/Plugin/QueueWorker/OchaAiSummarizeExtractText.php @@ -61,6 +61,7 @@ public static function create(ContainerInterface $container, array $configuratio */ public function processItem($data) { $nid = $data->nid; + if (empty($nid)) { return; } diff --git a/html/modules/custom/ocha_ai_summarize/src/Plugin/QueueWorker/OchaAiSummarizeSummarize.php b/html/modules/custom/ocha_ai_summarize/src/Plugin/QueueWorker/OchaAiSummarizeSummarize.php index 3e0be90..9d7ebfb 100644 --- a/html/modules/custom/ocha_ai_summarize/src/Plugin/QueueWorker/OchaAiSummarizeSummarize.php +++ b/html/modules/custom/ocha_ai_summarize/src/Plugin/QueueWorker/OchaAiSummarizeSummarize.php @@ -52,6 +52,8 @@ public static function create(ContainerInterface $container, array $configuratio public function processItem($data) { $bot = $data->brain ?? 
'openai'; $nid = $data->nid; + $num_paragraphs = $data->num_paragraphs; + if (empty($nid)) { return; } @@ -83,20 +85,21 @@ public function processItem($data) { $text = $pdf_text->value; if (strlen($text) < 100) { + $results[] = $text; continue; } switch ($bot) { case 'openai': - $results[] = $this->sendToOpenAi("Summerize the following text in 3 paragraphs:\n\n" . $text); + $results[] = $this->sendToOpenAi("Summerize the following text in $num_paragraphs paragraphs:\n\n" . $text); break; case 'azure_trained': - $results[] = $this->sendToAzureAi("Summerize the following text in 3 paragraphs:\n\n" . $text); + $results[] = $this->sendToAzureAi("Summerize the following text in $num_paragraphs paragraphs:\n\n" . $text); break; case 'bedrock': - $results[] = $this->sendToBedRock("Summerize the following text in 3 paragraphs:\n\n" . $text); + $results[] = $this->sendToBedRock("Summerize the following text in $num_paragraphs paragraphs:\n\n" . $text); break; } } @@ -110,15 +113,15 @@ public function processItem($data) { switch ($bot) { case 'openai': - $summary = $this->sendToOpenAi("Summerize the following text in 5 paragraphs:\n\n" . $text); + $summary = $this->sendToOpenAi("Summerize the following text in $num_paragraphs paragraphs:\n\n" . $text); break; case 'azure_trained': - $summary = $this->sendToAzureAi("Summerize the following text in 5 paragraphs:\n\n" . $text); + $summary = $this->sendToAzureAi("Summerize the following text in $num_paragraphs paragraphs:\n\n" . $text); break; case 'bedrock': - $summary = $this->sendToBedRock("Summerize the following text in 5 paragraphs:\n\n" . $text); + $summary = $this->sendToBedRock("Summerize the following text in $num_paragraphs paragraphs:\n\n" . $text); break; } @@ -137,7 +140,7 @@ protected function sendToOpenAi($text) : string { 'messages' => [ [ 'role' => 'user', - 'content' => "Summerize the following text in 3 paragraphs:\n\n" . 
$text, + 'content' => $text, ], ], 'temperature' => .2, From e1c754ea1d43b5ce5be5207361760fd7bea9c70a Mon Sep 17 00:00:00 2001 From: "Peter Droogmans (attiks)" Date: Tue, 19 Sep 2023 16:14:00 +0200 Subject: [PATCH 10/18] Rename PDF to document --- ...tity_form_display.node.summary.default.yml | 20 ++++++------- ...tity_view_display.node.summary.default.yml | 22 +++++++------- ...ntity_view_display.node.summary.teaser.yml | 8 ++--- ...eld.field.node.summary.field_document.yml} | 8 ++--- ...ield.node.summary.field_document_text.yml} | 8 ++--- ... => field.storage.node.field_document.yml} | 4 +-- ...ield.storage.node.field_document_text.yml} | 4 +-- ...ser_add_role_action.document_analyzer.yml} | 8 ++--- ...r_remove_role_action.document_analyzer.yml | 14 +++++++++ ...n.user_remove_role_action.pdf_analyzer.yml | 14 --------- ...er.yml => user.role.document_analyzer.yml} | 6 ++-- config/workflows.workflow.summarize.yml | 24 +++++++-------- .../ocha_ai_summarize.module | 30 +++++++++---------- .../OchaAiSummarizeExtractText.php | 10 +++---- .../QueueWorker/OchaAiSummarizeSummarize.php | 8 ++--- .../QueueWorker/OchaAiSummarizeTextract.php | 10 +++---- 16 files changed, 99 insertions(+), 99 deletions(-) rename config/{field.field.node.summary.field_pdf.yml => field.field.node.summary.field_document.yml} (80%) rename config/{field.field.node.summary.field_pdf_text.yml => field.field.node.summary.field_document_text.yml} (67%) rename config/{field.storage.node.field_pdf.yml => field.storage.node.field_document.yml} (87%) rename config/{field.storage.node.field_pdf_text.yml => field.storage.node.field_document_text.yml} (83%) rename config/{system.action.user_add_role_action.pdf_analyzer.yml => system.action.user_add_role_action.document_analyzer.yml} (50%) create mode 100644 config/system.action.user_remove_role_action.document_analyzer.yml delete mode 100644 config/system.action.user_remove_role_action.pdf_analyzer.yml rename config/{user.role.pdf_analyzer.yml => 
user.role.document_analyzer.yml} (88%) diff --git a/config/core.entity_form_display.node.summary.default.yml b/config/core.entity_form_display.node.summary.default.yml index 836a872..c92499b 100644 --- a/config/core.entity_form_display.node.summary.default.yml +++ b/config/core.entity_form_display.node.summary.default.yml @@ -6,8 +6,8 @@ dependencies: - field.field.node.summary.field_ai_brain - field.field.node.summary.field_automatic_mode - field.field.node.summary.field_number_of_paragraphs - - field.field.node.summary.field_pdf - - field.field.node.summary.field_pdf_text + - field.field.node.summary.field_document + - field.field.node.summary.field_document_text - field.field.node.summary.field_summary - node.type.summary - workflows.workflow.summarize @@ -39,20 +39,14 @@ content: settings: display_label: true third_party_settings: { } - field_number_of_paragraphs: - type: options_select - weight: 4 - region: content - settings: { } - third_party_settings: { } - field_pdf: + field_document: type: file_generic weight: 5 region: content settings: progress_indicator: throbber third_party_settings: { } - field_pdf_text: + field_document_text: type: string_textarea weight: 6 region: content @@ -60,6 +54,12 @@ content: rows: 5 placeholder: '' third_party_settings: { } + field_number_of_paragraphs: + type: options_select + weight: 4 + region: content + settings: { } + third_party_settings: { } field_summary: type: string_textarea weight: 7 diff --git a/config/core.entity_view_display.node.summary.default.yml b/config/core.entity_view_display.node.summary.default.yml index fec87f3..260527a 100644 --- a/config/core.entity_view_display.node.summary.default.yml +++ b/config/core.entity_view_display.node.summary.default.yml @@ -6,8 +6,8 @@ dependencies: - field.field.node.summary.field_ai_brain - field.field.node.summary.field_automatic_mode - field.field.node.summary.field_number_of_paragraphs - - field.field.node.summary.field_pdf - - 
field.field.node.summary.field_pdf_text + - field.field.node.summary.field_document + - field.field.node.summary.field_document_text - field.field.node.summary.field_summary - node.type.summary module: @@ -41,14 +41,7 @@ content: third_party_settings: { } weight: 0 region: content - field_number_of_paragraphs: - type: list_default - label: inline - settings: { } - third_party_settings: { } - weight: 3 - region: content - field_pdf: + field_document: type: file_default label: inline settings: @@ -56,6 +49,13 @@ content: third_party_settings: { } weight: 2 region: content + field_number_of_paragraphs: + type: list_default + label: inline + settings: { } + third_party_settings: { } + weight: 3 + region: content field_summary: type: basic_string label: hidden @@ -69,5 +69,5 @@ content: weight: 6 region: content hidden: - field_pdf_text: true + field_document_text: true langcode: true diff --git a/config/core.entity_view_display.node.summary.teaser.yml b/config/core.entity_view_display.node.summary.teaser.yml index ca8e08e..5705beb 100644 --- a/config/core.entity_view_display.node.summary.teaser.yml +++ b/config/core.entity_view_display.node.summary.teaser.yml @@ -7,8 +7,8 @@ dependencies: - field.field.node.summary.field_ai_brain - field.field.node.summary.field_automatic_mode - field.field.node.summary.field_number_of_paragraphs - - field.field.node.summary.field_pdf - - field.field.node.summary.field_pdf_text + - field.field.node.summary.field_document + - field.field.node.summary.field_document_text - field.field.node.summary.field_summary - node.type.summary module: @@ -31,8 +31,8 @@ content: hidden: field_ai_brain: true field_automatic_mode: true + field_document: true + field_document_text: true field_number_of_paragraphs: true - field_pdf: true - field_pdf_text: true field_summary: true langcode: true diff --git a/config/field.field.node.summary.field_pdf.yml b/config/field.field.node.summary.field_document.yml similarity index 80% rename from 
config/field.field.node.summary.field_pdf.yml rename to config/field.field.node.summary.field_document.yml index 87784cd..636df17 100644 --- a/config/field.field.node.summary.field_pdf.yml +++ b/config/field.field.node.summary.field_document.yml @@ -3,15 +3,15 @@ langcode: en status: true dependencies: config: - - field.storage.node.field_pdf + - field.storage.node.field_document - node.type.summary module: - file -id: node.summary.field_pdf -field_name: field_pdf +id: node.summary.field_document +field_name: field_document entity_type: node bundle: summary -label: PDF +label: Document description: '' required: true translatable: false diff --git a/config/field.field.node.summary.field_pdf_text.yml b/config/field.field.node.summary.field_document_text.yml similarity index 67% rename from config/field.field.node.summary.field_pdf_text.yml rename to config/field.field.node.summary.field_document_text.yml index a03a9e9..b09d04c 100644 --- a/config/field.field.node.summary.field_pdf_text.yml +++ b/config/field.field.node.summary.field_document_text.yml @@ -3,13 +3,13 @@ langcode: en status: true dependencies: config: - - field.storage.node.field_pdf_text + - field.storage.node.field_document_text - node.type.summary -id: node.summary.field_pdf_text -field_name: field_pdf_text +id: node.summary.field_document_text +field_name: field_document_text entity_type: node bundle: summary -label: 'PDF text' +label: 'Document text' description: '' required: false translatable: false diff --git a/config/field.storage.node.field_pdf.yml b/config/field.storage.node.field_document.yml similarity index 87% rename from config/field.storage.node.field_pdf.yml rename to config/field.storage.node.field_document.yml index ba7fa9e..3b9bf48 100644 --- a/config/field.storage.node.field_pdf.yml +++ b/config/field.storage.node.field_document.yml @@ -5,8 +5,8 @@ dependencies: module: - file - node -id: node.field_pdf -field_name: field_pdf +id: node.field_document +field_name: field_document 
entity_type: node type: file settings: diff --git a/config/field.storage.node.field_pdf_text.yml b/config/field.storage.node.field_document_text.yml similarity index 83% rename from config/field.storage.node.field_pdf_text.yml rename to config/field.storage.node.field_document_text.yml index 420c42a..72076cb 100644 --- a/config/field.storage.node.field_pdf_text.yml +++ b/config/field.storage.node.field_document_text.yml @@ -4,8 +4,8 @@ status: true dependencies: module: - node -id: node.field_pdf_text -field_name: field_pdf_text +id: node.field_document_text +field_name: field_document_text entity_type: node type: string_long settings: diff --git a/config/system.action.user_add_role_action.pdf_analyzer.yml b/config/system.action.user_add_role_action.document_analyzer.yml similarity index 50% rename from config/system.action.user_add_role_action.pdf_analyzer.yml rename to config/system.action.user_add_role_action.document_analyzer.yml index 48fd9a3..ee441f6 100644 --- a/config/system.action.user_add_role_action.pdf_analyzer.yml +++ b/config/system.action.user_add_role_action.document_analyzer.yml @@ -3,12 +3,12 @@ langcode: en status: true dependencies: config: - - user.role.pdf_analyzer + - user.role.document_analyzer module: - user -id: user_add_role_action.pdf_analyzer -label: 'Add the PDF Analyzer role to the selected user(s)' +id: user_add_role_action.document_analyzer +label: 'Add the Document Analyzer role to the selected user(s)' type: user plugin: user_add_role_action configuration: - rid: pdf_analyzer + rid: document_analyzer diff --git a/config/system.action.user_remove_role_action.document_analyzer.yml b/config/system.action.user_remove_role_action.document_analyzer.yml new file mode 100644 index 0000000..9f04b44 --- /dev/null +++ b/config/system.action.user_remove_role_action.document_analyzer.yml @@ -0,0 +1,14 @@ +uuid: b4c53584-6d8b-4277-aa80-dcfa59ec5b5f +langcode: en +status: true +dependencies: + config: + - user.role.document_analyzer + module: + 
- user +id: user_remove_role_action.document_analyzer +label: 'Remove the Document Analyzer role from the selected user(s)' +type: user +plugin: user_remove_role_action +configuration: + rid: document_analyzer diff --git a/config/system.action.user_remove_role_action.pdf_analyzer.yml b/config/system.action.user_remove_role_action.pdf_analyzer.yml deleted file mode 100644 index 1a251b0..0000000 --- a/config/system.action.user_remove_role_action.pdf_analyzer.yml +++ /dev/null @@ -1,14 +0,0 @@ -uuid: b4c53584-6d8b-4277-aa80-dcfa59ec5b5f -langcode: en -status: true -dependencies: - config: - - user.role.pdf_analyzer - module: - - user -id: user_remove_role_action.pdf_analyzer -label: 'Remove the PDF Analyzer role from the selected user(s)' -type: user -plugin: user_remove_role_action -configuration: - rid: pdf_analyzer diff --git a/config/user.role.pdf_analyzer.yml b/config/user.role.document_analyzer.yml similarity index 88% rename from config/user.role.pdf_analyzer.yml rename to config/user.role.document_analyzer.yml index 3c15d48..c995d40 100644 --- a/config/user.role.pdf_analyzer.yml +++ b/config/user.role.document_analyzer.yml @@ -10,8 +10,8 @@ dependencies: - node - node_view_permissions - system -id: pdf_analyzer -label: 'PDF Analyzer' +id: document_analyzer +label: 'Document Analyzer' weight: -6 is_admin: null permissions: @@ -20,8 +20,8 @@ permissions: - 'delete own summary content' - 'edit own summary content' - 'use summarize transition archive' + - 'use summarize transition document_uploaded' - 'use summarize transition extrat_text' - - 'use summarize transition pdf_uploaded' - 'use summarize transition published' - 'use summarize transition summarize' - 'use summarize transition summarized_update' diff --git a/config/workflows.workflow.summarize.yml b/config/workflows.workflow.summarize.yml index fba5559..ce13b6d 100644 --- a/config/workflows.workflow.summarize.yml +++ b/config/workflows.workflow.summarize.yml @@ -16,6 +16,11 @@ type_settings: weight: 3 
published: false default_revision: false + document_uploaded: + label: 'Document uploaded' + weight: -3 + published: false + default_revision: false draft: label: Draft weight: -4 @@ -26,11 +31,6 @@ type_settings: weight: -2 published: false default_revision: false - pdf_uploaded: - label: 'PDF uploaded' - weight: -3 - published: false - default_revision: false published: label: Published weight: 2 @@ -64,18 +64,18 @@ type_settings: - archived to: published weight: 3 + document_uploaded: + label: 'Document uploaded' + from: + - draft + to: document_uploaded + weight: -5 extrat_text: label: 'Extrat text' from: - - pdf_uploaded + - document_uploaded to: extract_text weight: -4 - pdf_uploaded: - label: 'PDF uploaded' - from: - - draft - to: pdf_uploaded - weight: -5 published: label: Published from: diff --git a/html/modules/custom/ocha_ai_summarize/ocha_ai_summarize.module b/html/modules/custom/ocha_ai_summarize/ocha_ai_summarize.module index 4999e95..9605685 100644 --- a/html/modules/custom/ocha_ai_summarize/ocha_ai_summarize.module +++ b/html/modules/custom/ocha_ai_summarize/ocha_ai_summarize.module @@ -2,7 +2,7 @@ /** * @file - * Use AI to summarize PDF files. + * Use AI to summarize documents. 
*/ use Aws\Credentials\Credentials; @@ -51,7 +51,7 @@ function ocha_ai_summarize_node_update(EntityInterface $entity) { $queue_name = ''; switch ($current_state) { - case 'pdf_uploaded': + case 'document_uploaded': if ($automatic_mode) { $entity->set('moderation_state', 'extract_text'); $entity->save(); @@ -105,10 +105,10 @@ function ocha_ai_summarize_form_node_summary_edit_form_alter(&$form, FormStateIn */ function ocha_ai_summarize_form_node_summary_alter(&$form, FormStateInterface $form_state, $form_id) { if ($form_id === 'node_summary_form') { - $form['field_pdf_text']['#access'] = FALSE; + $form['field_document_text']['#access'] = FALSE; $form['field_summary']['#access'] = FALSE; $form['moderation_state']['#access'] = FALSE; - $form['moderation_state']['widget'][0]['state']['#default_value'] = 'pdf_uploaded'; + $form['moderation_state']['widget'][0]['state']['#default_value'] = 'document_uploaded'; return; } @@ -120,9 +120,9 @@ function ocha_ai_summarize_form_node_summary_alter(&$form, FormStateInterface $f switch ($current_state) { case 'draft': - case 'pdf_uploaded': + case 'document_uploaded': case 'extract_text': - $form['field_pdf_text']['#access'] = FALSE; + $form['field_document_text']['#access'] = FALSE; $form['field_summary']['#access'] = FALSE; $form['moderation_state']['#access'] = FALSE; break; @@ -133,9 +133,9 @@ function ocha_ai_summarize_form_node_summary_alter(&$form, FormStateInterface $f case 'summarize': $form['field_number_of_paragraphs']['#disabled'] = TRUE; - $form['field_pdf_text']['#disabled'] = TRUE; - unset($form['field_pdf_text']['widget'][$form['field_pdf_text']['widget']['#max_delta']]); - unset($form['field_pdf_text']['widget']['add_more']); + $form['field_document_text']['#disabled'] = TRUE; + unset($form['field_document_text']['widget'][$form['field_document_text']['widget']['#max_delta']]); + unset($form['field_document_text']['widget']['add_more']); $form['field_summary']['#access'] = FALSE; 
$form['moderation_state']['#access'] = FALSE; @@ -143,9 +143,9 @@ function ocha_ai_summarize_form_node_summary_alter(&$form, FormStateInterface $f case 'summarized': $form['field_number_of_paragraphs']['#disabled'] = TRUE; - $form['field_pdf_text']['#disabled'] = TRUE; - unset($form['field_pdf_text']['widget'][$form['field_pdf_text']['widget']['#max_delta']]); - unset($form['field_pdf_text']['widget']['add_more']); + $form['field_document_text']['#disabled'] = TRUE; + unset($form['field_document_text']['widget'][$form['field_document_text']['widget']['#max_delta']]); + unset($form['field_document_text']['widget']['add_more']); $form['field_summary']['#disabled'] = TRUE; break; @@ -167,15 +167,15 @@ function ocha_ai_summarize_node_view(array &$build, EntityInterface $entity, Ent $content_moderation_state = ContentModerationState::loadFromModeratedEntity($entity); if (!$content_moderation_state) { - $active = 'pdf_uploaded'; + $active = 'document_uploaded'; } else { $active = $content_moderation_state->get('moderation_state')->value; } $states = [ - 'pdf_uploaded' => [ - '#markup' => 'PDF uploaded', + 'document_uploaded' => [ + '#markup' => 'Document uploaded', '#wrapper_attributes' => [ 'class' => [], ], diff --git a/html/modules/custom/ocha_ai_summarize/src/Plugin/QueueWorker/OchaAiSummarizeExtractText.php b/html/modules/custom/ocha_ai_summarize/src/Plugin/QueueWorker/OchaAiSummarizeExtractText.php index 51926c7..0c65fab 100644 --- a/html/modules/custom/ocha_ai_summarize/src/Plugin/QueueWorker/OchaAiSummarizeExtractText.php +++ b/html/modules/custom/ocha_ai_summarize/src/Plugin/QueueWorker/OchaAiSummarizeExtractText.php @@ -10,11 +10,11 @@ use Symfony\Component\DependencyInjection\ContainerInterface; /** - * Extract text from a PDF file. + * Extract text from a document. 
* * @QueueWorker( * id = "ocha_ai_summarize_extract_text", - * title = @Translation("Extract text from a PDF file"), + * title = @Translation("Extract text from a document"), * cron = {"time" = 30} * ) */ @@ -82,12 +82,12 @@ public function processItem($data) { return; } - if (!$node->field_pdf_text->isEmpty()) { + if (!$node->field_document_text->isEmpty()) { return; } /** @var \Drupal\file\Plugin\Field\FieldType\FileItem $file_item */ - $file_item = $node->get('field_pdf')->first() ?? NULL; + $file_item = $node->get('field_document')->first() ?? NULL; if (!$file_item) { return; } @@ -101,7 +101,7 @@ public function processItem($data) { $absolute_path = $this->fileSystem->realpath($file->getFileUri()); $text = ocha_ai_summarize_extract_pages($absolute_path); - $node->set('field_pdf_text', $text); + $node->set('field_document_text', $text); $node->set('moderation_state', 'text_extracted'); $node->save(); } diff --git a/html/modules/custom/ocha_ai_summarize/src/Plugin/QueueWorker/OchaAiSummarizeSummarize.php b/html/modules/custom/ocha_ai_summarize/src/Plugin/QueueWorker/OchaAiSummarizeSummarize.php index 9d7ebfb..44ddef6 100644 --- a/html/modules/custom/ocha_ai_summarize/src/Plugin/QueueWorker/OchaAiSummarizeSummarize.php +++ b/html/modules/custom/ocha_ai_summarize/src/Plugin/QueueWorker/OchaAiSummarizeSummarize.php @@ -9,7 +9,7 @@ use Symfony\Component\DependencyInjection\ContainerInterface; /** - * Extract text from a PDF file. + * Extract text from a document file. * * @QueueWorker( * id = "ocha_ai_summarize_summarize", @@ -75,14 +75,14 @@ public function processItem($data) { return; } - if ($node->field_pdf_text->isEmpty()) { + if ($node->field_document_text->isEmpty()) { return; } // Summarize each page. 
$results = []; - foreach ($node->field_pdf_text as $pdf_text) { - $text = $pdf_text->value; + foreach ($node->field_document_text as $document_text) { + $text = $document_text->value; if (strlen($text) < 100) { $results[] = $text; diff --git a/html/modules/custom/ocha_ai_summarize/src/Plugin/QueueWorker/OchaAiSummarizeTextract.php b/html/modules/custom/ocha_ai_summarize/src/Plugin/QueueWorker/OchaAiSummarizeTextract.php index bed579d..d062849 100644 --- a/html/modules/custom/ocha_ai_summarize/src/Plugin/QueueWorker/OchaAiSummarizeTextract.php +++ b/html/modules/custom/ocha_ai_summarize/src/Plugin/QueueWorker/OchaAiSummarizeTextract.php @@ -10,11 +10,11 @@ use Symfony\Component\DependencyInjection\ContainerInterface; /** - * Extract text from a PDF file. + * Extract text from a document file. * * @QueueWorker( * id = "ocha_ai_summarize_textract", - * title = @Translation("Extract text from a PDF file using AWS Textract"), + * title = @Translation("Extract text from a document file using AWS Textract"), * cron = {"time" = 30} * ) */ @@ -81,12 +81,12 @@ public function processItem($data) { return; } - if (!$node->field_pdf_text->isEmpty()) { + if (!$node->field_document_text->isEmpty()) { return; } /** @var \Drupal\file\Plugin\Field\FieldType\FileItem $file_item */ - $file_item = $node->get('field_pdf')->first() ?? NULL; + $file_item = $node->get('field_document')->first() ?? NULL; if (!$file_item) { return; } @@ -101,7 +101,7 @@ public function processItem($data) { $text = ocha_ai_summarize_texttract($absolute_path); // Split in BLOB of 3000 characters. 
- $node->set('field_pdf_text', $text); + $node->set('field_document_text', $text); $node->set('moderation_state', 'text_extracted'); $node->save(); } From 7696b65296c522fa1ac5a2ddd3c8f41c1488ce74 Mon Sep 17 00:00:00 2001 From: "Peter Droogmans (attiks)" Date: Tue, 19 Sep 2023 16:31:33 +0200 Subject: [PATCH 11/18] Parse word docs --- composer.json | 1 + composer.lock | 172 +++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 172 insertions(+), 1 deletion(-) diff --git a/composer.json b/composer.json index fcf5085..d97e7fa 100644 --- a/composer.json +++ b/composer.json @@ -54,6 +54,7 @@ "drush/drush": "^11.3", "oomphinc/composer-installers-extender": "^2.0", "orakili/composer-drupal-info-file-patch-helper": "^1", + "phpoffice/phpword": "^1.1", "unocha/common_design": "^7.4", "webflo/drupal-finder": "^1.2.2" }, diff --git a/composer.lock b/composer.lock index 9327892..42e5060 100644 --- a/composer.lock +++ b/composer.lock @@ -4,7 +4,7 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", "This file is @generated automatically" ], - "content-hash": "25626d5695fd25fdeff207152ec8edb0", + "content-hash": "39182221e9032eeac4606cc96f15a7b0", "packages": [ { "name": "asm89/stack-cors", @@ -5618,6 +5618,68 @@ }, "time": "2022-04-13T08:02:27+00:00" }, + { + "name": "laminas/laminas-escaper", + "version": "2.12.0", + "source": { + "type": "git", + "url": "https://github.com/laminas/laminas-escaper.git", + "reference": "ee7a4c37bf3d0e8c03635d5bddb5bb3184ead490" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/laminas/laminas-escaper/zipball/ee7a4c37bf3d0e8c03635d5bddb5bb3184ead490", + "reference": "ee7a4c37bf3d0e8c03635d5bddb5bb3184ead490", + "shasum": "" + }, + "require": { + "ext-ctype": "*", + "ext-mbstring": "*", + "php": "^7.4 || ~8.0.0 || ~8.1.0 || ~8.2.0" + }, + "conflict": { + "zendframework/zend-escaper": "*" + }, + "require-dev": { + "infection/infection": "^0.26.6", + 
"laminas/laminas-coding-standard": "~2.4.0", + "maglnet/composer-require-checker": "^3.8.0", + "phpunit/phpunit": "^9.5.18", + "psalm/plugin-phpunit": "^0.17.0", + "vimeo/psalm": "^4.22.0" + }, + "type": "library", + "autoload": { + "psr-4": { + "Laminas\\Escaper\\": "src/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "BSD-3-Clause" + ], + "description": "Securely and safely escape HTML, HTML attributes, JavaScript, CSS, and URLs", + "homepage": "https://laminas.dev", + "keywords": [ + "escaper", + "laminas" + ], + "support": { + "chat": "https://laminas.dev/chat", + "docs": "https://docs.laminas.dev/laminas-escaper/", + "forum": "https://discourse.laminas.dev", + "issues": "https://github.com/laminas/laminas-escaper/issues", + "rss": "https://github.com/laminas/laminas-escaper/releases.atom", + "source": "https://github.com/laminas/laminas-escaper" + }, + "funding": [ + { + "url": "https://funding.communitybridge.org/projects/laminas-project", + "type": "community_bridge" + } + ], + "time": "2022-10-10T10:11:09+00:00" + }, { "name": "league/container", "version": "4.2.0", @@ -7037,6 +7099,114 @@ }, "time": "2023-08-12T11:01:26+00:00" }, + { + "name": "phpoffice/phpword", + "version": "1.1.0", + "source": { + "type": "git", + "url": "https://github.com/PHPOffice/PHPWord.git", + "reference": "90a55955e6a772bb4cd9b1ef6a7e88c8976c2561" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/PHPOffice/PHPWord/zipball/90a55955e6a772bb4cd9b1ef6a7e88c8976c2561", + "reference": "90a55955e6a772bb4cd9b1ef6a7e88c8976c2561", + "shasum": "" + }, + "require": { + "ext-dom": "*", + "ext-json": "*", + "ext-xml": "*", + "laminas/laminas-escaper": ">=2.6", + "php": "^7.1|^8.0" + }, + "require-dev": { + "dompdf/dompdf": "^2.0", + "ext-gd": "*", + "ext-libxml": "*", + "ext-zip": "*", + "friendsofphp/php-cs-fixer": "^3.3", + "mpdf/mpdf": "^8.1", + "phpmd/phpmd": "^2.13", + "phpunit/phpunit": ">=7.0", + "symfony/process": "^4.4", + 
"tecnickcom/tcpdf": "^6.5" + }, + "suggest": { + "dompdf/dompdf": "Allows writing PDF", + "ext-gd2": "Allows adding images", + "ext-xmlwriter": "Allows writing OOXML and ODF", + "ext-xsl": "Allows applying XSL style sheet to headers, to main document part, and to footers of an OOXML template", + "ext-zip": "Allows writing OOXML and ODF" + }, + "type": "library", + "autoload": { + "psr-4": { + "PhpOffice\\PhpWord\\": "src/PhpWord" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "LGPL-3.0" + ], + "authors": [ + { + "name": "Mark Baker" + }, + { + "name": "Gabriel Bull", + "email": "me@gabrielbull.com", + "homepage": "http://gabrielbull.com/" + }, + { + "name": "Franck Lefevre", + "homepage": "https://rootslabs.net/blog/" + }, + { + "name": "Ivan Lanin", + "homepage": "http://ivan.lanin.org" + }, + { + "name": "Roman Syroeshko", + "homepage": "http://ru.linkedin.com/pub/roman-syroeshko/34/a53/994/" + }, + { + "name": "Antoine de Troostembergh" + } + ], + "description": "PHPWord - A pure PHP library for reading and writing word processing documents (OOXML, ODF, RTF, HTML, PDF)", + "homepage": "https://phpword.readthedocs.io/", + "keywords": [ + "ISO IEC 29500", + "OOXML", + "Office Open XML", + "OpenDocument", + "OpenXML", + "PhpOffice", + "PhpWord", + "Rich Text Format", + "WordprocessingML", + "doc", + "docx", + "html", + "odf", + "odt", + "office", + "pdf", + "php", + "reader", + "rtf", + "template", + "template processor", + "word", + "writer" + ], + "support": { + "issues": "https://github.com/PHPOffice/PHPWord/issues", + "source": "https://github.com/PHPOffice/PHPWord/tree/1.1.0" + }, + "time": "2023-05-30T07:59:14+00:00" + }, { "name": "phpowermove/docblock", "version": "v4.0", From b3591737ecb94102cf2f04b12cec9b63caf7a890 Mon Sep 17 00:00:00 2001 From: "Peter Droogmans (attiks)" Date: Tue, 19 Sep 2023 16:59:58 +0200 Subject: [PATCH 12/18] Extract text from docx, rtf, odt --- ...ield.field.node.summary.field_document.yml | 2 
+- .../ocha_ai_summarize.module | 68 +++++++++++++++---- .../OchaAiSummarizeExtractText.php | 9 ++- 3 files changed, 62 insertions(+), 17 deletions(-) diff --git a/config/field.field.node.summary.field_document.yml b/config/field.field.node.summary.field_document.yml index 636df17..22b8315 100644 --- a/config/field.field.node.summary.field_document.yml +++ b/config/field.field.node.summary.field_document.yml @@ -21,7 +21,7 @@ settings: handler: 'default:file' handler_settings: { } file_directory: '[date:custom:Y]-[date:custom:m]' - file_extensions: 'pdf docx odf doc' + file_extensions: 'pdf docx odt rtf' max_filesize: '' description_field: false field_type: file diff --git a/html/modules/custom/ocha_ai_summarize/ocha_ai_summarize.module b/html/modules/custom/ocha_ai_summarize/ocha_ai_summarize.module index 9605685..0801f99 100644 --- a/html/modules/custom/ocha_ai_summarize/ocha_ai_summarize.module +++ b/html/modules/custom/ocha_ai_summarize/ocha_ai_summarize.module @@ -15,6 +15,7 @@ use Drupal\Core\Entity\EntityInterface; use Drupal\Core\Form\FormStateInterface; use GuzzleHttp\Client; use GuzzleHttp\Psr7\Request; +use PhpOffice\PhpWord\IOFactory; use Symfony\Component\Process\Process; /** @@ -261,7 +262,58 @@ function ocha_ai_summarize_get_num_pages($filename) { /** * Extract text for each page separately. 
*/ -function ocha_ai_summarize_extract_pages($filename) { +function ocha_ai_summarize_extract_pages_from_doc($filename) { + $reader_name = ''; + $file_parts = pathinfo($filename); + switch (strtolower($file_parts['extension'])) { + case 'docx': + $reader_name = 'Word2007'; + break; + + case 'rtf': + $reader_name = 'RTF'; + break; + + case 'odt': + $reader_name = 'ODText'; + break; + } + + if (empty($reader_name)) { + return; + } + + $phpWord = IOFactory::load($filename, $reader_name); + + $pages = []; + foreach ($phpWord->getSections() as $section) { + $text = ''; + foreach ($section->getElements() as $element) { + if (method_exists($element, 'getElements')) { + foreach ($element->getElements() as $child_element) { + if (method_exists($child_element, 'getText')) { + $text .= $child_element->getText() . ' '; + } + elseif (method_exists($child_element, 'getContent')) { + $text .= $child_element->getContent() . ' '; + } + } + } + elseif (method_exists($element, 'getText')) { + $text .= $element->getText() . "\n"; + } + } + + $pages[] = $text; + } + + return $pages; +} + +/** + * Extract text for each page separately. + */ +function ocha_ai_summarize_extract_pages_from_pdf($filename) { $pages = []; $num_pages = ocha_ai_summarize_get_num_pages($filename); @@ -431,20 +483,6 @@ function ocha_ai_summarize_texttract_get_text($job_id) { return ''; } -/** - * Test run for AWS Textract. - */ -function ocha_ai_summarize_testit() { - $file_name = '/var/www/private_files/2023-09/2018_OCHA_Aide Memoire.pdf'; - ocha_ai_summarize_upload_to_s3($file_name); - $job_id = ocha_ai_summarize_texttract($file_name); - - sleep(5); - - $text = ocha_ai_summarize_texttract_get_text($job_id); - return $text; -} - /** * Make chat call to OpenAi. 
*/ diff --git a/html/modules/custom/ocha_ai_summarize/src/Plugin/QueueWorker/OchaAiSummarizeExtractText.php b/html/modules/custom/ocha_ai_summarize/src/Plugin/QueueWorker/OchaAiSummarizeExtractText.php index 0c65fab..3799507 100644 --- a/html/modules/custom/ocha_ai_summarize/src/Plugin/QueueWorker/OchaAiSummarizeExtractText.php +++ b/html/modules/custom/ocha_ai_summarize/src/Plugin/QueueWorker/OchaAiSummarizeExtractText.php @@ -98,9 +98,16 @@ public function processItem($data) { return; } + // PDF or else. $absolute_path = $this->fileSystem->realpath($file->getFileUri()); + $file_parts = pathinfo($absolute_path); + if (strtolower($file_parts['extension']) == 'pdf') { + $text = ocha_ai_summarize_extract_pages_from_pdf($absolute_path); + } + else { + $text = ocha_ai_summarize_extract_pages_from_doc($absolute_path); + } - $text = ocha_ai_summarize_extract_pages($absolute_path); $node->set('field_document_text', $text); $node->set('moderation_state', 'text_extracted'); $node->save(); From 4b46011cda3858a05518a392d3225140ee74f461 Mon Sep 17 00:00:00 2001 From: "Peter Droogmans (attiks)" Date: Fri, 22 Sep 2023 11:42:45 +0200 Subject: [PATCH 13/18] chore: Add claude --- config/field.storage.node.field_ai_brain.yml | 3 + .../ocha_ai_summarize.module | 40 +++++++++ .../QueueWorker/OchaAiSummarizeSummarize.php | 87 ++++++++++++------- 3 files changed, 98 insertions(+), 32 deletions(-) diff --git a/config/field.storage.node.field_ai_brain.yml b/config/field.storage.node.field_ai_brain.yml index 8833700..97065bc 100644 --- a/config/field.storage.node.field_ai_brain.yml +++ b/config/field.storage.node.field_ai_brain.yml @@ -20,6 +20,9 @@ settings: - value: bedrock label: 'BedRock (Titan)' + - + value: claude + label: Claude allowed_values_function: '' module: options locked: false diff --git a/html/modules/custom/ocha_ai_summarize/ocha_ai_summarize.module b/html/modules/custom/ocha_ai_summarize/ocha_ai_summarize.module index 0801f99..f3dd07c 100644 --- 
a/html/modules/custom/ocha_ai_summarize/ocha_ai_summarize.module +++ b/html/modules/custom/ocha_ai_summarize/ocha_ai_summarize.module @@ -583,3 +583,43 @@ function ocha_ai_summarize_http_call_bedrock($prompt) { $body = $response->getBody() . ''; return json_decode($body, TRUE); } + +/** + * Make chat call to Claude. + */ +function ocha_ai_summarize_http_call_claude($prompt) { + $config = \Drupal::config('ocha_ai_summarize.settings'); + $endpoint = $config->get('claude_endpoint'); + $api_key = $config->get('claude_api_key'); + $claude_version = $config->get('claude_version'); + + $headers = [ + 'Content-Type' => 'application/json', + 'Accept' => 'application/json', + 'anthropic-version' => $claude_version, + 'x-api-key' => $api_key, + ]; + + $body = [ + 'model' => 'claude-2', + 'prompt' => $prompt, + "stop_sequences" => [ + "\n\nHuman:", + ], + 'temperature' => 0.1, + 'max_tokens_to_sample' => 2500, + ]; + + $http_client = \Drupal::httpClient(); + $response = $http_client->request( + 'POST', + $endpoint, + [ + 'headers' => $headers, + 'json' => $body, + ], + ); + + $body = $response->getBody() . ''; + return json_decode($body, TRUE); +} diff --git a/html/modules/custom/ocha_ai_summarize/src/Plugin/QueueWorker/OchaAiSummarizeSummarize.php b/html/modules/custom/ocha_ai_summarize/src/Plugin/QueueWorker/OchaAiSummarizeSummarize.php index 44ddef6..28163f1 100644 --- a/html/modules/custom/ocha_ai_summarize/src/Plugin/QueueWorker/OchaAiSummarizeSummarize.php +++ b/html/modules/custom/ocha_ai_summarize/src/Plugin/QueueWorker/OchaAiSummarizeSummarize.php @@ -79,52 +79,64 @@ public function processItem($data) { return; } - // Summarize each page. - $results = []; - foreach ($node->field_document_text as $document_text) { - $text = $document_text->value; - - if (strlen($text) < 100) { - $results[] = $text; - continue; + // Claude can handle all text at once. 
+ if ($bot == 'claude') { + $text = ''; + foreach ($node->field_document_text as $document_text) { + $text .= $document_text->value . "\n"; + } + + $summary = $this->sendToClaudeAi("Summerize the following text in $num_paragraphs paragraphs:\n\n" . $text); + } + else { + // Summarize each page. + $results = []; + foreach ($node->field_document_text as $document_text) { + $text = $document_text->value; + + if (strlen($text) < 100) { + $results[] = $text; + continue; + } + + switch ($bot) { + case 'openai': + $results[] = $this->sendToOpenAi("Summerize the following text in $num_paragraphs paragraphs:\n\n" . $text); + break; + + case 'azure_trained': + $results[] = $this->sendToAzureAi("Summerize the following text in $num_paragraphs paragraphs:\n\n" . $text); + break; + + case 'bedrock': + $results[] = $this->sendToBedRock("Summerize the following text in $num_paragraphs paragraphs:\n\n" . $text); + break; + + } + } + + // Summarize the summaries. + $text = ''; + foreach ($results as $row) { + $text .= $row; + $text .= "\n"; } switch ($bot) { case 'openai': - $results[] = $this->sendToOpenAi("Summerize the following text in $num_paragraphs paragraphs:\n\n" . $text); + $summary = $this->sendToOpenAi("Summerize the following text in $num_paragraphs paragraphs:\n\n" . $text); break; case 'azure_trained': - $results[] = $this->sendToAzureAi("Summerize the following text in $num_paragraphs paragraphs:\n\n" . $text); + $summary = $this->sendToAzureAi("Summerize the following text in $num_paragraphs paragraphs:\n\n" . $text); break; case 'bedrock': - $results[] = $this->sendToBedRock("Summerize the following text in $num_paragraphs paragraphs:\n\n" . $text); + $summary = $this->sendToBedRock("Summerize the following text in $num_paragraphs paragraphs:\n\n" . $text); break; } } - // Summarize the summaries. 
- $text = ''; - foreach ($results as $row) { - $text .= $row; - $text .= "\n"; - } - - switch ($bot) { - case 'openai': - $summary = $this->sendToOpenAi("Summerize the following text in $num_paragraphs paragraphs:\n\n" . $text); - break; - - case 'azure_trained': - $summary = $this->sendToAzureAi("Summerize the following text in $num_paragraphs paragraphs:\n\n" . $text); - break; - - case 'bedrock': - $summary = $this->sendToBedRock("Summerize the following text in $num_paragraphs paragraphs:\n\n" . $text); - break; - } - $node->set('field_summary', $summary); $node->set('moderation_state', 'summarized'); $node->save(); @@ -181,4 +193,15 @@ protected function sendToBedRock($text) : string { return $result['results'][0]['outputText'] ?? ''; } + /** + * Send query to Claude AI. + */ + protected function sendToClaudeAi($text) : string { + $prompt = "\n\nHuman: $text\n\nAssistant:"; + + $result = ocha_ai_summarize_http_call_claude($prompt); + + return $result['completion'] ?? ''; + } + } From 75e3e3c30554d2174cb63fb625cbd36a661d1c96 Mon Sep 17 00:00:00 2001 From: "Peter Droogmans (attiks)" Date: Fri, 22 Sep 2023 11:43:25 +0200 Subject: [PATCH 14/18] chore: Add claude --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 80e28bd..c95cffd 100644 --- a/README.md +++ b/README.md @@ -16,6 +16,10 @@ $config['ocha_ai_summarize.settings']['bedrock_endpoint'] = 'https://bedrock.us- $config['ocha_ai_summarize.settings']['bedrock_model'] = 'amazon.titan-tg1-large'; $config['ocha_ai_summarize.settings']['bedrock_access_key'] = 'x1'; $config['ocha_ai_summarize.settings']['bedrock_secret_key'] = 'x2'; + +$config['ocha_ai_summarize.settings']['claude_endpoint'] = 'https://api.anthropic.com/v1/complete'; +$config['ocha_ai_summarize.settings']['claude_version'] = '2023-06-01'; +$config['ocha_ai_summarize.settings']['claude_api_key'] = 'zz'; ``` ## Cron From 570de6549155e8fcbc7618f33984386e43f0bef7 Mon Sep 17 00:00:00 2001 From: "Peter Droogmans 
(attiks)" Date: Tue, 3 Oct 2023 13:20:21 +0200 Subject: [PATCH 15/18] Use AI to extract action points --- README.md | 1 + ...ld_override.node.action_points.promote.yml | 22 + ...orm_display.node.action_points.default.yml | 127 ++++ ...iew_display.node.action_points.default.yml | 66 ++ ...view_display.node.action_points.teaser.yml | 31 + ...node.action_points.field_action_points.yml | 28 + ...ield.node.action_points.field_ai_brain.yml | 23 + ...ode.action_points.field_automatic_mode.yml | 23 + ...ield.node.action_points.field_document.yml | 27 + ...node.action_points.field_document_text.yml | 19 + ...field.storage.node.field_action_points.yml | 19 + ...ge.content_settings.node.action_points.yml | 11 + config/node.type.action_points.yml | 17 + config/user.role.anonymous.yml | 2 + config/user.role.authenticated.yml | 2 + config/user.role.document_analyzer.yml | 12 + config/user.role.user_manager.yml | 1 + config/workflows.workflow.action_points.yml | 118 +++ config_dev/dblog.settings.yml | 3 - config_dev/devel.settings.yml | 12 - config_dev/devel.toolbar.settings.yml | 10 - config_dev/system.menu.devel.yml | 13 - config_dev/views.view.watchdog.yml | 712 ------------------ .../ocha_ai_summarize.module | 240 ++++++ .../OchaAiSummarizeActionPoints.php | 211 ++++++ .../OchaAiSummarizeExtractText.php | 2 +- 26 files changed, 1001 insertions(+), 751 deletions(-) create mode 100644 config/core.base_field_override.node.action_points.promote.yml create mode 100644 config/core.entity_form_display.node.action_points.default.yml create mode 100644 config/core.entity_view_display.node.action_points.default.yml create mode 100644 config/core.entity_view_display.node.action_points.teaser.yml create mode 100644 config/field.field.node.action_points.field_action_points.yml create mode 100644 config/field.field.node.action_points.field_ai_brain.yml create mode 100644 config/field.field.node.action_points.field_automatic_mode.yml create mode 100644 
config/field.field.node.action_points.field_document.yml create mode 100644 config/field.field.node.action_points.field_document_text.yml create mode 100644 config/field.storage.node.field_action_points.yml create mode 100644 config/language.content_settings.node.action_points.yml create mode 100644 config/node.type.action_points.yml create mode 100644 config/workflows.workflow.action_points.yml delete mode 100644 config_dev/dblog.settings.yml delete mode 100644 config_dev/devel.settings.yml delete mode 100644 config_dev/devel.toolbar.settings.yml delete mode 100644 config_dev/system.menu.devel.yml delete mode 100644 config_dev/views.view.watchdog.yml create mode 100644 html/modules/custom/ocha_ai_summarize/src/Plugin/QueueWorker/OchaAiSummarizeActionPoints.php diff --git a/README.md b/README.md index c95cffd..d55335a 100644 --- a/README.md +++ b/README.md @@ -27,6 +27,7 @@ $config['ocha_ai_summarize.settings']['claude_api_key'] = 'zz'; ```bash drush queue:process ocha_ai_summarize_extract_text drush queue:process ocha_ai_summarize_summarize +drush queue:process ocha_ai_summarize_action_points ``` We can either use cron to run the queues or run them separatly diff --git a/config/core.base_field_override.node.action_points.promote.yml b/config/core.base_field_override.node.action_points.promote.yml new file mode 100644 index 0000000..8798602 --- /dev/null +++ b/config/core.base_field_override.node.action_points.promote.yml @@ -0,0 +1,22 @@ +uuid: 77ca281c-e187-4460-8307-6b20ede22edd +langcode: en +status: true +dependencies: + config: + - node.type.action_points +id: node.action_points.promote +field_name: promote +entity_type: node +bundle: action_points +label: 'Promoted to front page' +description: '' +required: false +translatable: true +default_value: + - + value: 0 +default_value_callback: '' +settings: + on_label: 'On' + off_label: 'Off' +field_type: boolean diff --git a/config/core.entity_form_display.node.action_points.default.yml 
b/config/core.entity_form_display.node.action_points.default.yml new file mode 100644 index 0000000..739a3bb --- /dev/null +++ b/config/core.entity_form_display.node.action_points.default.yml @@ -0,0 +1,127 @@ +uuid: b0a337d3-57d1-474e-ab26-2c94eaab3e43 +langcode: en +status: true +dependencies: + config: + - field.field.node.action_points.field_action_points + - field.field.node.action_points.field_ai_brain + - field.field.node.action_points.field_automatic_mode + - field.field.node.action_points.field_document + - field.field.node.action_points.field_document_text + - node.type.action_points + module: + - content_moderation + - file + - path + - text +id: node.action_points.default +targetEntityType: node +bundle: action_points +mode: default +content: + created: + type: datetime_timestamp + weight: 8 + region: content + settings: { } + third_party_settings: { } + field_action_points: + type: text_textarea + weight: 7 + region: content + settings: + rows: 5 + placeholder: '' + third_party_settings: { } + field_ai_brain: + type: options_select + weight: 2 + region: content + settings: { } + third_party_settings: { } + field_automatic_mode: + type: boolean_checkbox + weight: 3 + region: content + settings: + display_label: true + third_party_settings: { } + field_document: + type: file_generic + weight: 4 + region: content + settings: + progress_indicator: throbber + third_party_settings: { } + field_document_text: + type: string_textarea + weight: 6 + region: content + settings: + rows: 5 + placeholder: '' + third_party_settings: { } + langcode: + type: language_select + weight: 1 + region: content + settings: + include_locked: true + third_party_settings: { } + moderation_state: + type: moderation_state_default + weight: 13 + region: content + settings: { } + third_party_settings: { } + path: + type: path + weight: 11 + region: content + settings: { } + third_party_settings: { } + promote: + type: boolean_checkbox + weight: 9 + region: content + settings: + 
display_label: true + third_party_settings: { } + status: + type: boolean_checkbox + weight: 14 + region: content + settings: + display_label: true + third_party_settings: { } + sticky: + type: boolean_checkbox + weight: 10 + region: content + settings: + display_label: true + third_party_settings: { } + title: + type: string_textfield + weight: 0 + region: content + settings: + size: 60 + placeholder: '' + third_party_settings: { } + uid: + type: entity_reference_autocomplete + weight: 5 + region: content + settings: + match_operator: CONTAINS + match_limit: 10 + size: 60 + placeholder: '' + third_party_settings: { } + url_redirects: + weight: 12 + region: content + settings: { } + third_party_settings: { } +hidden: { } diff --git a/config/core.entity_view_display.node.action_points.default.yml b/config/core.entity_view_display.node.action_points.default.yml new file mode 100644 index 0000000..75aed6b --- /dev/null +++ b/config/core.entity_view_display.node.action_points.default.yml @@ -0,0 +1,66 @@ +uuid: 15038d49-cf49-4566-b9f7-6b14cbbf04e4 +langcode: en +status: true +dependencies: + config: + - field.field.node.action_points.field_action_points + - field.field.node.action_points.field_ai_brain + - field.field.node.action_points.field_automatic_mode + - field.field.node.action_points.field_document + - field.field.node.action_points.field_document_text + - node.type.action_points + module: + - file + - options + - text + - user +id: node.action_points.default +targetEntityType: node +bundle: action_points +mode: default +content: + content_moderation_control: + settings: { } + third_party_settings: { } + weight: 4 + region: content + field_action_points: + type: text_default + label: hidden + settings: { } + third_party_settings: { } + weight: 3 + region: content + field_ai_brain: + type: list_default + label: inline + settings: { } + third_party_settings: { } + weight: 1 + region: content + field_automatic_mode: + type: boolean + label: inline + settings: + 
format: default + format_custom_false: '' + format_custom_true: '' + third_party_settings: { } + weight: 0 + region: content + field_document: + type: file_default + label: inline + settings: + use_description_as_link_text: true + third_party_settings: { } + weight: 2 + region: content + links: + settings: { } + third_party_settings: { } + weight: 5 + region: content +hidden: + field_document_text: true + langcode: true diff --git a/config/core.entity_view_display.node.action_points.teaser.yml b/config/core.entity_view_display.node.action_points.teaser.yml new file mode 100644 index 0000000..9c6ffa2 --- /dev/null +++ b/config/core.entity_view_display.node.action_points.teaser.yml @@ -0,0 +1,31 @@ +uuid: 9977694b-b1f0-4006-b537-c9efd6eb76b7 +langcode: en +status: true +dependencies: + config: + - core.entity_view_mode.node.teaser + - field.field.node.action_points.field_action_points + - field.field.node.action_points.field_ai_brain + - field.field.node.action_points.field_automatic_mode + - field.field.node.action_points.field_document + - field.field.node.action_points.field_document_text + - node.type.action_points + module: + - user +id: node.action_points.teaser +targetEntityType: node +bundle: action_points +mode: teaser +content: + links: + settings: { } + third_party_settings: { } + weight: 100 + region: content +hidden: + field_action_points: true + field_ai_brain: true + field_automatic_mode: true + field_document: true + field_document_text: true + langcode: true diff --git a/config/field.field.node.action_points.field_action_points.yml b/config/field.field.node.action_points.field_action_points.yml new file mode 100644 index 0000000..3bd6419 --- /dev/null +++ b/config/field.field.node.action_points.field_action_points.yml @@ -0,0 +1,28 @@ +uuid: 57e67950-2bea-4289-a1e5-253786d5fa77 +langcode: en +status: true +dependencies: + config: + - field.storage.node.field_action_points + - filter.format.text_editor_simple + - node.type.action_points + module: + - 
allowed_formats + - text +third_party_settings: + allowed_formats: + allowed_formats: { } +id: node.action_points.field_action_points +field_name: field_action_points +entity_type: node +bundle: action_points +label: 'Action points' +description: '' +required: false +translatable: false +default_value: { } +default_value_callback: '' +settings: + allowed_formats: + - text_editor_simple +field_type: text_long diff --git a/config/field.field.node.action_points.field_ai_brain.yml b/config/field.field.node.action_points.field_ai_brain.yml new file mode 100644 index 0000000..c18c333 --- /dev/null +++ b/config/field.field.node.action_points.field_ai_brain.yml @@ -0,0 +1,23 @@ +uuid: 6aec51f7-796d-435c-8727-239d87fa5b0f +langcode: en +status: true +dependencies: + config: + - field.storage.node.field_ai_brain + - node.type.action_points + module: + - options +id: node.action_points.field_ai_brain +field_name: field_ai_brain +entity_type: node +bundle: action_points +label: 'AI Brain' +description: '' +required: false +translatable: false +default_value: + - + value: openai +default_value_callback: '' +settings: { } +field_type: list_string diff --git a/config/field.field.node.action_points.field_automatic_mode.yml b/config/field.field.node.action_points.field_automatic_mode.yml new file mode 100644 index 0000000..c52f2a4 --- /dev/null +++ b/config/field.field.node.action_points.field_automatic_mode.yml @@ -0,0 +1,23 @@ +uuid: ac2d157e-dc5e-4ee1-bd36-09dc56de7d00 +langcode: en +status: true +dependencies: + config: + - field.storage.node.field_automatic_mode + - node.type.action_points +id: node.action_points.field_automatic_mode +field_name: field_automatic_mode +entity_type: node +bundle: action_points +label: 'Automatic mode' +description: 'When enabled will automatically run the whole flow without any user interaction needed.' 
+required: false +translatable: false +default_value: + - + value: 1 +default_value_callback: '' +settings: + on_label: 'On' + off_label: 'Off' +field_type: boolean diff --git a/config/field.field.node.action_points.field_document.yml b/config/field.field.node.action_points.field_document.yml new file mode 100644 index 0000000..20bf501 --- /dev/null +++ b/config/field.field.node.action_points.field_document.yml @@ -0,0 +1,27 @@ +uuid: 1c2155b9-7720-4954-96bf-9931ea26c918 +langcode: en +status: true +dependencies: + config: + - field.storage.node.field_document + - node.type.action_points + module: + - file +id: node.action_points.field_document +field_name: field_document +entity_type: node +bundle: action_points +label: Document +description: '' +required: true +translatable: false +default_value: { } +default_value_callback: '' +settings: + handler: 'default:file' + handler_settings: { } + file_directory: '[date:custom:Y]-[date:custom:m]' + file_extensions: 'pdf docx odt rtf' + max_filesize: '' + description_field: false +field_type: file diff --git a/config/field.field.node.action_points.field_document_text.yml b/config/field.field.node.action_points.field_document_text.yml new file mode 100644 index 0000000..0ae85ab --- /dev/null +++ b/config/field.field.node.action_points.field_document_text.yml @@ -0,0 +1,19 @@ +uuid: afdce9d3-772e-445e-a4b6-7567f31c7f0c +langcode: en +status: true +dependencies: + config: + - field.storage.node.field_document_text + - node.type.action_points +id: node.action_points.field_document_text +field_name: field_document_text +entity_type: node +bundle: action_points +label: 'Document text' +description: '' +required: false +translatable: false +default_value: { } +default_value_callback: '' +settings: { } +field_type: string_long diff --git a/config/field.storage.node.field_action_points.yml b/config/field.storage.node.field_action_points.yml new file mode 100644 index 0000000..9585073 --- /dev/null +++ 
b/config/field.storage.node.field_action_points.yml @@ -0,0 +1,19 @@ +uuid: 01ded4be-e97d-4e0d-bdbc-e7c8707e661f +langcode: en +status: true +dependencies: + module: + - node + - text +id: node.field_action_points +field_name: field_action_points +entity_type: node +type: text_long +settings: { } +module: text +locked: false +cardinality: 1 +translatable: true +indexes: { } +persist_with_no_fields: false +custom_storage: false diff --git a/config/language.content_settings.node.action_points.yml b/config/language.content_settings.node.action_points.yml new file mode 100644 index 0000000..f92b5e4 --- /dev/null +++ b/config/language.content_settings.node.action_points.yml @@ -0,0 +1,11 @@ +uuid: d3403fab-fa8f-4fbb-8722-b333b2845827 +langcode: en +status: true +dependencies: + config: + - node.type.action_points +id: node.action_points +target_entity_type_id: node +target_bundle: action_points +default_langcode: site_default +language_alterable: false diff --git a/config/node.type.action_points.yml b/config/node.type.action_points.yml new file mode 100644 index 0000000..ccbc258 --- /dev/null +++ b/config/node.type.action_points.yml @@ -0,0 +1,17 @@ +uuid: dcf924bb-5409-401d-a9bc-d7287181daeb +langcode: en +status: true +dependencies: + module: + - menu_ui +third_party_settings: + menu_ui: + available_menus: { } + parent: '' +name: 'Action points' +type: action_points +description: '' +help: '' +new_revision: true +preview_mode: 1 +display_submitted: false diff --git a/config/user.role.anonymous.yml b/config/user.role.anonymous.yml index c8ddb92..53701a8 100644 --- a/config/user.role.anonymous.yml +++ b/config/user.role.anonymous.yml @@ -14,4 +14,6 @@ is_admin: false permissions: - 'access content' - 'view any page content' + - 'view own action_points content' - 'view own page content' + - 'view own summary content' diff --git a/config/user.role.authenticated.yml b/config/user.role.authenticated.yml index 5b15d7c..5df3205 100644 --- a/config/user.role.authenticated.yml 
+++ b/config/user.role.authenticated.yml @@ -16,4 +16,6 @@ permissions: - 'access content' - 'view any page content' - 'view media' + - 'view own action_points content' - 'view own page content' + - 'view own summary content' diff --git a/config/user.role.document_analyzer.yml b/config/user.role.document_analyzer.yml index c995d40..9a7dbb9 100644 --- a/config/user.role.document_analyzer.yml +++ b/config/user.role.document_analyzer.yml @@ -3,7 +3,9 @@ langcode: en status: true dependencies: config: + - node.type.action_points - node.type.summary + - workflows.workflow.action_points - workflows.workflow.summarize module: - content_moderation @@ -16,9 +18,18 @@ weight: -6 is_admin: null permissions: - 'access content' + - 'create action_points content' - 'create summary content' + - 'delete own action_points content' - 'delete own summary content' + - 'edit own action_points content' - 'edit own summary content' + - 'use action_points transition action_points' + - 'use action_points transition action_points_created_update' + - 'use action_points transition document_uploaded' + - 'use action_points transition extrat_text' + - 'use action_points transition published' + - 'use action_points transition text_extracted_update' - 'use summarize transition archive' - 'use summarize transition document_uploaded' - 'use summarize transition extrat_text' @@ -26,6 +37,7 @@ permissions: - 'use summarize transition summarize' - 'use summarize transition summarized_update' - 'use summarize transition text_extracted_update' + - 'view action_points revisions' - 'view own summary content' - 'view own unpublished content' - 'view summary revisions' diff --git a/config/user.role.user_manager.yml b/config/user.role.user_manager.yml index ac2c56a..3d222e0 100644 --- a/config/user.role.user_manager.yml +++ b/config/user.role.user_manager.yml @@ -16,6 +16,7 @@ permissions: - 'access user profiles' - 'administer users' - 'edit any summary content' + - 'view any action_points content' - 'view 
any summary content' - 'view any unpublished content' - 'view latest version' diff --git a/config/workflows.workflow.action_points.yml b/config/workflows.workflow.action_points.yml new file mode 100644 index 0000000..f097086 --- /dev/null +++ b/config/workflows.workflow.action_points.yml @@ -0,0 +1,118 @@ +uuid: c2c66e40-565a-403b-8597-6b8ec8db4d2b +langcode: en +status: true +dependencies: + config: + - node.type.action_points + module: + - content_moderation +id: action_points +label: 'Action points' +type: content_moderation +type_settings: + states: + action_points: + label: 'Get action points' + weight: 0 + published: false + default_revision: false + action_points_created: + label: 'Action points created' + weight: 1 + published: false + default_revision: false + archived: + label: Archived + weight: 3 + published: false + default_revision: false + document_uploaded: + label: 'Document uploaded' + weight: -3 + published: false + default_revision: false + draft: + label: Draft + weight: -4 + published: false + default_revision: false + extract_text: + label: 'Extract text' + weight: -2 + published: false + default_revision: false + published: + label: Published + weight: 2 + published: true + default_revision: true + text_extracted: + label: 'Text extracted' + weight: -1 + published: false + default_revision: false + transitions: + action_points: + label: action_points + from: + - text_extracted + to: action_points + weight: -1 + action_points_created: + label: action_points_created + from: + - action_points + to: action_points_created + weight: 0 + action_points_created_update: + label: 'action_points_created (update)' + from: + - action_points_created + to: action_points_created + weight: 4 + archive: + label: Archive + from: + - published + to: archived + weight: 2 + archived_published: + label: Restore + from: + - archived + to: published + weight: 3 + document_uploaded: + label: 'document uploaded' + from: + - draft + to: document_uploaded + weight: -5 + 
extrat_text: + label: 'Extrat text' + from: + - document_uploaded + to: extract_text + weight: -4 + published: + label: Published + from: + - action_points_created + to: published + weight: 1 + text_extracted: + label: 'Text extracted' + from: + - extract_text + to: text_extracted + weight: -3 + text_extracted_update: + label: 'Text extracted (update)' + from: + - text_extracted + to: text_extracted + weight: -2 + entity_types: + node: + - action_points + default_moderation_state: draft diff --git a/config_dev/dblog.settings.yml b/config_dev/dblog.settings.yml deleted file mode 100644 index fbd17ea..0000000 --- a/config_dev/dblog.settings.yml +++ /dev/null @@ -1,3 +0,0 @@ -_core: - default_config_hash: e883aGsrt1wFrsydlYU584PZONCSfRy0DtkZ9KzHb58 -row_limit: 1000 diff --git a/config_dev/devel.settings.yml b/config_dev/devel.settings.yml deleted file mode 100644 index 976cc91..0000000 --- a/config_dev/devel.settings.yml +++ /dev/null @@ -1,12 +0,0 @@ -_core: - default_config_hash: Aqx6J0yYT6mVqT0fbjeP4JkoL-700nmudVF5d6Pq2Yo -page_alter: false -raw_names: false -error_handlers: - 1: 1 -rebuild_theme: false -debug_mail_file_format: '%to-%subject-%datetime.mail.txt' -debug_mail_directory: 'temporary://devel-mails' -devel_dumper: var_dumper -debug_logfile: 'temporary://drupal_debug.txt' -debug_pre: true diff --git a/config_dev/devel.toolbar.settings.yml b/config_dev/devel.toolbar.settings.yml deleted file mode 100644 index 76ada43..0000000 --- a/config_dev/devel.toolbar.settings.yml +++ /dev/null @@ -1,10 +0,0 @@ -_core: - default_config_hash: IQjf_ytthngZTAk_MU8-74VecArWD3G5g0oEH6PM6GA -toolbar_items: - - devel.admin_settings_link - - devel.cache_clear - - devel.container_info.service - - devel.menu_rebuild - - devel.reinstall - - devel.route_info - - devel.run_cron diff --git a/config_dev/system.menu.devel.yml b/config_dev/system.menu.devel.yml deleted file mode 100644 index 6a3cbda..0000000 --- a/config_dev/system.menu.devel.yml +++ /dev/null @@ -1,13 +0,0 @@ -uuid: 
1559eb46-90cf-4750-9a81-5a7f76f6027a -langcode: en -status: true -dependencies: - enforced: - module: - - devel -_core: - default_config_hash: 3V-l1uuTcyirYOGLPZV5HWaDfr02uEbWZJIwc8Byz-c -id: devel -label: Development -description: 'Links related to Devel module.' -locked: true diff --git a/config_dev/views.view.watchdog.yml b/config_dev/views.view.watchdog.yml deleted file mode 100644 index 14fbf67..0000000 --- a/config_dev/views.view.watchdog.yml +++ /dev/null @@ -1,712 +0,0 @@ -uuid: 495a9b09-6af4-4cee-b776-97f1e3ecdea1 -langcode: en -status: true -dependencies: - module: - - dblog - - user -_core: - default_config_hash: j0txIxY4nkJT_dscmXckM-1vanygDkJAeHPawZKfyH0 -id: watchdog -label: Watchdog -module: views -description: 'Recent log messages' -tag: '' -base_table: watchdog -base_field: wid -display: - default: - id: default - display_title: Default - display_plugin: default - position: 0 - display_options: - title: 'Recent log messages' - fields: - nothing: - id: nothing - table: views - field: nothing - relationship: none - group_type: group - admin_label: Icon - plugin_id: custom - label: '' - exclude: false - alter: - alter_text: true - text: '' - make_link: false - path: '' - absolute: false - external: false - replace_spaces: false - path_case: none - trim_whitespace: false - alt: '' - rel: '' - link_class: '' - prefix: '' - suffix: '' - target: '' - nl2br: false - max_length: 0 - word_boundary: true - ellipsis: true - more_link: false - more_link_text: '' - more_link_path: '' - strip_tags: false - trim: false - preserve_tags: '' - html: false - element_type: '' - element_class: icon - element_label_type: '' - element_label_class: '' - element_label_colon: false - element_wrapper_type: '' - element_wrapper_class: '' - element_default_classes: false - empty: '' - hide_empty: false - empty_zero: false - hide_alter_empty: false - wid: - id: wid - table: watchdog - field: wid - relationship: none - group_type: group - admin_label: '' - plugin_id: standard - 
label: WID - exclude: true - alter: - alter_text: false - text: '' - make_link: false - path: '' - absolute: false - external: false - replace_spaces: false - path_case: none - trim_whitespace: false - alt: '' - rel: '' - link_class: '' - prefix: '' - suffix: '' - target: '' - nl2br: false - max_length: 0 - word_boundary: true - ellipsis: true - more_link: false - more_link_text: '' - more_link_path: '' - strip_tags: false - trim: false - preserve_tags: '' - html: false - element_type: '' - element_class: '' - element_label_type: '' - element_label_class: '' - element_label_colon: true - element_wrapper_type: '' - element_wrapper_class: '' - element_default_classes: true - empty: '' - hide_empty: false - empty_zero: false - hide_alter_empty: true - severity: - id: severity - table: watchdog - field: severity - relationship: none - group_type: group - admin_label: '' - plugin_id: machine_name - label: Severity - exclude: true - alter: - alter_text: false - text: '' - make_link: false - path: '' - absolute: false - external: false - replace_spaces: false - path_case: none - trim_whitespace: false - alt: '' - rel: '' - link_class: '' - prefix: '' - suffix: '' - target: '' - nl2br: false - max_length: 0 - word_boundary: true - ellipsis: true - more_link: false - more_link_text: '' - more_link_path: '' - strip_tags: false - trim: false - preserve_tags: '' - html: false - element_type: '' - element_class: '' - element_label_type: '' - element_label_class: '' - element_label_colon: true - element_wrapper_type: '' - element_wrapper_class: '' - element_default_classes: true - empty: '' - hide_empty: false - empty_zero: false - hide_alter_empty: true - machine_name: false - type: - id: type - table: watchdog - field: type - relationship: none - group_type: group - admin_label: '' - plugin_id: standard - label: Type - exclude: false - alter: - alter_text: false - text: '' - make_link: false - path: '' - absolute: false - external: false - replace_spaces: false - path_case: 
none - trim_whitespace: false - alt: '' - rel: '' - link_class: '' - prefix: '' - suffix: '' - target: '' - nl2br: false - max_length: 0 - word_boundary: true - ellipsis: true - more_link: false - more_link_text: '' - more_link_path: '' - strip_tags: false - trim: false - preserve_tags: '' - html: false - element_type: '' - element_class: '' - element_label_type: '' - element_label_class: '' - element_label_colon: true - element_wrapper_type: '' - element_wrapper_class: '' - element_default_classes: true - empty: '' - hide_empty: false - empty_zero: false - hide_alter_empty: true - timestamp: - id: timestamp - table: watchdog - field: timestamp - relationship: none - group_type: group - admin_label: '' - plugin_id: date - label: Date - exclude: false - alter: - alter_text: false - text: '' - make_link: false - path: '' - absolute: false - external: false - replace_spaces: false - path_case: none - trim_whitespace: false - alt: '' - rel: '' - link_class: '' - prefix: '' - suffix: '' - target: '' - nl2br: false - max_length: 0 - word_boundary: true - ellipsis: true - more_link: false - more_link_text: '' - more_link_path: '' - strip_tags: false - trim: false - preserve_tags: '' - html: false - element_type: '' - element_class: '' - element_label_type: '' - element_label_class: '' - element_label_colon: true - element_wrapper_type: '' - element_wrapper_class: '' - element_default_classes: true - empty: '' - hide_empty: false - empty_zero: false - hide_alter_empty: true - date_format: short - custom_date_format: '' - timezone: '' - message: - id: message - table: watchdog - field: message - relationship: none - group_type: group - admin_label: '' - plugin_id: dblog_message - label: Message - exclude: false - alter: - alter_text: false - text: '' - make_link: true - path: 'admin/reports/dblog/event/{{ wid }}' - absolute: false - external: false - replace_spaces: false - path_case: none - trim_whitespace: false - alt: '{{ message }}' - rel: '' - link_class: '' - prefix: 
'' - suffix: '' - target: '' - nl2br: false - max_length: 56 - word_boundary: true - ellipsis: true - more_link: false - more_link_text: '' - more_link_path: '' - strip_tags: true - trim: true - preserve_tags: '' - html: true - element_type: '' - element_class: '' - element_label_type: '' - element_label_class: '' - element_label_colon: true - element_wrapper_type: '' - element_wrapper_class: '' - element_default_classes: true - empty: '' - hide_empty: false - empty_zero: false - hide_alter_empty: true - replace_variables: true - name: - id: name - table: users_field_data - field: name - relationship: uid - group_type: group - admin_label: '' - entity_type: user - entity_field: name - plugin_id: field - label: User - exclude: false - alter: - alter_text: false - text: '' - make_link: false - path: '' - absolute: false - external: false - replace_spaces: false - path_case: none - trim_whitespace: false - alt: '' - rel: '' - link_class: '' - prefix: '' - suffix: '' - target: '' - nl2br: false - max_length: 0 - word_boundary: true - ellipsis: true - more_link: false - more_link_text: '' - more_link_path: '' - strip_tags: false - trim: false - preserve_tags: '' - html: false - element_type: '' - element_class: '' - element_label_type: '' - element_label_class: '' - element_label_colon: true - element_wrapper_type: '' - element_wrapper_class: '' - element_default_classes: true - empty: '' - hide_empty: false - empty_zero: false - hide_alter_empty: true - click_sort_column: value - type: user_name - settings: - link_to_entity: true - group_column: value - group_columns: { } - group_rows: true - delta_limit: 0 - delta_offset: 0 - delta_reversed: false - delta_first_last: false - multi_type: separator - separator: ', ' - field_api_classes: false - link: - id: link - table: watchdog - field: link - relationship: none - group_type: group - admin_label: '' - plugin_id: dblog_operations - label: Operations - exclude: false - alter: - alter_text: false - text: '' - make_link: 
false - path: '' - absolute: false - external: false - replace_spaces: false - path_case: none - trim_whitespace: false - alt: '' - rel: '' - link_class: '' - prefix: '' - suffix: '' - target: '' - nl2br: false - max_length: 0 - word_boundary: true - ellipsis: true - more_link: false - more_link_text: '' - more_link_path: '' - strip_tags: false - trim: false - preserve_tags: '' - html: false - element_type: '' - element_class: '' - element_label_type: '' - element_label_class: '' - element_label_colon: true - element_wrapper_type: '' - element_wrapper_class: '' - element_default_classes: true - empty: '' - hide_empty: false - empty_zero: false - hide_alter_empty: true - pager: - type: mini - options: - offset: 0 - items_per_page: 50 - total_pages: null - id: 0 - tags: - next: ›› - previous: ‹‹ - expose: - items_per_page: false - items_per_page_label: 'Items per page' - items_per_page_options: '5, 10, 25, 50' - items_per_page_options_all: false - items_per_page_options_all_label: '- All -' - offset: false - offset_label: Offset - exposed_form: - type: basic - options: - submit_button: Filter - reset_button: true - reset_button_label: Reset - exposed_sorts_label: 'Sort by' - expose_sort_order: false - sort_asc_label: Asc - sort_desc_label: Desc - access: - type: perm - options: - perm: 'access site reports' - cache: - type: none - options: { } - empty: - area: - id: area_text_custom - table: views - field: area_text_custom - relationship: none - group_type: group - admin_label: 'No log messages available.' - plugin_id: text_custom - empty: true - content: 'No log messages available.' 
- tokenize: false - sorts: - wid: - id: wid - table: watchdog - field: wid - relationship: none - group_type: group - admin_label: '' - plugin_id: standard - order: DESC - expose: - label: '' - field_identifier: wid - exposed: false - arguments: { } - filters: - type: - id: type - table: watchdog - field: type - relationship: none - group_type: group - admin_label: '' - plugin_id: dblog_types - operator: in - value: { } - group: 1 - exposed: true - expose: - operator_id: type_op - label: Type - description: '' - use_operator: false - operator: type_op - operator_limit_selection: false - operator_list: { } - identifier: type - required: false - remember: false - multiple: true - remember_roles: - authenticated: authenticated - anonymous: '0' - administrator: '0' - reduce: false - is_grouped: false - group_info: - label: '' - description: '' - identifier: '' - optional: true - widget: select - multiple: false - remember: false - default_group: All - default_group_multiple: { } - group_items: { } - severity: - id: severity - table: watchdog - field: severity - relationship: none - group_type: group - admin_label: '' - plugin_id: in_operator - operator: in - value: { } - group: 1 - exposed: true - expose: - operator_id: severity_op - label: Severity - description: '' - use_operator: false - operator: severity_op - operator_limit_selection: false - operator_list: { } - identifier: severity - required: false - remember: false - multiple: true - remember_roles: - authenticated: authenticated - anonymous: '0' - administrator: '0' - reduce: false - is_grouped: false - group_info: - label: '' - description: '' - identifier: '' - optional: true - widget: select - multiple: false - remember: false - default_group: All - default_group_multiple: { } - group_items: { } - filter_groups: - operator: AND - groups: - 1: AND - style: - type: table - options: - grouping: { } - row_class: '{{ type }} {{ severity }}' - default_row_class: true - columns: - nothing: nothing - wid: wid - 
severity: severity - type: type - timestamp: timestamp - message: message - name: name - link: link - default: wid - info: - nothing: - align: '' - separator: '' - empty_column: false - responsive: priority-medium - wid: - sortable: false - default_sort_order: desc - align: '' - separator: '' - empty_column: false - responsive: priority-low - severity: - sortable: false - default_sort_order: asc - align: '' - separator: '' - empty_column: false - responsive: priority-low - type: - sortable: true - default_sort_order: asc - align: '' - separator: '' - empty_column: false - responsive: priority-medium - timestamp: - sortable: true - default_sort_order: desc - align: '' - separator: '' - empty_column: false - responsive: priority-low - message: - sortable: false - default_sort_order: asc - align: '' - separator: '' - empty_column: false - responsive: '' - name: - sortable: true - default_sort_order: asc - align: '' - separator: '' - empty_column: false - responsive: priority-medium - link: - align: '' - separator: '' - empty_column: false - responsive: priority-low - override: true - sticky: false - summary: '' - empty_table: false - caption: '' - description: '' - row: - type: fields - query: - type: views_query - options: - query_comment: '' - disable_sql_rewrite: false - distinct: false - replica: false - query_tags: { } - relationships: - uid: - id: uid - table: watchdog - field: uid - relationship: none - group_type: group - admin_label: User - plugin_id: standard - required: false - css_class: admin-dblog - header: { } - footer: { } - display_extenders: { } - cache_metadata: - max-age: -1 - contexts: - - 'languages:language_content' - - 'languages:language_interface' - - url - - url.query_args - - user.permissions - tags: { } - page: - id: page - display_title: Page - display_plugin: page - position: 1 - display_options: - display_extenders: { } - path: admin/reports/dblog - cache_metadata: - max-age: -1 - contexts: - - 'languages:language_content' - - 
'languages:language_interface' - - url - - url.query_args - - user.permissions - tags: { } diff --git a/html/modules/custom/ocha_ai_summarize/ocha_ai_summarize.module b/html/modules/custom/ocha_ai_summarize/ocha_ai_summarize.module index f3dd07c..0712df5 100644 --- a/html/modules/custom/ocha_ai_summarize/ocha_ai_summarize.module +++ b/html/modules/custom/ocha_ai_summarize/ocha_ai_summarize.module @@ -29,6 +29,22 @@ function ocha_ai_summarize_node_insert(EntityInterface $entity) { * Implements hook_ENTITY_TYPE_update(). */ function ocha_ai_summarize_node_update(EntityInterface $entity) { + switch ($entity->bundle()) { + case 'summary': + ocha_ai_summarize_node_update_summary($entity); + break; + + case 'action_points': + ocha_ai_summarize_node_update_action_points($entity); + break; + + } +} + +/** + * Implements hook_ENTITY_TYPE_update(). + */ +function ocha_ai_summarize_node_update_summary(EntityInterface $entity) { if ($entity->bundle() !== 'summary') { return; } @@ -87,6 +103,67 @@ function ocha_ai_summarize_node_update(EntityInterface $entity) { $queue->createItem($item); } +/** + * Implements hook_ENTITY_TYPE_update(). + */ +function ocha_ai_summarize_node_update_action_points(EntityInterface $entity) { + if ($entity->bundle() !== 'action_points') { + return; + } + + /** @var \Drupal\content_moderation\ModerationInformationInterface $moderation_info */ + $moderation_info = Drupal::service('content_moderation.moderation_information'); + + // Check to see if the content is moderated or not. 
+ $is_moderated = $moderation_info->isModeratedEntity($entity); + + if (!$is_moderated) { + return; + } + + $current_state = $entity->moderation_state->value; + $automatic_mode = $entity->field_automatic_mode->value; + + if (!$current_state) { + return; + } + + $queue_name = ''; + switch ($current_state) { + case 'document_uploaded': + if ($automatic_mode) { + $entity->set('moderation_state', 'extract_text'); + $entity->save(); + } + break; + + case 'extract_text': + $queue_name = 'ocha_ai_summarize_extract_text'; + break; + + case 'text_extracted': + if ($automatic_mode) { + $entity->set('moderation_state', 'action_points'); + $entity->save(); + } + break; + + case 'action_points': + $queue_name = 'ocha_ai_summarize_action_points'; + break; + } + + if (empty($queue_name)) { + return; + } + + $queue = \Drupal::service('queue')->get($queue_name); + $item = new \stdClass(); + $item->nid = $entity->id(); + $item->brain = $entity->get('field_ai_brain')->value; + $queue->createItem($item); +} + /** * Implements hook_form_FORM_ID_alter(). */ @@ -154,10 +231,91 @@ function ocha_ai_summarize_form_node_summary_alter(&$form, FormStateInterface $f } } +/** + * Implements hook_form_FORM_ID_alter(). + */ +function ocha_ai_summarize_form_node_action_points_form_alter(&$form, FormStateInterface $form_state, $form_id) { + ocha_ai_summarize_form_node_action_points_alter($form, $form_state, $form_id); +} + +/** + * Implements hook_form_FORM_ID_alter(). + */ +function ocha_ai_summarize_form_node_action_points_edit_form_alter(&$form, FormStateInterface $form_state, $form_id) { + ocha_ai_summarize_form_node_action_points_alter($form, $form_state, $form_id); +} + +/** + * Alter action_points form based on state. 
+ */ +function ocha_ai_summarize_form_node_action_points_alter(&$form, FormStateInterface $form_state, $form_id) { + if ($form_id === 'node_action_points_form') { + $form['field_document_text']['#access'] = FALSE; + $form['field_action_points']['#access'] = FALSE; + $form['moderation_state']['#access'] = FALSE; + $form['moderation_state']['widget'][0]['state']['#default_value'] = 'document_uploaded'; + return; + } + + $node = $form_state->getFormObject()->getEntity(); + $current_state = $node->moderation_state->value; + if (!$current_state) { + return; + } + + switch ($current_state) { + case 'draft': + case 'document_uploaded': + case 'extract_text': + $form['field_document_text']['#access'] = FALSE; + $form['field_action_points']['#access'] = FALSE; + $form['moderation_state']['#access'] = FALSE; + break; + + case 'text_extracted': + $form['field_action_points']['#access'] = FALSE; + break; + + case 'action_points': + $form['field_document_text']['#disabled'] = TRUE; + unset($form['field_document_text']['widget'][$form['field_document_text']['widget']['#max_delta']]); + unset($form['field_document_text']['widget']['add_more']); + + $form['field_action_points']['#access'] = FALSE; + $form['moderation_state']['#access'] = FALSE; + break; + + case 'action_points_created': + $form['field_document_text']['#disabled'] = TRUE; + unset($form['field_document_text']['widget'][$form['field_document_text']['widget']['#max_delta']]); + unset($form['field_document_text']['widget']['add_more']); + + $form['field_action_points']['#disabled'] = TRUE; + break; + + } +} + /** * Implements hook_ENTITY_TYPE_view(). 
*/ function ocha_ai_summarize_node_view(array &$build, EntityInterface $entity, EntityViewDisplayInterface $display, $view_mode) { + switch ($entity->bundle()) { + case 'summary': + ocha_ai_summarize_node_view_summary($build, $entity, $display, $view_mode); + break; + + case 'action_points': + ocha_ai_summarize_node_view_action_points($build, $entity, $display, $view_mode); + break; + + } +} + +/** + * Implements hook_ENTITY_TYPE_view(). + */ +function ocha_ai_summarize_node_view_summary(array &$build, EntityInterface $entity, EntityViewDisplayInterface $display, $view_mode) { if ($entity->bundle() !== 'summary') { return; } @@ -236,6 +394,88 @@ function ocha_ai_summarize_node_view(array &$build, EntityInterface $entity, Ent ]; } +/** + * Implements hook_ENTITY_TYPE_view(). + */ +function ocha_ai_summarize_node_view_action_points(array &$build, EntityInterface $entity, EntityViewDisplayInterface $display, $view_mode) { + if ($entity->bundle() !== 'action_points') { + return; + } + + if ($entity->isPublished()) { + return; + } + + $content_moderation_state = ContentModerationState::loadFromModeratedEntity($entity); + if (!$content_moderation_state) { + $active = 'document_uploaded'; + } + else { + $active = $content_moderation_state->get('moderation_state')->value; + } + + $states = [ + 'document_uploaded' => [ + '#markup' => 'Document uploaded', + '#wrapper_attributes' => [ + 'class' => [], + ], + ], + 'extract_text' => [ + '#markup' => 'Extract text', + '#wrapper_attributes' => [ + 'class' => [], + ], + ], + 'text_extracted' => [ + '#markup' => 'Text extracted', + '#wrapper_attributes' => [ + 'class' => [], + ], + ], + 'action_points' => [ + '#markup' => 'Get action points', + '#wrapper_attributes' => [ + 'class' => [], + ], + ], + 'action_points_created' => [ + '#markup' => 'Action points created', + '#wrapper_attributes' => [ + 'class' => [], + ], + ], + ]; + + $future = FALSE; + foreach ($states as $key => &$state) { + if ($key == $active) { + 
$state['#wrapper_attributes']['class'] = ['active']; + $future = TRUE; + } + else { + if (!$future) { + $state['#wrapper_attributes']['class'] = ['done']; + } + else { + $state['#wrapper_attributes']['class'] = ['to-do']; + } + } + } + + $build['workflow'] = [ + '#theme' => 'item_list', + '#attributes' => [ + 'class' => [ + 'workflow-steps', + ], + ], + '#list_type' => 'ol', + '#items' => array_values($states), + '#weight' => -100, + ]; +} + /** * Get number of pages in a PDF file. */ diff --git a/html/modules/custom/ocha_ai_summarize/src/Plugin/QueueWorker/OchaAiSummarizeActionPoints.php b/html/modules/custom/ocha_ai_summarize/src/Plugin/QueueWorker/OchaAiSummarizeActionPoints.php new file mode 100644 index 0000000..dfdb4c6 --- /dev/null +++ b/html/modules/custom/ocha_ai_summarize/src/Plugin/QueueWorker/OchaAiSummarizeActionPoints.php @@ -0,0 +1,211 @@ +entityTypeManager = $entity_type_manager; + } + + /** + * {@inheritdoc} + */ + public static function create(ContainerInterface $container, array $configuration, $plugin_id, $plugin_definition) { + return new static( + $configuration, + $plugin_id, + $plugin_definition, + $container->get('entity_type.manager'), + ); + } + + /** + * {@inheritdoc} + */ + public function processItem($data) { + $bot = $data->brain ?? 'openai'; + $nid = $data->nid; + + if (empty($nid)) { + return; + } + + /** @var \Drupal\node\Entity\Node $node */ + $node = $this->entityTypeManager->getStorage('node')->load($nid); + + if (!$node || $node->bundle() !== 'action_points') { + return; + } + + $content_moderation_state = ContentModerationState::loadFromModeratedEntity($node); + + if (!$content_moderation_state) { + return; + } + + if ($content_moderation_state->get('moderation_state')->value !== 'action_points') { + return; + } + + if ($node->field_document_text->isEmpty()) { + return; + } + + $prompt = "Extract the action points from the following meeting minutes"; + + // Claude can handle all text at once. 
+ if ($bot == 'claude') { + $text = ''; + foreach ($node->field_document_text as $document_text) { + $text .= $document_text->value . "\n"; + } + + $action_points = $this->sendToClaudeAi("$prompt:\n\n" . $text); + } + else { + // Summarize each page. + $results = []; + foreach ($node->field_document_text as $document_text) { + $text = $document_text->value; + + if (strlen($text) < 100) { + $results[] = $text; + continue; + } + + switch ($bot) { + case 'openai': + $results[] = $this->sendToOpenAi("Summarize the following text in 3 paragraphs:\n\n" . $text); + break; + + case 'azure_trained': + $results[] = $this->sendToAzureAi("Summarize the following text in 3 paragraphs:\n\n" . $text); + break; + + case 'bedrock': + $results[] = $this->sendToBedRock("Summarize the following text in 3 paragraphs:\n\n" . $text); + break; + + } + } + + // Get the action points. + $text = ''; + foreach ($results as $row) { + $text .= $row; + $text .= "\n"; + } + + switch ($bot) { + case 'openai': + $action_points = $this->sendToOpenAi("$prompt:\n\n" . $text); + break; + + case 'azure_trained': + $action_points = $this->sendToAzureAi("$prompt:\n\n" . $text); + break; + + case 'bedrock': + $action_points = $this->sendToBedRock("$prompt:\n\n" . $text); + break; + } + } + + $node->set('field_action_points', [ + 'value' => $action_points, + 'format' => 'text_editor_simple', + ]); + $node->set('moderation_state', 'action_points_created'); + $node->save(); + } + + /** + * Send query to OpenAi. + */ + protected function sendToOpenAi($text) : string { + $result = ocha_ai_summarize_http_call_openai( + [ + 'model' => 'gpt-3.5-turbo-16k', + 'messages' => [ + [ + 'role' => 'user', + 'content' => $text, + ], + ], + 'temperature' => .2, + 'max_tokens' => 600, + ], + ); + + return $result['choices'][0]['message']['content'] ?? ''; + } + + /** + * Send query to Azure AI.
+ */ + protected function sendToAzureAi($text) : string { + $result = ocha_ai_summarize_http_call_azure( + [ + 'messages' => [ + [ + 'role' => 'system', + 'content' => 'You are an AI assistant that extracts action points out of meeting minutes.', + ], + [ + 'role' => 'user', + 'content' => $text, + ], + ], + ], + ); + + return $result['choices'][0]['message']['content'] ?? ''; + } + + /** + * Send query to BedRock. + */ + protected function sendToBedRock($text) : string { + $result = ocha_ai_summarize_http_call_bedrock($text); + return $result['results'][0]['outputText'] ?? ''; + } + + /** + * Send query to Claude AI. + */ + protected function sendToClaudeAi($text) : string { + $prompt = "\n\nHuman: $text\n\nAssistant:"; + + $result = ocha_ai_summarize_http_call_claude($prompt); + + return $result['completion'] ?? ''; + } + +} diff --git a/html/modules/custom/ocha_ai_summarize/src/Plugin/QueueWorker/OchaAiSummarizeExtractText.php b/html/modules/custom/ocha_ai_summarize/src/Plugin/QueueWorker/OchaAiSummarizeExtractText.php index 3799507..43fc39b 100644 --- a/html/modules/custom/ocha_ai_summarize/src/Plugin/QueueWorker/OchaAiSummarizeExtractText.php +++ b/html/modules/custom/ocha_ai_summarize/src/Plugin/QueueWorker/OchaAiSummarizeExtractText.php @@ -69,7 +69,7 @@ public function processItem($data) { /** @var \Drupal\node\Entity\Node $node */ $node = $this->entityTypeManager->getStorage('node')->load($nid); - if (!$node || $node->bundle() !== 'summary') { + if (!$node) { return; } From f125614c8f04f3f61aae7962e5955b07110b50d7 Mon Sep 17 00:00:00 2001 From: Peter Lieverdink Date: Fri, 13 Oct 2023 16:33:46 +1100 Subject: [PATCH 16/18] chore: No need for the custom fastcgi handler anymore; use PHP 8.2. 
Refs: OPS-9682 --- docker/Dockerfile | 5 +-- .../etc/nginx/apps/drupal/fastcgi_drupal.conf | 44 ------------------- 2 files changed, 2 insertions(+), 47 deletions(-) delete mode 100644 docker/etc/nginx/apps/drupal/fastcgi_drupal.conf diff --git a/docker/Dockerfile b/docker/Dockerfile index 4311423..9262716 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,5 +1,5 @@ # Build the code. -FROM public.ecr.aws/unocha/php-k8s:8.1-stable as builder +FROM public.ecr.aws/unocha/php-k8s:8.2-stable as builder ARG BRANCH_ENVIRONMENT @@ -22,7 +22,7 @@ RUN cp -a docker/settings.php docker/services.yml docker/memcache.services.yml h ################################################################################ # Generate the image. -FROM public.ecr.aws/unocha/php-k8s:8.1-stable +FROM public.ecr.aws/unocha/php-k8s:8.2-stable ARG VCS_REF ARG VCS_URL @@ -55,4 +55,3 @@ COPY --from=builder /srv/www/composer.patches.json /srv/www/composer.patches.jso COPY --from=builder /srv/www/composer.lock /srv/www/composer.lock COPY --from=builder /srv/www/PATCHES /srv/www/PATCHES COPY --from=builder /srv/www/scripts /srv/www/scripts -COPY --from=builder /srv/www/docker/etc/nginx/apps/drupal/fastcgi_drupal.conf /etc/nginx/apps/drupal/fastcgi_drupal.conf diff --git a/docker/etc/nginx/apps/drupal/fastcgi_drupal.conf b/docker/etc/nginx/apps/drupal/fastcgi_drupal.conf deleted file mode 100644 index db38bb3..0000000 --- a/docker/etc/nginx/apps/drupal/fastcgi_drupal.conf +++ /dev/null @@ -1,44 +0,0 @@ -#-*- mode: nginx; mode: flyspell-prog; ispell-local-dictionary: "american" -*- -### fastcgi configuration for serving private files. -## 1. Parameters. 
-fastcgi_param QUERY_STRING $args; -fastcgi_param REQUEST_METHOD $request_method; -fastcgi_param CONTENT_TYPE $content_type; -fastcgi_param CONTENT_LENGTH $content_length; - -fastcgi_param SCRIPT_NAME /index.php; -fastcgi_param REQUEST_URI $request_uri; -fastcgi_param DOCUMENT_URI $document_uri; -fastcgi_param DOCUMENT_ROOT $document_root; -fastcgi_param SERVER_PROTOCOL $server_protocol; - -fastcgi_param GATEWAY_INTERFACE CGI/1.1; -fastcgi_param SERVER_SOFTWARE nginx/$nginx_version; - -fastcgi_param REMOTE_ADDR $remote_addr; -fastcgi_param REMOTE_PORT $remote_port; -fastcgi_param SERVER_ADDR $server_addr; -fastcgi_param SERVER_PORT $server_port; -fastcgi_param SERVER_NAME $server_name; -## PHP only, required if PHP was built with --enable-force-cgi-redirect -fastcgi_param REDIRECT_STATUS 200; -fastcgi_param SCRIPT_FILENAME $document_root/index.php; -## HTTPS 'on' parameter. This requires Nginx version 1.1.11 or -## later. The if_not_empty flag was introduced in 1.1.11. See: -## http://nginx.org/en/CHANGES. If using a version that doesn't -## support this comment out the line below. -fastcgi_param HTTPS $fastcgi_https if_not_empty; -## For Nginx versions below 1.1.11 uncomment the line below after commenting out the above. -#fastcgi_param HTTPS $fastcgi_https; - -## 2. Nginx FCGI specific directives. -fastcgi_buffers 256 4k; -fastcgi_intercept_errors on; -## Allow 4 hrs - pass timeout responsibility to upstream. -fastcgi_read_timeout 14400; -fastcgi_index index.php; -## Hide the Drupal cache headers. -fastcgi_hide_header 'X-Drupal-Cache'; -fastcgi_hide_header 'X-Drupal-Dynamic-Cache'; -## Hide the Drupal header X-Generator. -fastcgi_hide_header 'X-Generator'; From fa496a4c6d0b811bf3fe333178e6cc9e839795fb Mon Sep 17 00:00:00 2001 From: Peter Lieverdink Date: Fri, 13 Oct 2023 16:36:21 +1100 Subject: [PATCH 17/18] chore: Ensure other AI systems do not use this one as source. 
Refs: OPS-9682 --- assets/docker-build-image.yml | 31 ------------------------------- assets/robots.txt.append | 13 +++++++++++++ 2 files changed, 13 insertions(+), 31 deletions(-) delete mode 100644 assets/docker-build-image.yml diff --git a/assets/docker-build-image.yml b/assets/docker-build-image.yml deleted file mode 100644 index 205391e..0000000 --- a/assets/docker-build-image.yml +++ /dev/null @@ -1,31 +0,0 @@ -name: Build docker image - -on: - push: - branches: - - develop - - 'feature/**' - - main - release: - types: [published] - -jobs: - build: - runs-on: ubuntu-latest - steps: - - - name: Build - id: build-action - uses: UN-OCHA/actions/drupal-docker-build@main - with: - aws_access_key_id: ${{ secrets.ECR_AWS_ACCESS_KEY_ID }} - aws_secret_access_key: ${{ secrets.ECR_AWS_ACCESS_KEY_ID }} - docker_registry_url: public.ecr.aws - docker_registry_path: /unocha/ - docker_image: ai-summarize-site - docker_username: ${{ secrets.ECR_AWS_ACCESS_KEY_ID }} - docker_password: ${{ secrets.ECR_AWS_SECRET_ACCESS_KEY }} - ecr_github_token: ${{ secrets.ECR_GITHUB_TOKEN }} - ecr_jenkins_token: ${{ secrets.JENKINS_ECR_TOKEN }} - slack_bot_token: ${{ secrets.SLACK_BOT_TOKEN }} - slack_channel_name: ${{ secrets.SLACK_CHANNEL }} diff --git a/assets/robots.txt.append b/assets/robots.txt.append index e40cd97..02f27d5 100644 --- a/assets/robots.txt.append +++ b/assets/robots.txt.append @@ -1 +1,14 @@ # Sitemap + +# Other people's AI. +User-agent: GPTBot +Disallow: / + +User-agent: ChatGPT-User +Disallow: / + +User-agent: Google-Extended +Disallow: / + +User-agent: CCBot +Disallow: / From d55461d65762c09df628b47ea81971f1dfb3f4ad Mon Sep 17 00:00:00 2001 From: Peter Lieverdink Date: Fri, 13 Oct 2023 16:39:05 +1100 Subject: [PATCH 18/18] chore: Update the workflows to use the correct vars.
Refs: OPS-9682 --- .github/workflows/composer-update.yml | 5 ++--- .github/workflows/docker-build-image.yml | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/composer-update.yml b/.github/workflows/composer-update.yml index c47ffee..9b1cdb5 100644 --- a/.github/workflows/composer-update.yml +++ b/.github/workflows/composer-update.yml @@ -16,7 +16,6 @@ jobs: github_access_token: ${{ secrets.PAT }} patch_branch: 'develop' patch_packages: 'drupal/*' - patch_maintainers: ${{ secrets.DRUPAL_MAINTAINERS }} + patch_maintainers: ${{ vars.DRUPAL_MAINTAINERS }} slack_bot_token: ${{ secrets.SLACK_BOT_TOKEN }} - slack_channel_name: ${{ secrets.SLACK_CHANNEL }} - flowdock_token: ${{ secrets.FLOWDOCK_TOKEN }} + slack_channel_name: ${{ vars.SLACK_CHANNEL }} diff --git a/.github/workflows/docker-build-image.yml b/.github/workflows/docker-build-image.yml index 205391e..23630a9 100644 --- a/.github/workflows/docker-build-image.yml +++ b/.github/workflows/docker-build-image.yml @@ -28,4 +28,4 @@ jobs: ecr_github_token: ${{ secrets.ECR_GITHUB_TOKEN }} ecr_jenkins_token: ${{ secrets.JENKINS_ECR_TOKEN }} slack_bot_token: ${{ secrets.SLACK_BOT_TOKEN }} - slack_channel_name: ${{ secrets.SLACK_CHANNEL }} + slack_channel_name: ${{ vars.SLACK_CHANNEL }}