From dd840930f61abcc15fafc1db6fc6cf49c984d58d Mon Sep 17 00:00:00 2001 From: Niklaus Johner Date: Wed, 17 Feb 2021 16:54:45 +0100 Subject: [PATCH 1/8] Mark permissions as ignored for merge operations. --- .../scripts/repository_migration.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/opengever/maintenance/scripts/repository_migration.py b/opengever/maintenance/scripts/repository_migration.py index ab9f5afe..7cde9f7a 100644 --- a/opengever/maintenance/scripts/repository_migration.py +++ b/opengever/maintenance/scripts/repository_migration.py @@ -515,7 +515,8 @@ def operation_by_old_refnum(self, reference_number): def validate_operation(self, operation): """Make sure that operation satisfies all necessary conditions and add - is_valid, repository_depth_violated and leaf_node_violated to it. + is_valid, repository_depth_violated and leaf_node_violated and + permissions_disregarded to it. """ operation['is_valid'] = True @@ -602,6 +603,19 @@ def validate_operation(self, operation): operation['is_valid'] = False self.new_positions.add(new_position) + # if position is being merged, then permissions set in that row will + # be lost. Best would be to compare the permissions of that row with + # the ones it gets merged into. Instead we simply log and write it + # in the analysis excel. The user can make sure this is correct himself. 
+ operation['permissions_disregarded'] = False + if operation['merge_into']: + permissions = operation['permissions'] + if any(permissions.values()): + logger.info( + "Permissions disregarded: this position gets merged" + " {}".format(operation)) + operation['permissions_disregarded'] = True + def get_new_title(self, new_repo_pos, old_repo_pos): """Returns the new title or none if no rename is necessary.""" if new_repo_pos.title != old_repo_pos.title: @@ -825,6 +839,7 @@ def insert_label_row(self, sheet): 'Ist ungultig', # permission + 'Ignorierte Bewilligungen', 'Bewilligungen', ] @@ -851,6 +866,7 @@ def insert_value_rows(self, sheet, rows): 'x' if data['repository_depth_violated'] else '', 'x' if data['leaf_node_violated'] else '', 'x' if not data['is_valid'] else '', + 'x' if data['permissions_disregarded'] else '', json.dumps(data['permissions']), ] From eac637f9a0ff9b189f5d0b813fc6bdbd1a1cfe95 Mon Sep 17 00:00:00 2001 From: Niklaus Johner Date: Thu, 18 Feb 2021 12:12:22 +0100 Subject: [PATCH 2/8] Log warning if previously set local roles will get reset during migration. 
--- .../scripts/repository_migration.py | 39 ++++++++++++++++--- 1 file changed, 34 insertions(+), 5 deletions(-) diff --git a/opengever/maintenance/scripts/repository_migration.py b/opengever/maintenance/scripts/repository_migration.py index 7cde9f7a..57acbf70 100644 --- a/opengever/maintenance/scripts/repository_migration.py +++ b/opengever/maintenance/scripts/repository_migration.py @@ -34,6 +34,8 @@ from opengever.base.interfaces import IReferenceNumberFormatter from opengever.base.interfaces import IReferenceNumberPrefix from opengever.base.monkey.patching import MonkeyPatch +from opengever.base.role_assignments import ASSIGNMENT_VIA_SHARING +from opengever.base.role_assignments import RoleAssignmentManager from opengever.bundle.console import add_guid_index from opengever.bundle.ldap import DisabledLDAP from opengever.bundle.sections.bundlesource import BUNDLE_PATH_KEY @@ -79,6 +81,8 @@ MIGRATIOM_TIMESTAMP = time.strftime('%d%m%Y-%H%M%S') tasks_to_sync = set() +managed_roles_shortnames = ['read', 'add', 'edit', 'close', 'reactivate', 'manage_dossiers'] + def log_progress(i, tot, step=100): if i % step == 0: @@ -452,8 +456,6 @@ def analyse(self): new_position_parent_guid = None new_position_guid = None - permissions = None - needs_creation = not bool(old_repo_pos.position) need_number_change, need_move, need_merge = self.needs_number_change_move_or_merge(new_repo_pos, old_repo_pos) @@ -466,7 +468,8 @@ def analyse(self): if needs_creation: new_position_parent_position, new_position_parent_guid = self.get_parent_of_new_position(new_repo_pos) new_position_guid = uuid4().hex[:8] - permissions = self.extract_permissions(row) + + permissions = self.extract_permissions(row) operation = { 'uid': self.get_uuid_for_position(old_repo_pos.position), @@ -607,14 +610,38 @@ def validate_operation(self, operation): # be lost. Best would be to compare the permissions of that row with # the ones it gets merged into. Instead we simply log and write it # in the analysis excel. 
The user can make sure this is correct himself. + permissions = operation['permissions'] operation['permissions_disregarded'] = False + operation['local_roles_deleted'] = False if operation['merge_into']: - permissions = operation['permissions'] if any(permissions.values()): logger.info( "Permissions disregarded: this position gets merged" " {}".format(operation)) operation['permissions_disregarded'] = True + else: + # We also check that permissions are only set when inheritance is + # blocked and if local roles were defined on such positions before, + # we emit a warning as they will be lost during migration + has_local_roles = any(permissions[role_shortname] for role_shortname in managed_roles_shortnames) + inheritance_blocked = permissions['block_inheritance'] + if has_local_roles and not inheritance_blocked: + logger.warning( + "Invalid operation: setting local roles without blocking " + "inheritance. {}".format(operation)) + operation['is_valid'] = False + elif inheritance_blocked and not has_local_roles: + logger.warning( + "Invalid operation: blocking inheritance without setting " + "local roles. 
{}".format(operation)) + operation['is_valid'] = False + elif inheritance_blocked and has_local_roles: + obj = uuidToObject(operation['uid']) + if obj and RoleAssignmentManager(obj).get_assignments_by_cause(ASSIGNMENT_VIA_SHARING): + operation['local_roles_deleted'] = True + logger.warning( + "Sharing assignments for {} will be deleted and " + "replaced.".format(obj.absolute_url_path())) def get_new_title(self, new_repo_pos, old_repo_pos): """Returns the new title or none if no rename is necessary.""" @@ -794,7 +821,7 @@ def extract_permissions(self, row): if block == 'ja': permissions['block_inheritance'] = True - for key in ['read', 'add', 'edit', 'close', 'reactivate', 'manage_dossiers']: + for key in managed_roles_shortnames: groups = [group.strip() for group in getattr(row, key).split(',')] groups = [group for group in groups if group] @@ -840,6 +867,7 @@ def insert_label_row(self, sheet): # permission 'Ignorierte Bewilligungen', + 'Vorherige Lokalen Rollen entfernt' 'Bewilligungen', ] @@ -867,6 +895,7 @@ def insert_value_rows(self, sheet, rows): 'x' if data['leaf_node_violated'] else '', 'x' if not data['is_valid'] else '', 'x' if data['permissions_disregarded'] else '', + 'x' if data['local_roles_deleted'] else '', json.dumps(data['permissions']), ] From a14b6f3a478df56c639ad9cd7dd2e383b7b32202 Mon Sep 17 00:00:00 2001 From: Niklaus Johner Date: Thu, 18 Feb 2021 12:13:46 +0100 Subject: [PATCH 3/8] Only write permissions to excel file when not empty. 
--- opengever/maintenance/scripts/repository_migration.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opengever/maintenance/scripts/repository_migration.py b/opengever/maintenance/scripts/repository_migration.py index 57acbf70..e7df3906 100644 --- a/opengever/maintenance/scripts/repository_migration.py +++ b/opengever/maintenance/scripts/repository_migration.py @@ -896,7 +896,7 @@ def insert_value_rows(self, sheet, rows): 'x' if not data['is_valid'] else '', 'x' if data['permissions_disregarded'] else '', 'x' if data['local_roles_deleted'] else '', - json.dumps(data['permissions']), + json.dumps(data['permissions']) if any(data['permissions'].values()) else '', ] for column, attr in enumerate(values, 1): From 36846c612c265b6bfe0c62fcea80dbdf4565aab9 Mon Sep 17 00:00:00 2001 From: Niklaus Johner Date: Thu, 18 Feb 2021 12:20:06 +0100 Subject: [PATCH 4/8] Improve log readability. --- .../scripts/repository_migration.py | 73 ++++++++++--------- 1 file changed, 37 insertions(+), 36 deletions(-) diff --git a/opengever/maintenance/scripts/repository_migration.py b/opengever/maintenance/scripts/repository_migration.py index e7df3906..66d5bcf5 100644 --- a/opengever/maintenance/scripts/repository_migration.py +++ b/opengever/maintenance/scripts/repository_migration.py @@ -444,7 +444,7 @@ def analyse(self): # Skip positions that should be deleted if not new_repo_pos.position: - logger.info("Skipping, we do not support deletion: {}".format(row)) + logger.info("\nSkipping, we do not support deletion: {}\n".format(row)) continue new_number = None @@ -501,8 +501,8 @@ def analyse(self): portal_type='opengever.repository.repositoryfolder'): refnum = IReferenceNumber(brain.getObject()).get_repository_number() if not self.operation_by_old_refnum(refnum): - logger.warning("Excel is incomplete. No operation defined for " - "position {}".format(brain.reference)) + logger.warning("\nExcel is incomplete. 
No operation defined for " + "position {}\n".format(brain.reference)) self.is_valid = False # Make sure that analysis is invalid if any operation was invalid @@ -525,8 +525,8 @@ def validate_operation(self, operation): # Each operation should either have a uid or a new_position_guid if not any((operation['new_position_guid'], operation['uid'])): - logger.warning("Invalid operation: needs new_position_guid " - "or uid. {}".format(operation)) + logger.warning("\nInvalid operation: needs new_position_guid " + "or uid. {}\n".format(operation)) operation['is_valid'] = False # Make sure that all UIDs are valid and that for existing UIDs, @@ -534,42 +534,42 @@ def validate_operation(self, operation): if operation['uid']: obj = uuidToObject(operation['uid']) if not obj: - logger.warning("Invalid operation: uid is not valid." - "or uid. {}".format(operation)) + logger.warning("\nInvalid operation: uid is not valid." + "or uid. {}\n".format(operation)) operation['is_valid'] = False else: old_repo_pos = operation['old_repo_pos'] if obj.title_de != old_repo_pos.title: - logger.warning("Invalid operation: incorrect title." - "{}".format(operation)) + logger.warning("\nInvalid operation: incorrect title." + "{}\n".format(operation)) operation['is_valid'] = False if obj.get_repository_number().replace('.', '') != old_repo_pos.position: - logger.warning("Invalid operation: incorrect position." - "{}".format(operation)) + logger.warning("\nInvalid operation: incorrect position." + "{}\n".format(operation)) operation['is_valid'] = False if (obj.description or old_repo_pos.description) and obj.description != old_repo_pos.description: - logger.warning("Invalid operation: incorrect description." - "{}".format(operation)) + logger.warning("\nInvalid operation: incorrect description." + "{}\n".format(operation)) operation['is_valid'] = False # Each operation should have new position if not operation['new_repo_pos'].position: - logger.warning("Invalid operation: needs new position. 
{}".format( + logger.warning("\nInvalid operation: needs new position. {}\n".format( operation)) operation['is_valid'] = False if all((operation['new_position_guid'], operation['uid'])): - logger.warning("Invalid operation: can define only one of " - "new_position_guid or uid. {}".format(operation)) + logger.warning("\nInvalid operation: can define only one of " + "new_position_guid or uid. {}\n".format(operation)) operation['is_valid'] = False # A move operation should have a new_parent_uid if operation['new_parent_position'] or operation['new_parent_uid']: if not operation['new_parent_uid']: logger.warning( - "Invalid operation: move operation must define " - "new_parent_uid. {}".format(operation)) + "\nInvalid operation: move operation must define " + "new_parent_uid. {}\n".format(operation)) operation['is_valid'] = False # Make sure that if a position is being created, its parent will be found @@ -579,8 +579,8 @@ def validate_operation(self, operation): if not parent: logger.warning( - "Invalid operation: could not find new parent for create " - "operation. {}".format(operation)) + "\nInvalid operation: could not find new parent for create " + "operation. {}\n".format(operation)) operation['is_valid'] = False self.check_repository_depth_violation(operation) @@ -591,8 +591,8 @@ def validate_operation(self, operation): if old_position: if old_position in self.positions: logger.warning( - "Invalid operation: position appears twice in excel." - " {}".format(operation)) + "\nInvalid operation: position appears twice in excel." + " {}\n".format(operation)) operation['is_valid'] = False self.positions.add(old_position) @@ -601,8 +601,8 @@ def validate_operation(self, operation): if new_position and not operation['merge_into']: if new_position in self.new_positions: logger.warning( - "Invalid operation: new position appears twice in excel." - " {}".format(operation)) + "\nInvalid operation: new position appears twice in excel." 
+ " {}\n".format(operation)) operation['is_valid'] = False self.new_positions.add(new_position) @@ -616,8 +616,8 @@ def validate_operation(self, operation): if operation['merge_into']: if any(permissions.values()): logger.info( - "Permissions disregarded: this position gets merged" - " {}".format(operation)) + "\nPermissions disregarded: this position gets merged" + " {}\n".format(operation)) operation['permissions_disregarded'] = True else: # We also check that permissions are only set when inheritance is @@ -627,21 +627,21 @@ def validate_operation(self, operation): inheritance_blocked = permissions['block_inheritance'] if has_local_roles and not inheritance_blocked: logger.warning( - "Invalid operation: setting local roles without blocking " - "inheritance. {}".format(operation)) + "\nInvalid operation: setting local roles without blocking " + "inheritance. {}\n".format(operation)) operation['is_valid'] = False elif inheritance_blocked and not has_local_roles: logger.warning( - "Invalid operation: blocking inheritance without setting " - "local roles. {}".format(operation)) + "\nInvalid operation: blocking inheritance without setting " + "local roles. 
{}\n".format(operation)) operation['is_valid'] = False elif inheritance_blocked and has_local_roles: obj = uuidToObject(operation['uid']) if obj and RoleAssignmentManager(obj).get_assignments_by_cause(ASSIGNMENT_VIA_SHARING): operation['local_roles_deleted'] = True logger.warning( - "Sharing assignments for {} will be deleted and " - "replaced.".format(obj.absolute_url_path())) + "\nSharing assignments for {} will be deleted and " + "replaced.\n".format(obj.absolute_url_path())) def get_new_title(self, new_repo_pos, old_repo_pos): """Returns the new title or none if no rename is necessary.""" @@ -753,8 +753,8 @@ def check_repository_depth_violation(self, operation): new_repo_pos = operation['new_repo_pos'] if new_repo_pos.position and len(new_repo_pos.position) > max_depth: - logger.warning( - "Invalid operation: repository depth violated. {}".format(operation)) + logger.warning("\nInvalid operation: repository depth violated." + " {}\n".format(operation)) operation['is_valid'] = False operation['repository_depth_violated'] = True else: @@ -784,12 +784,13 @@ def check_leaf_node_principle_violation(self, operation): if not parent_repo: # Something is fishy, parent should either exist or be created operation['is_valid'] = False - logger.warning("Invalid operation: parent not found. {}".format(operation)) + logger.warning("\nInvalid operation: parent not found. {}\n".format(operation)) return if any([IDossierMarker.providedBy(item) for item in parent_repo.objectValues()]): operation['is_valid'] = False operation['leaf_node_violated'] = True - logger.warning("Invalid operation: leaf node principle violated. {}".format(operation)) + logger.warning("\nInvalid operation: leaf node principle violated." 
+ " {}\n".format(operation)) def get_repository_reference_mapping(self): if not self._reference_repository_mapping: From 7853a9c23db555791ac10bc0ac71acf159906a3d Mon Sep 17 00:00:00 2001 From: Niklaus Johner Date: Fri, 19 Feb 2021 12:17:22 +0100 Subject: [PATCH 5/8] Also set permissions for already existing objects. --- .../scripts/repository_migration.py | 51 +++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/opengever/maintenance/scripts/repository_migration.py b/opengever/maintenance/scripts/repository_migration.py index 66d5bcf5..fde6e071 100644 --- a/opengever/maintenance/scripts/repository_migration.py +++ b/opengever/maintenance/scripts/repository_migration.py @@ -22,6 +22,12 @@ tasks_to_sync = json.load(infile) TaskSyncer(tasks_to_sync)() transaction.commit() + +Notes: +- Permissions are only taken into account if both inheritance is blocked and some + local_roles are set +- permissions for positions that get merged are disregarded +- Setting new permissions will replace the existing sharing permissions. 
""" from Acquisition import aq_inner @@ -36,6 +42,8 @@ from opengever.base.monkey.patching import MonkeyPatch from opengever.base.role_assignments import ASSIGNMENT_VIA_SHARING from opengever.base.role_assignments import RoleAssignmentManager +from opengever.base.role_assignments import SharingRoleAssignment +from opengever.base.schemadump.config import ROLES_BY_SHORTNAME from opengever.bundle.console import add_guid_index from opengever.bundle.ldap import DisabledLDAP from opengever.bundle.sections.bundlesource import BUNDLE_PATH_KEY @@ -613,6 +621,7 @@ def validate_operation(self, operation): permissions = operation['permissions'] operation['permissions_disregarded'] = False operation['local_roles_deleted'] = False + operation['set_permissions'] = False if operation['merge_into']: if any(permissions.values()): logger.info( @@ -637,6 +646,10 @@ def validate_operation(self, operation): operation['is_valid'] = False elif inheritance_blocked and has_local_roles: obj = uuidToObject(operation['uid']) + if obj: + # newly created positions will have the local_roles set + # in the pipeline + operation['set_permissions'] = True if obj and RoleAssignmentManager(obj).get_assignments_by_cause(ASSIGNMENT_VIA_SHARING): operation['local_roles_deleted'] = True logger.warning( @@ -919,6 +932,7 @@ def check_preconditions(self): raise MigrationPreconditionsError("Some operations are invalid.") def run(self): + self.set_permissions(self.items_to_set_permissions()) self.create_repository_folders(self.items_to_create()) self.move_branches(self.items_to_move()) self.merge_branches(self.items_to_merge()) @@ -943,6 +957,9 @@ def items_to_adjust_number(self): def items_to_rename(self): return [item for item in self.operations_list if item['new_title']] + def items_to_set_permissions(self): + return [item for item in self.operations_list if item['set_permissions']] + def add_to_reindexing_queue(self, uid, idxs, with_children=False): self.to_reindex[uid].update(idxs) obj = uuidToObject(uid) 
@@ -1099,6 +1116,40 @@ def update_description(self, items): repo.description = new_description self.add_to_reindexing_queue(item['uid'], ('Description',)) + def set_permissions(self, items): + logger.info("\n\nUpdating permissions...\n") + n_tot = len(items) + for i, item in enumerate(items): + log_progress(i, n_tot, 5) + repo = uuidToObject(item['uid']) + self._set_permissions_on_object(repo, item['permissions']) + + def _set_permissions_on_object(self, obj, permissions): + """ We set the local roles and block inheritance if needed. + local_roles are only set if the inheritance is blocked. + Other conditions should have raised a validation error for the + excel. + """ + block_inheritance = permissions['block_inheritance'] + + roles_by_principals = defaultdict(list) + for role_shortname in managed_roles_shortnames: + role = ROLES_BY_SHORTNAME[role_shortname] + principals = permissions.get(role_shortname) + for principal in principals: + roles_by_principals[principal].append(role) + + if not (block_inheritance and roles_by_principals): + return + + obj.__ac_local_roles_block__ = block_inheritance + manager = RoleAssignmentManager(obj) + manager.storage.clear_by_cause(ASSIGNMENT_VIA_SHARING) + for principal, roles in roles_by_principals.items(): + assignment = SharingRoleAssignment(principal, roles) + RoleAssignmentManager(obj).add_or_update_assignment(assignment) + obj.reindexObjectSecurity() + def reindex(self): logger.info("\n\nReindexing...\n") n_tot = len(self.to_reindex) From 24d0caa592d154fb1ab600adac65a5c78ea9d7c8 Mon Sep 17 00:00:00 2001 From: Niklaus Johner Date: Fri, 19 Feb 2021 12:18:12 +0100 Subject: [PATCH 6/8] Skip docproperties update during migration. 
--- .../maintenance/scripts/repository_migration.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/opengever/maintenance/scripts/repository_migration.py b/opengever/maintenance/scripts/repository_migration.py index fde6e071..ba33ed1a 100644 --- a/opengever/maintenance/scripts/repository_migration.py +++ b/opengever/maintenance/scripts/repository_migration.py @@ -267,6 +267,20 @@ def sync_with(self, plone_task): self.patch_refs(Task, 'sync_with', sync_with) +class SkipDocPropsUpdate(MonkeyPatch): + """ No need to update the docproperties, as we don't have the + blobs during the migration anyway + """ + + def __call__(self): + from opengever.document import handlers + + def _update_docproperties(document, raise_on_error=False): + return + + self.patch_refs(handlers, '_update_docproperties', _update_docproperties) + + def cleanup_position(position): """Remove splitting dots - they're not usefull for comparison. This only works for grouped_by_three formatter. @@ -1352,6 +1366,7 @@ def main(): else: SkipTaskSyncWith()() PatchDisableLDAP()() + SkipDocPropsUpdate()() logger.info('\n\nstarting analysis...\n') analyser = RepositoryExcelAnalyser(mapping_path, options.output_directory) From 98c95016f057c51c8e4588776e6a0da7b0df19bf Mon Sep 17 00:00:00 2001 From: Niklaus Johner Date: Fri, 19 Feb 2021 12:24:34 +0100 Subject: [PATCH 7/8] Commit transaction to speed-up migration. Since the migration is done on a copy of the deployment anyway, we can take the risk of committing during the migration to speed it up. 
--- .../scripts/repository_migration.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/opengever/maintenance/scripts/repository_migration.py b/opengever/maintenance/scripts/repository_migration.py index ba33ed1a..9644222a 100644 --- a/opengever/maintenance/scripts/repository_migration.py +++ b/opengever/maintenance/scripts/repository_migration.py @@ -934,8 +934,9 @@ def insert_value_rows(self, sheet, rows): class RepositoryMigrator(object): - def __init__(self, operations_list): + def __init__(self, operations_list, dry_run): self.operations_list = operations_list + self.dry_run = dry_run self._reference_repository_mapping = None self.to_reindex = defaultdict(set) self.catalog = api.portal.get_tool('portal_catalog') @@ -1013,6 +1014,8 @@ def create_repository_folders(self, items): self.start_bundle_import(tmpdirname) shutil.rmtree(tmpdirname) + if not self.dry_run: + transaction.commit() def start_bundle_import(self, bundle_path): logger.info("\n\nStarting bundle import...\n") @@ -1042,6 +1045,8 @@ def move_branches(self, items): raise Exception('No parent or repo found for {}'.format(item)) api.content.move(source=repo, target=parent, safe_id=True) + if not self.dry_run: + transaction.commit() def merge_branches(self, items): logger.info("\n\nMerging...\n") @@ -1066,6 +1071,8 @@ def merge_branches(self, items): if item['uid'] in self.to_reindex: self.to_reindex.pop(item['uid']) + if not self.dry_run: + transaction.commit() def adjust_reference_number_prefix(self, items): logger.info("\n\nAdjusting reference number prefix...\n") @@ -1079,6 +1086,8 @@ def adjust_reference_number_prefix(self, items): self.add_to_reindexing_queue( item['uid'], ('Title', 'sortable_title', 'reference'), with_children=True) + if not self.dry_run: + transaction.commit() self.regenerate_reference_number_mapping(list(parents)) @@ -1115,6 +1124,8 @@ def rename(self, items): # recursively self.add_to_reindexing_queue( item['uid'], ('Title', 
'sortable_title')) + if not self.dry_run: + transaction.commit() def update_description(self, items): logger.info("\n\nUpdating descriptions...\n") @@ -1129,6 +1140,8 @@ def update_description(self, items): if repo.description != new_description: repo.description = new_description self.add_to_reindexing_queue(item['uid'], ('Description',)) + if not self.dry_run: + transaction.commit() def set_permissions(self, items): logger.info("\n\nUpdating permissions...\n") @@ -1137,6 +1150,8 @@ def set_permissions(self, items): log_progress(i, n_tot, 5) repo = uuidToObject(item['uid']) self._set_permissions_on_object(repo, item['permissions']) + if not self.dry_run: + transaction.commit() def _set_permissions_on_object(self, obj, permissions): """ We set the local roles and block inheritance if needed. @@ -1388,7 +1403,7 @@ def main(): logger.info('\n\nInvalid migration excel, aborting...\n') return - migrator = RepositoryMigrator(analyser.analysed_rows) + migrator = RepositoryMigrator(analyser.analysed_rows, dry_run=options.dryrun) if not options.dryrun: logger.info('\n\nstarting migration...\n') migrator.run() From c60ec72b95fa236f5880420727f1b827913dcc67 Mon Sep 17 00:00:00 2001 From: Niklaus Johner Date: Wed, 24 Feb 2021 09:50:17 +0100 Subject: [PATCH 8/8] Avoid reindexing SearchableText in Solr during migration. This would access the blobs, which we do not have during the migration. 
--- .../maintenance/scripts/repository_migration.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/opengever/maintenance/scripts/repository_migration.py b/opengever/maintenance/scripts/repository_migration.py index 9644222a..a2abc58a 100644 --- a/opengever/maintenance/scripts/repository_migration.py +++ b/opengever/maintenance/scripts/repository_migration.py @@ -281,6 +281,20 @@ def _update_docproperties(document, raise_on_error=False): self.patch_refs(handlers, '_update_docproperties', _update_docproperties) +class SkipSearchableTextExtraction(MonkeyPatch): + """ During migration we do not have the blobs, so we should + avoid extracting full text from the blobs. + """ + + def __call__(self): + from ftw.solr.connection import SolrConnection + + def extract(self, blob, field, data, content_type): + return + + self.patch_refs(SolrConnection, 'extract', extract) + + def cleanup_position(position): """Remove splitting dots - they're not usefull for comparison. This only works for grouped_by_three formatter. @@ -1382,6 +1396,7 @@ def main(): SkipTaskSyncWith()() PatchDisableLDAP()() SkipDocPropsUpdate()() + SkipSearchableTextExtraction()() logger.info('\n\nstarting analysis...\n') analyser = RepositoryExcelAnalyser(mapping_path, options.output_directory)