From 3ac19ca20d7191e160d0c5f348f13318ce40f7fd Mon Sep 17 00:00:00 2001 From: vmonakhov Date: Sun, 11 Aug 2024 17:26:32 +0300 Subject: [PATCH] Perspective view better pagination -- https://github.com/ispras/lingvodoc-react/issues/1133 (#1513) * init * next steps * minor * PerspectivePage methods moved to DictionaryPerspective * fixes * next steps * sql to alchemy * sql to alchemy * sql to alchemy * refactoring * filtering * filtering * sorting * refactoring * checking edit mode * refactoring * fix * regexp * refactoring * fixed and cleaned-up * complex sorting * correct sorting within lex entry * sort by lowercase * fixed filtering * minor * cleanup * fixes and cleanup * more correct filtering * next steps * outerjoin and labels * with sorting_cte * created entries calculating * new entries showing * fixes * fixes --- lingvodoc/models.py | 315 ++++++++++++++---- lingvodoc/schema/gql_dictionaryperspective.py | 197 +++++++---- lingvodoc/schema/gql_search.py | 4 +- lingvodoc/schema/query.py | 6 +- 4 files changed, 383 insertions(+), 139 deletions(-) diff --git a/lingvodoc/models.py b/lingvodoc/models.py index 3e3ff36e..f60decc9 100755 --- a/lingvodoc/models.py +++ b/lingvodoc/models.py @@ -23,6 +23,8 @@ Index, literal, or_, + func, + desc, Sequence, Table, tuple_) @@ -66,7 +68,7 @@ # Project imports. import lingvodoc.cache.caching as caching - +from pdb import set_trace as A # Setting up logging. log = logging.getLogger(__name__) @@ -1022,6 +1024,25 @@ class Dictionary( domain = Column(Integer, default=0) +class PerspectivePage( + CompositeIdMixin, + TableNameMixin, + ParentMixin, + CreatedAtMixin, + TranslationMixin, + StateMixin, + MarkedForDeletionMixin, + AdditionalMetadataMixin, + ObjectTOCMixin, + Base): + """ + This object used to compile and get list of lexical entries with extra information + after filtering, sorting and pagination on backend + Parent: DictionaryPerspective + """ + __parentname__ = 'DictionaryPerspective' + + class DictionaryPerspective( CompositeIdMixin, TableNameMixin, @@ -1899,99 +1920,247 @@ def remove_keys(obj, rubbish): @classmethod def graphene_track_multiple( cls, - lexs, + lexes, publish = None, accept = None, delete = False, + filter = None, + sort_by_field = None, + is_ascending = None, + is_case_sens = True, + is_regexp = False, + created_entries = [], check_perspective = True): + deleted_per = [] + alive_lexes = [] + if check_perspective: + deleted_per = DictionaryPerspective.get_deleted() - filtered_lexes = [] + for x in lexes: - deleted_persps = DictionaryPerspective.get_deleted() - for i in lexs: - if (i[2], i[3]) not in deleted_persps: - filtered_lexes.append(i) + if len(x) >= 4 and (x[2], x[3]) in deleted_per: + continue - else: + alive_lexes.append({'client_id': x[0], 'object_id': x[1]}) - filtered_lexes = lexs + temp_table_name = 'lexical_entries_temp_table' + str(uuid.uuid4()).replace("-", "") - ls = [] + DBSession.execute( + '''create TEMPORARY TABLE %s (client_id BIGINT, object_id BIGINT) on COMMIT DROP;''' % temp_table_name) - for i, x in enumerate(filtered_lexes): - ls.append({'traversal_lexical_order': i, 'client_id': x[0], 'object_id': x[1]}) + class Tempo(Base): - if not ls: - return [] + __tablename__ = temp_table_name + __table_args__ = {'prefixes': ['TEMPORARY']} - pub_filter = "" + client_id = Column(SLBigInteger(), primary_key=True) + object_id = Column(SLBigInteger(), primary_key=True) - if publish is not None or accept is not None or delete is not None: - where_cond = list() - if accept: - where_cond.append("publishingentity.accepted 
= True") - if accept is False: - where_cond.append("publishingentity.accepted = False") - if publish: - where_cond.append("publishingentity.published = True") - if publish is False: - where_cond.append("publishingentity.published = False") - if delete: - where_cond.append("cte_expr.marked_for_deletion = True") - if delete is False: - where_cond.append("cte_expr.marked_for_deletion = False") - where_cond = ["WHERE", " AND ".join(where_cond)] - pub_filter = " ".join(where_cond) + DBSession.execute( + Tempo.__table__ + .insert() + .values(alive_lexes)) - temp_table_name = 'lexical_entries_temp_table' + str(uuid.uuid4()).replace("-", "") + # We need just lexical entry and entity id and entity's content for sorting and filtering - DBSession.execute( - '''create TEMPORARY TABLE %s (traversal_lexical_order INTEGER, client_id BIGINT, object_id BIGINT) on COMMIT DROP;''' % temp_table_name) + entities_query = ( + DBSession + .query( + Entity.client_id, + Entity.object_id, + Entity.parent_client_id, + Entity.parent_object_id, + Entity.content) - DBSession.execute( - '''insert into %s (traversal_lexical_order, client_id, object_id) values (:traversal_lexical_order, :client_id, :object_id);''' % temp_table_name, - ls) + .filter( + Entity.parent_client_id == Tempo.client_id, + Entity.parent_object_id == Tempo.object_id)) - statement = text(''' - WITH cte_expr AS - (SELECT - entity.*, - {0}.traversal_lexical_order AS traversal_lexical_order - FROM entity - INNER JOIN {0} - ON - entity.parent_client_id = {0}.client_id - AND entity.parent_object_id = {0}.object_id - ) - SELECT - cte_expr.client_id, - cte_expr.object_id, - cte_expr.parent_client_id, - cte_expr.parent_object_id, - cte_expr.self_client_id, - cte_expr.self_object_id, - cte_expr.link_client_id, - cte_expr.link_object_id, - cte_expr.field_client_id, - cte_expr.field_object_id, - cte_expr.locale_id, - cte_expr.marked_for_deletion, - cte_expr.content, - cte_expr.additional_metadata, - cte_expr.created_at, - publishingentity.* - FROM cte_expr - LEFT JOIN publishingentity - ON publishingentity.client_id = cte_expr.client_id AND publishingentity.object_id = cte_expr.object_id - {1} - ORDER BY cte_expr.traversal_lexical_order; - '''.format(temp_table_name, pub_filter)) + filed_lexes = entities_query.with_entities('parent_client_id', 'parent_object_id') + + # Collect all empty lexes including created ones + + empty_lexes = ( + DBSession + .query( + Tempo.client_id, + Tempo.object_id) + + .filter( + tuple_(Tempo.client_id, Tempo.object_id) + .notin_(filed_lexes)) + + .all()) + + # Apply user's custom filter + + if filter: + + # We filter using Entity model in parallels twice, + # so we need to use cte(), we can't use .with_entities + + # Filter from special fields + filtered_entities = entities_query.filter( + Entity.field_id != (66, 25)) + + if is_regexp: + if is_case_sens: + filtered_entities = filtered_entities.filter( + Entity.content.op('~')(filter)).cte() + else: + filtered_entities = filtered_entities.filter( + Entity.content.op('~*')(filter)).cte() + else: + if is_case_sens: + filtered_entities = filtered_entities.filter( + Entity.content.like(f"%{filter}%")).cte() + else: + filtered_entities = filtered_entities.filter( + Entity.content.ilike(f"%{filter}%")).cte() + + filtered_lexes = ( + DBSession + .query( + filtered_entities.c.parent_client_id, + filtered_entities.c.parent_object_id)) + + entities_query = entities_query.filter( + Entity.parent_id + .in_(filtered_lexes)) + + entities_cte = entities_query.cte() + + # Create sorting_cte to order 
by it + + sorting_cte = None + + if sort_by_field: + + field_entities = entities_query.filter(Entity.field_id == sort_by_field).cte() + + alpha_entities = ( + DBSession + .query( + field_entities.c.parent_client_id.label('lex_client_id'), + field_entities.c.parent_object_id.label('lex_object_id'), + func.min(func.lower(field_entities.c.content)).label('first_entity'), + func.max(func.lower(field_entities.c.content)).label('last_entity')) + + .filter(func.length(field_entities.c.content) > 0) + + .group_by('lex_client_id', 'lex_object_id') + + .cte() + ) + + sorting_cte = ( + DBSession + .query( + entities_cte.c.parent_client_id, + entities_cte.c.parent_object_id, + entities_cte.c.client_id, + entities_cte.c.object_id, + alpha_entities.c.first_entity, + alpha_entities.c.last_entity, + field_entities.c.content.label('order_content')) + + .outerjoin( + alpha_entities, and_( + alpha_entities.c.lex_client_id == entities_cte.c.parent_client_id, + alpha_entities.c.lex_object_id == entities_cte.c.parent_object_id)) + + .outerjoin( + field_entities, and_( + field_entities.c.client_id == entities_cte.c.client_id, + field_entities.c.object_id == entities_cte.c.object_id)) + + .cte()) - entries = DBSession.query(Entity, PublishingEntity).from_statement(statement) .options(joinedload('publishingentity')).yield_per(100) + entities_cte = sorting_cte - return entries + # Finally, filter and sort Entity and PublishingEntity objects + + entities_result = ( + DBSession + .query( + Entity, + PublishingEntity) + + .outerjoin( + PublishingEntity)) + + # Pre-filtering + + if accept is not None: + entities_result = entities_result.filter(PublishingEntity.accepted == accept) + if publish is not None: + entities_result = entities_result.filter(PublishingEntity.published == publish) + if delete is not None: + entities_result = entities_result.filter(Entity.marked_for_deletion == delete) + + # Get new entities from entities_before_custom_filtering + + new_entities_result = ( + entities_result + .filter( + + tuple_(Entity.parent_client_id, Entity.parent_object_id) + .in_(filed_lexes), + + tuple_(Entity.parent_client_id, Entity.parent_object_id) + .in_(created_entries))) + + # Filter and join at once to get and sort old entities + + old_entities_result = ( + entities_result + .filter( + + entities_cte.c.client_id == Entity.client_id, + entities_cte.c.object_id == Entity.object_id, + + tuple_(Entity.parent_client_id, Entity.parent_object_id) + .notin_(created_entries))) + + # Custom sorting + + if sorting_cte is not None: + + if is_ascending: + + old_entities_result = old_entities_result.order_by( + entities_cte.c.first_entity, + entities_cte.c.parent_client_id, + entities_cte.c.parent_object_id, + func.lower(entities_cte.c.order_content) + ) + + else: + + old_entities_result = old_entities_result.order_by( + desc(entities_cte.c.last_entity), + entities_cte.c.parent_client_id, + entities_cte.c.parent_object_id, + desc(func.lower(entities_cte.c.order_content)) + ) + + # Default sorting + + old_entities_result = old_entities_result.order_by( + Entity.parent_client_id, + Entity.parent_object_id, + Entity.client_id, + Entity.object_id) + + return ( + new_entities_result, + old_entities_result + .options( + joinedload('publishingentity')) + .yield_per(100), + empty_lexes) class Entity( diff --git a/lingvodoc/schema/gql_dictionaryperspective.py b/lingvodoc/schema/gql_dictionaryperspective.py index f2db0d4f..965d4235 100644 --- a/lingvodoc/schema/gql_dictionaryperspective.py +++ b/lingvodoc/schema/gql_dictionaryperspective.py 
@@ -42,6 +42,7 @@ JSONB, Language as dbLanguage, LexicalEntry as dbLexicalEntry, + PerspectivePage as dbPerspectivePage, ObjectTOC, ParserResult as dbParserResult, PublishingEntity as dbPublishingEntity, @@ -88,6 +89,7 @@ from lingvodoc.utils.deletion import real_delete_perspective from lingvodoc.utils.search import translation_gist_search +from pdb import set_trace as A # Setting up logging. @@ -111,47 +113,65 @@ def gql_lexicalentry(cur_lexical_entry, cur_entities): lex.dbObject = cur_lexical_entry return lex -def entries_with_entities(lexes, accept, delete, mode, publish, check_perspective = True): +def entries_with_entities(lexes, mode, + is_edit_mode=True, + created_entries=[], + limit=0, + offset=0, + **query_args): + if mode == 'debug': return [gql_lexicalentry(lex, None) for lex in lexes] + lex_id_to_obj = dict() lexes_composite_list = list() - if check_perspective: + for lex_obj in ( + lexes if isinstance(lexes, list) else + lexes.yield_per(100).all()): - for lex_obj in ( - lexes if isinstance(lexes, list) else - lexes.yield_per(100).all()): + lexes_composite_list.append((lex_obj.client_id, lex_obj.object_id, + lex_obj.parent_client_id, lex_obj.parent_object_id)) - lexes_composite_list.append((lex_obj.client_id, lex_obj.object_id, - lex_obj.parent_client_id, lex_obj.parent_object_id)) - lex_id_to_obj[(lex_obj.client_id, lex_obj.object_id)] = lex_obj + lex_id_to_obj[(lex_obj.client_id, lex_obj.object_id)] = lex_obj - else: + if mode == 'not_accepted': + query_args['accept'] = False + query_args['delete'] = False - # If we don't need to check for perspective deletion, we don't need perspective ids. + new_entities, old_entities, empty_lexes = ( + dbLexicalEntry.graphene_track_multiple( + lexes_composite_list, + created_entries=created_entries, + **query_args)) - for lex_obj in ( - lexes if isinstance(lexes, list) else - lexes.yield_per(100).all()): + # Getting sets of hashable items + empty_lexes_set = set([tuple(lex) for lex in empty_lexes]) + added_lexes_set = set([tuple(lex) for lex in created_entries]) - entry_id = (lex_obj.client_id, lex_obj.object_id) + # Calculating lists of old and newly added empty lexes + old_empty_lexes = empty_lexes_set - added_lexes_set if is_edit_mode else [] + new_empty_lexes = empty_lexes_set & added_lexes_set - lexes_composite_list.append(entry_id) - lex_id_to_obj[entry_id] = lex_obj + """ + Finally we start to combine summary list of lexes in following sequence: + -- (non-empty) new_entities in any amount + -- new_empty_lexes in any amount and mode + -- old_empty_lexes sliced by 'limit' in edit mode + -- old_entities sliced by [offset : offset + limit] + """ - if mode == 'not_accepted': - accept = False - delete = False + lexical_entries = [] + + # We have empty lexes only if is_edit_mode + for lex_ids in old_empty_lexes: - entities = dbLexicalEntry.graphene_track_multiple(lexes_composite_list, - publish=publish, - accept=accept, - delete=delete, - check_perspective=check_perspective) + lexical_entries.append( + gql_lexicalentry( + cur_lexical_entry = lex_id_to_obj[lex_ids], + cur_entities = [])) - ent_iter = itertools.chain(list(entities)) - lexical_entries = list() + ent_iter = itertools.chain(list(old_entities)) for lex_ids, entity_with_published in itertools.groupby(ent_iter, key = group_by_lex): @@ -159,19 +179,57 @@ def entries_with_entities(lexes, accept, delete, mode, publish, check_perspectiv gql_entity_with_published(cur_entity = x[0], cur_publishing = x[1]) for x in entity_with_published] - lexical_entry = lex_id_to_obj.pop(lex_ids) + 
lexical_entries.append( + gql_lexicalentry( + cur_lexical_entry = lex_id_to_obj[lex_ids], + cur_entities = gql_entities_list)) - if (lexical_entry.client_id, lexical_entry.object_id) == lex_ids: + # Pagination - lexical_entries.append( - gql_lexicalentry(cur_lexical_entry = lexical_entry, cur_entities = gql_entities_list)) + total_entries = len(lexical_entries) - for new_lex in lex_id_to_obj.values(): + lexical_entries = lexical_entries[offset:] + if limit > 0: + lexical_entries = lexical_entries[:offset + limit] - lexical_entries.append( - gql_lexicalentry(cur_lexical_entry = new_lex, cur_entities = [])) + # In any mode we show empty new lexes if any + # Adding them at the beginning of list + + for lex_ids in new_empty_lexes: + + lexical_entries.insert(0, + gql_lexicalentry( + cur_lexical_entry = lex_id_to_obj[lex_ids], + cur_entities = [])) + + # Add lexes with new_entities at the beginning of list + + ent_iter = itertools.chain(list(new_entities)) + + for lex_ids, entity_with_published in itertools.groupby(ent_iter, key = group_by_lex): + + gql_entities_list = [ + gql_entity_with_published(cur_entity = x[0], cur_publishing = x[1]) + for x in entity_with_published] + + lexical_entries.insert(0, + gql_lexicalentry( + cur_lexical_entry = lex_id_to_obj[lex_ids], + cur_entities = gql_entities_list)) + + return lexical_entries, total_entries + + +class PerspectivePage(graphene.ObjectType): + + lexical_entries = graphene.List(LexicalEntry) + entries_total = graphene.Int() + + dbType = dbPerspectivePage + + class Meta: + pass - return lexical_entries class DictionaryPerspective(LingvodocObjectType): """ @@ -222,7 +280,25 @@ class DictionaryPerspective(LingvodocObjectType): tree = graphene.List(CommonFieldsComposite, ) # TODO: check it columns = graphene.List(Column) - lexical_entries = graphene.List(LexicalEntry, ids = graphene.List(LingvodocID), mode=graphene.String()) + lexical_entries = graphene.List( + LexicalEntry, + ids = graphene.List(LingvodocID), + mode = graphene.String()) + + perspective_page = graphene.Field( + PerspectivePage, + ids = graphene.List(LingvodocID), + mode = graphene.String(), + filter = graphene.String(), + is_regexp = graphene.Boolean(), + is_case_sens = graphene.Boolean(), + is_edit_mode = graphene.Boolean(), + is_ascending = graphene.Boolean(), + sort_by_field = LingvodocID(), + offset = graphene.Int(), + limit = graphene.Int(), + created_entries = graphene.List(LingvodocID)) + authors = graphene.List('lingvodoc.schema.gql_user.User') roles = graphene.Field(UserAndOrganizationsRoles) role_check = graphene.Boolean(subject = graphene.String(required = True), action = graphene.String(required = True)) @@ -247,6 +323,8 @@ class DictionaryPerspective(LingvodocObjectType): dbType = dbPerspective + entries_total = 0 + class Meta: interfaces = (CommonFieldsComposite, StateHolder) @@ -511,7 +589,7 @@ def resolve_last_modified_at(self, info): # Complete query for the perspective, excluding created_at which we already have. 
DBSession.execute( - 'set extra_float_digits to 3;'); + 'set extra_float_digits to 3;') result = ( @@ -816,14 +894,14 @@ def resolve_new_adverb_data_count(self, info): return new_hash_count + (has_hash_count > ready_hash_count) @fetch_object() - def resolve_lexical_entries(self, info, ids=None, mode=None, authors=None, clients=None, start_date=None, end_date=None, - position=1): + def resolve_lexical_entries(self, info, ids=None, + mode=None, authors=None, clients=None, + start_date=None, end_date=None, position=1, + **query_args): if self.check_is_hidden_for_client(info): return [] - result = list() - request = info.context.get('request') if mode == 'all': publish = None accept = True @@ -859,11 +937,6 @@ def resolve_lexical_entries(self, info, ids=None, mode=None, authors=None, clien else: raise ResponseError(message="mode: ") - # dbcolumn = DBSession.query(dbColumn).filter_by(parent=self.dbObject, position=position, self_client_id=None, - # self_object_id=None).first() - # if not dbcolumn: - # dbcolumn = DBSession.query(dbColumn).filter_by(parent=self.dbObject, self_client_id=None, - # self_object_id=None).first() lexes = DBSession.query(dbLexicalEntry).filter(dbLexicalEntry.parent == self.dbObject) if ids is not None: ids = list(ids) @@ -871,10 +944,6 @@ def resolve_lexical_entries(self, info, ids=None, mode=None, authors=None, clien if authors or start_date or end_date: lexes = lexes.join(dbLexicalEntry.entity).join(dbEntity.publishingentity) - # if publish is not None: - # lexes = lexes.filter(dbPublishingEntity.published == publish) - # if accept is not None: - # lexes = lexes.filter(dbPublishingEntity.accepted == accept) if delete is not None: if authors or start_date or end_date: lexes = lexes.filter(or_(dbLexicalEntry.marked_for_deletion == delete, dbEntity.marked_for_deletion == delete)) @@ -890,20 +959,18 @@ def resolve_lexical_entries(self, info, ids=None, mode=None, authors=None, clien db_la_gist = translation_gist_search('Limited access') limited_client_id, limited_object_id = db_la_gist.client_id, db_la_gist.object_id - if self.dbObject.state_translation_gist_client_id == limited_client_id and self.dbObject.state_translation_gist_object_id == limited_object_id and mode != 'not_accepted': + if (self.dbObject.state_translation_gist_client_id == limited_client_id and + self.dbObject.state_translation_gist_object_id == limited_object_id and + mode != 'not_accepted'): + if not info.context.acl_check_if('view', 'lexical_entries_and_entities', - (self.dbObject.client_id, self.dbObject.object_id)): + (self.dbObject.client_id, self.dbObject.object_id)): + lexes = lexes.limit(20) - # lexes = lexes \ - # .order_by(func.min(case( - # [(or_(dbEntity.field_client_id != dbcolumn.field_client_id, - # dbEntity.field_object_id != dbcolumn.field_object_id), - # 'яяяяяя')], - # else_=dbEntity.content))) \ - # .group_by(dbLexicalEntry) - lexical_entries = ( - entries_with_entities(lexes, accept, delete, mode, publish, check_perspective = False)) + lexical_entries, self.entries_total = ( + entries_with_entities(lexes, mode, accept=accept, delete=delete, publish=publish, + check_perspective = False, **query_args)) # If we were asked for specific lexical entries, we try to return them in creation order. 
@@ -912,6 +979,14 @@ def resolve_lexical_entries(self, info, ids=None, mode=None, authors=None, clien return lexical_entries + def resolve_perspective_page( + self, + info, + **query_args): + + return PerspectivePage( + lexical_entries = self.resolve_lexical_entries(info, **query_args), + entries_total = self.entries_total) @fetch_object() def resolve_authors(self, info): @@ -1186,7 +1261,7 @@ def mutate(root, info, **args): if translation_gist_object_id: dbperspective.translation_gist_object_id = translation_gist_object_id # TODO: refactor like dictionaries if parent_id: - # parent_client_id, parent_object_id = parent_id + parent_client_id, parent_object_id = parent_id # dbparent_dictionary = DBSession.query(dbDictionary).filter_by(client_id=parent_client_id, # object_id=parent_object_id).first() dbparent_dictionary = CACHE.get(objects= diff --git a/lingvodoc/schema/gql_search.py b/lingvodoc/schema/gql_search.py index 19d19294..e91c0bda 100644 --- a/lingvodoc/schema/gql_search.py +++ b/lingvodoc/schema/gql_search.py @@ -615,7 +615,7 @@ def search_mechanism( # Compiling search results. - result_lexical_entries = ( + result_lexical_entries, _ = ( entries_with_entities( lexical_entry_list, @@ -1072,7 +1072,7 @@ def search_mechanism( if load_entities: - res_lexical_entries = ( + res_lexical_entries, _ = ( # Don't need to check for perspective deletion, we explicitly look only in undeleted dictionaries # and undeleted perspectives. diff --git a/lingvodoc/schema/query.py b/lingvodoc/schema/query.py index 19df553e..78e8b2e5 100644 --- a/lingvodoc/schema/query.py +++ b/lingvodoc/schema/query.py @@ -3744,7 +3744,7 @@ def resolve_basic_search(self, info, searchstring, search_in_published, field_id (lex.client_id, lex.object_id, lex.parent_client_id, lex.parent_object_id) for lex in lexes] - entities = ( + (_,entities, _) = ( dbLexicalEntry.graphene_track_multiple( lexes_composite_list, @@ -4741,8 +4741,8 @@ def resolve_connected_words(self, info, id, field_id, mode=None): (entry.client_id, entry.object_id, entry.parent_client_id, entry.parent_object_id) for entry in lexes] - entities = dbLexicalEntry.graphene_track_multiple(lexes_composite_list, - publish=publish, accept=accept) + (_, entities, _) = dbLexicalEntry.graphene_track_multiple(lexes_composite_list, + publish=publish, accept=accept) def graphene_entity(cur_entity, cur_publishing): ent = Entity(id = (cur_entity.client_id, cur_entity.object_id))
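
Note on the resulting page layout: the short sketch below summarises, with plain Python lists, the combination and pagination order that `entries_with_entities()` implements in this patch (entries with newly created entities first, then new empty entries, old empty entries only in edit mode, old entries paginated, and `entries_total` counted before slicing). It is an illustration only; `combine_page` and all variable names in it are placeholders that do not exist in the patch, and only the ordering and slicing rules themselves are taken from the code above.

# Minimal, self-contained sketch of the combination order implemented by
# entries_with_entities(). Plain strings stand in for lexical entries;
# every name here is illustrative, not an identifier from the patch.

def combine_page(new_filled, new_empty, old_empty, old_filled,
                 offset=0, limit=0, is_edit_mode=True):
    # Old empty lexical entries are listed only in edit mode, ahead of
    # the old entries that already have entities.
    page = (list(old_empty) if is_edit_mode else []) + list(old_filled)

    # entries_total is taken before pagination, as in the patch.
    total = len(page)

    # Pagination mirrors the patch: drop `offset` items, then keep at
    # most `offset + limit` of the remainder when a limit is given.
    page = page[offset:]
    if limit > 0:
        page = page[:offset + limit]

    # Newly created entries (with or without entities) are prepended so
    # they stay visible regardless of the requested page.
    return list(new_filled) + list(new_empty) + page, total


entries, total = combine_page(
    new_filled=['lex with new entities'],
    new_empty=['new empty lex'],
    old_empty=['old empty lex'],
    old_filled=['old lex %d' % i for i in range(50)],
    offset=10, limit=20)
print(total, entries[:3])

A client observes the same ordering through the new `perspective_page` field, which exposes the assembled `lexical_entries` together with `entries_total`, while custom sorting (when `sort_by_field` is given) orders old entries by the minimum lower-cased content of the chosen field in ascending mode and by the maximum in descending mode, as built in the sorting CTE above.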