From 605d850c2e70dad86d7fd65fd57af4d0fa4db09a Mon Sep 17 00:00:00 2001 From: khorshuheng Date: Thu, 2 Jan 2025 17:43:00 +0800 Subject: [PATCH] fix: avoid leaking private document contents in search endpoint --- src/api/search.rs | 1 + src/biz/collab/folder_view.rs | 60 +++++++++++++++++++++++++++-------- src/biz/search/ops.rs | 24 +++++++++++++- 3 files changed, 71 insertions(+), 14 deletions(-) diff --git a/src/api/search.rs b/src/api/search.rs index 542c143de..c1b7f8f7f 100644 --- a/src/api/search.rs +++ b/src/api/search.rs @@ -32,6 +32,7 @@ async fn document_search( let metrics = &*state.metrics.request_metrics; let resp = search_document( &state.pg_pool, + &state.collab_access_control_storage, &state.indexer_scheduler, uid, workspace_id, diff --git a/src/biz/collab/folder_view.rs b/src/biz/collab/folder_view.rs index 86c425b0f..22b753fc8 100644 --- a/src/biz/collab/folder_view.rs +++ b/src/biz/collab/folder_view.rs @@ -11,15 +11,13 @@ use shared_entity::dto::workspace_dto::{ }; use uuid::Uuid; -/// Return all folders belonging to a workspace, excluding private sections which the user does not have access to. 
-pub fn collab_folder_to_folder_view( - workspace_id: Uuid, - root_view_id: &str, - folder: &Folder, - max_depth: u32, - pubished_view_ids: &HashSet, -) -> Result { - let mut unviewable = HashSet::new(); +pub struct PrivateAndNonviewableViews { + pub my_private_view_ids: HashSet, + pub nonviewable_view_ids: HashSet, +} + +pub fn private_and_nonviewable_view_ids(folder: &Folder) -> PrivateAndNonviewableViews { + let mut nonviewable_view_ids = HashSet::new(); let mut my_private_view_ids = HashSet::new(); for private_section in folder.get_my_private_sections() { my_private_view_ids.insert(private_section.id); @@ -28,21 +26,36 @@ pub fn collab_folder_to_folder_view( if let Some(private_view) = folder.get_view(&private_section.id) { if check_if_view_is_space(&private_view) && !my_private_view_ids.contains(&private_section.id) { - unviewable.insert(private_section.id); + nonviewable_view_ids.insert(private_section.id); } } } for trash_view in folder.get_all_trash_sections() { - unviewable.insert(trash_view.id); + nonviewable_view_ids.insert(trash_view.id); + } + PrivateAndNonviewableViews { + my_private_view_ids, + nonviewable_view_ids, } +} + +/// Return all folders belonging to a workspace, excluding private sections which the user does not have access to. 
+pub fn collab_folder_to_folder_view( + workspace_id: Uuid, + root_view_id: &str, + folder: &Folder, + max_depth: u32, + pubished_view_ids: &HashSet, +) -> Result { + let private_and_nonviewable_views = private_and_nonviewable_view_ids(folder); to_folder_view( workspace_id, "", root_view_id, folder, - &unviewable, - &my_private_view_ids, + &private_and_nonviewable_views.nonviewable_view_ids, + &private_and_nonviewable_views.my_private_view_ids, pubished_view_ids, false, 0, @@ -229,6 +242,27 @@ pub fn section_items_to_trash_folder_view( .collect() } +pub fn check_if_view_ancestors_fulfil_condition( + view_id: &str, + collab_folder: &Folder, + condition: impl Fn(&collab_folder::View) -> bool, +) -> bool { + let mut current_view_id = view_id.to_string(); + loop { + let view = match collab_folder.get_view(&current_view_id) { + Some(view) => view, + None => return false, + }; + if condition(&view) { + return true; + } + current_view_id = view.parent_view_id.clone(); + if current_view_id.is_empty() { + return false; + } + } +} + pub fn check_if_view_is_space(view: &collab_folder::View) -> bool { let extra = match view.extra.as_ref() { Some(extra) => extra, diff --git a/src/biz/search/ops.rs b/src/biz/search/ops.rs index d07c69d26..97628b796 100644 --- a/src/biz/search/ops.rs +++ b/src/biz/search/ops.rs @@ -1,8 +1,14 @@ use crate::api::metrics::RequestMetrics; +use crate::biz::collab::folder_view::{ + check_if_view_ancestors_fulfil_condition, private_and_nonviewable_view_ids, +}; +use crate::biz::collab::utils::get_latest_collab_folder; use app_error::ErrorCode; use appflowy_ai_client::dto::{ EmbeddingEncodingFormat, EmbeddingInput, EmbeddingModel, EmbeddingOutput, EmbeddingRequest, }; +use appflowy_collaborate::collab::storage::CollabAccessControlStorage; +use database::collab::GetCollabOrigin; use std::sync::Arc; use database::index::{search_documents, SearchDocumentParams}; @@ -17,6 +23,7 @@ use uuid::Uuid; pub async fn search_document( pg_pool: &PgPool, + collab_storage: 
&CollabAccessControlStorage, indexer_scheduler: &Arc, uid: i64, workspace_id: Uuid, @@ -75,8 +82,23 @@ pub async fn search_document( results.len(), request.query ); + + let folder = get_latest_collab_folder( + collab_storage, + GetCollabOrigin::User { uid }, + &workspace_id.to_string(), + ) + .await?; + let private_and_nonviewable_views = private_and_nonviewable_view_ids(&folder); + let non_searchable_view_ids = private_and_nonviewable_views.nonviewable_view_ids; + let filtered_results = results.into_iter().filter(|item| { + !check_if_view_ancestors_fulfil_condition(&item.object_id, &folder, |view| { + non_searchable_view_ids.contains(&view.id) + }) + }); + Ok( - results + filtered_results .into_iter() .map(|item| SearchDocumentResponseItem { object_id: item.object_id,