Skip to content

Commit

Permalink
Implemented connector validation for confluence
Browse files Browse the repository at this point in the history
  • Loading branch information
hagen-danswer committed Jan 8, 2025
1 parent a6c9b95 commit 66930a6
Show file tree
Hide file tree
Showing 2 changed files with 72 additions and 30 deletions.
22 changes: 21 additions & 1 deletion backend/onyx/connectors/confluence/connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,13 @@
from onyx.configs.constants import DocumentSource
from onyx.connectors.confluence.onyx_confluence import build_confluence_client
from onyx.connectors.confluence.onyx_confluence import OnyxConfluence
from onyx.connectors.confluence.onyx_confluence import validate_connector_configuration
from onyx.connectors.confluence.utils import attachment_to_content
from onyx.connectors.confluence.utils import build_confluence_document_id
from onyx.connectors.confluence.utils import datetime_from_string
from onyx.connectors.confluence.utils import extract_text_from_confluence_html
from onyx.connectors.confluence.utils import validate_attachment_filetype
from onyx.connectors.interfaces import ConnectorValidator
from onyx.connectors.interfaces import GenerateDocumentsOutput
from onyx.connectors.interfaces import GenerateSlimDocumentOutput
from onyx.connectors.interfaces import LoadConnector
Expand Down Expand Up @@ -76,7 +78,9 @@
)


class ConfluenceConnector(LoadConnector, PollConnector, SlimConnector):
class ConfluenceConnector(
LoadConnector, PollConnector, SlimConnector, ConnectorValidator
):
def __init__(
self,
wiki_base: str,
Expand Down Expand Up @@ -378,3 +382,19 @@ def retrieve_all_slim_documents(
doc_metadata_list = doc_metadata_list[_SLIM_DOC_BATCH_SIZE:]

yield doc_metadata_list

def validate_connector_configuration(self) -> None:
    """
    Verify that this connector can actually talk to Confluence.

    Hands `self.confluence_client` to the module-level
    `validate_connector_configuration` helper imported from
    `onyx_confluence`. Whatever exception that helper raises for bad
    credentials or a bad connector configuration propagates to the
    caller unchanged; on success nothing is returned.
    """
    # Retries are allowed, but the retry budget is kept small so the
    # check does not take long.
    quick_retry_count = 2
    quick_backoff_cap_seconds = 2

    validate_connector_configuration(
        confluence_client=self.confluence_client,
        max_backoff_retries=quick_retry_count,
        max_backoff_seconds=quick_backoff_cap_seconds,
    )
80 changes: 51 additions & 29 deletions backend/onyx/connectors/confluence/onyx_confluence.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@
from atlassian import Confluence # type:ignore
from requests import HTTPError

from onyx.connectors.interfaces import InvalidConnectorConfigurationException
from onyx.connectors.interfaces import InvalidConnectorException
from onyx.connectors.interfaces import InvalidCredentialsException
from onyx.utils.logger import setup_logger

logger = setup_logger()
Expand Down Expand Up @@ -337,23 +340,48 @@ def paginated_group_members_retrieval(
yield from self._paginate_url(f"rest/api/group/{group_name}/member", limit)


def _validate_connector_configuration(
credentials: dict[str, Any],
is_cloud: bool,
wiki_base: str,
def validate_connector_configuration(
    confluence_client: OnyxConfluence,
    max_backoff_retries: int,
    max_backoff_seconds: int,
) -> None:
    """
    Test the connection to Confluence by retrieving a list of spaces.

    A throwaway base `Confluence` client is built from the settings of
    `confluence_client` and asked for at most one space.

    Args:
        confluence_client: Client whose `api_version`, `url`, `username`,
            `password` (and `token`, when present) are reused.
        max_backoff_retries: Retry count handed to the throwaway client.
        max_backoff_seconds: Backoff cap handed to the throwaway client.

    Raises:
        InvalidCredentialsException: The request failed with HTTP 403.
        InvalidConnectorConfigurationException: The request failed with
            HTTP 401 or 404, or it succeeded but returned no spaces.
        InvalidConnectorException: Any other failure while building the
            client or performing the request.
    """
    # We want to test the connection with the base client, not the wrapped one
    # because the wrapped one has retries built in and we want this to be fast
    try:
        confluence_client_with_minimal_retries = Confluence(
            api_version=confluence_client.api_version,
            url=confluence_client.url,
            username=confluence_client.username,
            password=confluence_client.password,
            # Not every client exposes a `token` attribute; fall back to None.
            token=getattr(confluence_client, "token", None),
            backoff_and_retry=True,
            max_backoff_retries=max_backoff_retries,
            max_backoff_seconds=max_backoff_seconds,
        )
        spaces = confluence_client_with_minimal_retries.get_all_spaces(limit=1)
    except HTTPError as e:
        # An HTTPError may carry no response at all. Compare against None
        # explicitly: a requests Response for a 4xx/5xx status is falsy.
        status_code = e.response.status_code if e.response is not None else None
        # NOTE(review): 403 -> credentials and 401 -> configuration looks
        # inverted relative to the usual HTTP meaning; kept as-is because
        # callers may depend on these exception types - confirm.
        if status_code == 403:
            raise InvalidCredentialsException(str(e)) from e
        if status_code in (401, 404):
            raise InvalidConnectorConfigurationException(str(e)) from e
        raise InvalidConnectorException(str(e)) from e
    except Exception as e:
        raise InvalidConnectorException(str(e)) from e

    # All confluence clients should be able to retrieve at least one space
    # If not, there is an issue with the credentials or the connector configuration
    # The response is expected to be a dict carrying the spaces under
    # "results"; such a dict is truthy even when that list is empty, so look
    # inside it rather than testing the wrapper itself.
    found_spaces = spaces.get("results") if isinstance(spaces, dict) else spaces
    if not found_spaces:
        raise InvalidConnectorConfigurationException(
            f"No spaces found at {confluence_client.url}! "
            "Check your credentials and wiki_base and make sure "
            "is_cloud is set correctly."
        )

# uncomment the following for testing
# the following is an attempt to retrieve the user's timezone
Expand All @@ -362,25 +390,13 @@ def _validate_connector_configuration(
# space_key = spaces["results"][0]["key"]
# space_details = confluence_client_with_minimal_retries.cql(f"space.key={space_key}+AND+type=space")

if not spaces:
raise RuntimeError(
f"No spaces found at {wiki_base}! "
"Check your credentials and wiki_base and make sure "
"is_cloud is set correctly."
)


def build_confluence_client(
credentials: dict[str, Any],
is_cloud: bool,
wiki_base: str,
) -> OnyxConfluence:
_validate_connector_configuration(
credentials=credentials,
is_cloud=is_cloud,
wiki_base=wiki_base,
)
return OnyxConfluence(
onyx_confluence_client = OnyxConfluence(
api_version="cloud" if is_cloud else "latest",
# Remove trailing slash from wiki_base if present
url=wiki_base.rstrip("/"),
Expand All @@ -393,3 +409,9 @@ def build_confluence_client(
max_backoff_seconds=60,
cloud=is_cloud,
)
validate_connector_configuration(
confluence_client=onyx_confluence_client,
max_backoff_retries=6,
max_backoff_seconds=10,
)
return onyx_confluence_client

0 comments on commit 66930a6

Please sign in to comment.