diff --git a/docs_website/docs/integrations/add_github_integration.mdx b/docs_website/docs/integrations/add_github_integration.mdx new file mode 100644 index 000000000..c4c7c44af --- /dev/null +++ b/docs_website/docs/integrations/add_github_integration.mdx @@ -0,0 +1,114 @@ +--- +id: add_github_integration +title: GitHub Integration Guide +sidebar_label: GitHub Integration +--- + +:::info +Please check the [GitHub User Guide](../user_guide/github_integration.mdx) for detailed instructions on using GitHub features. +::: + +## Overview + +The **GitHub Integration Guide** offers instructions to set up and configure GitHub within Querybook. Follow these steps to allow interaction between Querybook and your GitHub repositories. + +> **Note:** The GitHub Integration is an experimental feature. Ensure that all configurations are correctly set to avoid setup issues. + +## Implementation + +To integrate GitHub with Querybook, follow the steps below. This setup involves configuring GitHub OAuth, setting up necessary environment variables, and enabling the GitHub Integration feature. + +### 1. Setup GitHub OAuth Application + +Before integrating GitHub with Querybook, you need to create an OAuth application on GitHub to obtain the necessary credentials. + +1. **Navigate to GitHub Settings:** + + - Go to your GitHub account settings. + - Click on **Developer settings**. + - Select **OAuth Apps** and then click **New OAuth App**. + +2. **Register a New Application:** + + - **Application Name:** Choose a name for your application, e.g., `Querybook Integration`. + - **Homepage URL:** Enter your Querybook instance URL, e.g., `https://your-querybook-domain.com`. + - **Authorization Callback URL:** Set this to `https://your-querybook-domain.com/github/oauth2callback`. + +3. **Save the Application:** + + - After registering, GitHub will provide a **Client ID** and **Client Secret**. Keep these credentials secure as they are required for the integration. + +### 2. 
Install Dependencies + +Ensure that the required Python packages are installed. GitHub Integration relies on OAuth libraries and other dependencies. + +Add the following line to your `requirements/local.txt`: + +```plaintext +-r github.txt +``` + +**Note:** +The `github.txt` file includes `pygithub==2.4.0` and `cryptography==3.4.8`, which are essential for interacting with the GitHub API and securing tokens. +For more details, refer to [`infra_installation.mdx`](../configurations/infra_installation.mdx). + +### 3. Configure GitHub Integration + +Configure Querybook to use the GitHub feature by setting the necessary environment variables and updating configuration files. +Secrets such as `GITHUB_CLIENT_SECRET` and `GITHUB_CRYPTO_SECRET` should be stored securely in environment variables, while non-sensitive information can be placed in `querybook_config.yaml`. + +1. **Set Config Variables:** + + ```env + GITHUB_CLIENT_ID=github_app_client_id + GITHUB_CLIENT_SECRET=github_app_client_secret + GITHUB_CRYPTO_SECRET=crypto_secret + GITHUB_REPO_NAME=github_username/github_repository + GITHUB_BRANCH=main # Optional, defaults to 'main' branch + ``` + + - **GITHUB_CLIENT_ID:** The Client ID obtained from the GitHub OAuth App. + - **GITHUB_CLIENT_SECRET:** The Client Secret obtained from the GitHub OAuth App. + - **GITHUB_CRYPTO_SECRET:** A secret key used for encrypting GitHub tokens in the database. + - **GITHUB_REPO_NAME:** The repository name in the format `user/repo_name` (e.g., `github_username/querybook-datadocs`). + - **GITHUB_BRANCH:** The branch to which commits are pushed. Defaults to `main` if not set. + + **Note:** + To obtain `GITHUB_REPO_NAME`, format your repository name as `username/repository`. For example: + + 1. Navigate to your GitHub profile and click on **Repositories**. + 2. Select the repository you want to link. + 3. Enter the repository name in the format `username/repository_name`. You can obtain this from your repository's GitHub URL. 
For instance, if your repository URL is `https://github.com/username123/querybook-datadocs`, the repository name would be `username123/querybook-datadocs`. + +### 4. Enable the Feature in Querybook + +To display the GitHub button on the Querybook UI for DataDocs, edit the `querybook_public_config.yaml` to enable the GitHub Integration feature: + +```yaml +github_integration: + enabled: true +``` + +## Example Configuration + +Below is an example configuration snippet demonstrating how to set up GitHub Integration in `querybook_config.yaml` and `querybook_public_config.yaml`: + +```yaml +querybook_config: + GITHUB_CLIENT_ID: 'your_github_client_id' + GITHUB_CLIENT_SECRET: '---Redacted---' + GITHUB_CRYPTO_SECRET: '---Redacted---' + GITHUB_REPO_NAME: 'github_username/querybook-datadocs' + GITHUB_BRANCH: 'main' + +public_config: + github_integration: + enabled: true +``` + +## Additional Tips for Developers + +- **Security:** Keep your GitHub OAuth credentials secure. Avoid hardcoding sensitive information in configuration files. Store secrets safely and securely using environment variables. +- **Testing:** After setting up, perform test commits to verify that the integration works as expected before deploying to production environments. + +For more information, refer to the [GitHub User Guide](../user_guide/github_integration.mdx) and GitHub's [OAuth Apps Documentation](https://docs.github.com/en/developers/apps/building-oauth-apps/authorizing-oauth-apps). diff --git a/docs_website/docs/user_guide/github_integration.mdx b/docs_website/docs/user_guide/github_integration.mdx new file mode 100644 index 000000000..c9d5f90e9 --- /dev/null +++ b/docs_website/docs/user_guide/github_integration.mdx @@ -0,0 +1,81 @@ +--- +id: github_integration +title: GitHub User Guide +sidebar_label: GitHub Integration +--- + +## Overview + +The **GitHub** feature allows you to seamlessly link your DataDocs with GitHub repositories. 
This integration enables you to commit DataDoc versions directly to GitHub, track changes over time, and collaborate more effectively using GitHub's version control capabilities. + +> **Note:** GitHub Integration is an **experimental** feature. It may undergo significant changes in future releases. + +## Getting Started + +### Linking Your DataDoc to a GitHub Repository + +1. **Access GitHub Integration:** + + - Open the DataDoc you wish to integrate. + - Click on the **GitHub** icon in the DataDoc right sidebar. + +![GitHub DataDoc Sidebar](/img/user_guide/github/github_datadoc_sidebar.png) + +2. **Authorize GitHub:** + + - Click on the **Connect Now** button. + - You will be redirected to GitHub to authorize Querybook. + - After authorization, you'll be redirected back to Querybook. + +![Connect GitHub](/img/user_guide/github/connect_github.png) + +3. **Link Directory:** + + - Enter the **Directory Path** within the repository where DataDoc versions will be stored. You can either specify a custom directory path or use the default directory named `datadocs`. + - Click the **Link Directory** button to finalize the process. + +![GitHub Directory Linking](/img/user_guide/github/github_directory_linking.png) + +### Committing Changes to GitHub + +1. **Commit Your DataDoc:** + + - In your DataDoc, click on the **Push to GitHub** tab located at the top of the modal. + - Enter a descriptive **Commit Message** summarizing your changes. + - Click the **Push** button to push the new changes to the linked GitHub repository. + +![GitHub Push](/img/user_guide/github/github_push.png) + +2. **View Commit History:** + + - Navigate to the **GitHub Versions** section within your DataDoc. + - Here, you can view the commit history, and compare and restore previous versions. + +![Commit History](/img/user_guide/github/github_versions.png) + +## Best Practices + +- **Frequent Commits:** Commit your changes regularly to maintain a clear history of your DataDoc's evolution. 
+- **Descriptive Messages:** Use clear and descriptive commit messages to make it easier to understand the purpose of each commit. + +## Version History and Branching + +### Branching + +Querybook does not support traditional branching as all edits are shared in real time. Commits are directly pushed to GitHub, eliminating the concept of local and remote changes. + +### Workarounds + +- **Clone the DataDoc:** Create a separate copy to experiment with changes without affecting the main version. +- **Link to a Different Repository:** Connect the DataDoc to an alternative GitHub repository for testing purposes. + +By following these approaches, users can safely manage and experiment with their DataDocs while maintaining a streamlined version history. + +## Troubleshooting + +If you encounter issues while using GitHub Integration, consider the following steps: + +- **Ensure Proper Linking:** Verify that your DataDoc is correctly linked to the intended GitHub repository. +- **Check Permissions:** Make sure the OAuth application has the necessary permissions to access and modify the repository. + +For further assistance, refer to the [GitHub Integration Guide](../integrations/add_github_integration.mdx). 
diff --git a/docs_website/sidebars.json b/docs_website/sidebars.json index 35a826618..192142b5c 100755 --- a/docs_website/sidebars.json +++ b/docs_website/sidebars.json @@ -40,6 +40,7 @@ "integrations/add_stats_logger", "integrations/add_surveys", "integrations/add_ai_assistant", + "integrations/add_github_integration", "integrations/customize_html", "integrations/embedded_iframe" ], @@ -54,6 +55,7 @@ "User Guide": [ "user_guide/ai_assistant", + "user_guide/github_integration", "user_guide/api_token", "user_guide/faq" ], diff --git a/docs_website/static/img/user_guide/github/connect_github.png b/docs_website/static/img/user_guide/github/connect_github.png new file mode 100644 index 000000000..8058e5c5e Binary files /dev/null and b/docs_website/static/img/user_guide/github/connect_github.png differ diff --git a/docs_website/static/img/user_guide/github/github_datadoc_sidebar.png b/docs_website/static/img/user_guide/github/github_datadoc_sidebar.png new file mode 100644 index 000000000..19607f93f Binary files /dev/null and b/docs_website/static/img/user_guide/github/github_datadoc_sidebar.png differ diff --git a/docs_website/static/img/user_guide/github/github_directory_linking.png b/docs_website/static/img/user_guide/github/github_directory_linking.png new file mode 100644 index 000000000..d7487e38c Binary files /dev/null and b/docs_website/static/img/user_guide/github/github_directory_linking.png differ diff --git a/docs_website/static/img/user_guide/github/github_push.png b/docs_website/static/img/user_guide/github/github_push.png new file mode 100644 index 000000000..0caab550d Binary files /dev/null and b/docs_website/static/img/user_guide/github/github_push.png differ diff --git a/docs_website/static/img/user_guide/github/github_versions.png b/docs_website/static/img/user_guide/github/github_versions.png new file mode 100644 index 000000000..db509c509 Binary files /dev/null and b/docs_website/static/img/user_guide/github/github_versions.png differ diff --git 
a/package.json b/package.json index 201f34125..3afa4a61b 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "querybook", - "version": "3.35.0", + "version": "3.36.0", "description": "A Big Data Webapp", "private": true, "scripts": { diff --git a/querybook/config/querybook_default_config.yaml b/querybook/config/querybook_default_config.yaml index 42bd611c2..d4d8e7935 100644 --- a/querybook/config/querybook_default_config.yaml +++ b/querybook/config/querybook_default_config.yaml @@ -106,3 +106,10 @@ VECTOR_STORE_PROVIDER: ~ VECTOR_STORE_CONFIG: embeddings_arg_name: 'embedding_function' index_name: 'vector_index_v1' + +# --------------- GitHub Integration --------------- +GITHUB_CLIENT_ID: ~ +GITHUB_CLIENT_SECRET: ~ +GITHUB_REPO_NAME: ~ +GITHUB_BRANCH: 'main' +GITHUB_CRYPTO_SECRET: '' diff --git a/querybook/config/querybook_public_config.yaml b/querybook/config/querybook_public_config.yaml index 4f2e7362f..44df88a14 100644 --- a/querybook/config/querybook_public_config.yaml +++ b/querybook/config/querybook_public_config.yaml @@ -38,3 +38,6 @@ table_sampling: default_sample_rate: 0 # 0 means no sampling sample_user_guide_link: '' sampling_tool_tip_delay: 10000 # delay duration (ms) of sampling tool tip + +github_integration: + enabled: false diff --git a/querybook/migrations/versions/aa328ae9dced_add_github_datadoc_link.py b/querybook/migrations/versions/aa328ae9dced_add_github_datadoc_link.py new file mode 100644 index 000000000..522d9df9d --- /dev/null +++ b/querybook/migrations/versions/aa328ae9dced_add_github_datadoc_link.py @@ -0,0 +1,56 @@ +"""Add GitHub Datadoc Link + +Revision ID: aa328ae9dced +Revises: f7b11b3e3a95 +Create Date: 2024-10-23 21:04:55.052696 + +""" + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = "aa328ae9dced" +down_revision = "f7b11b3e3a95" +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! 
### + op.create_table( + "github_link", + sa.Column("id", sa.Integer(), autoincrement=True, nullable=False), + sa.Column("datadoc_id", sa.Integer(), nullable=False), + sa.Column("user_id", sa.Integer(), nullable=False), + sa.Column( + "directory", + sa.String(length=255), + nullable=False, + server_default="datadocs", + ), + sa.Column( + "created_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False + ), + sa.Column( + "updated_at", sa.DateTime(), server_default=sa.text("now()"), nullable=False + ), + sa.ForeignKeyConstraint( + ["datadoc_id"], + ["data_doc.id"], + ), + sa.ForeignKeyConstraint( + ["user_id"], + ["user.id"], + ), + sa.PrimaryKeyConstraint("id"), + sa.UniqueConstraint("datadoc_id"), + ) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.drop_table("github_link") + # ### end Alembic commands ### diff --git a/querybook/server/clients/github_client.py b/querybook/server/clients/github_client.py new file mode 100644 index 000000000..1dc726533 --- /dev/null +++ b/querybook/server/clients/github_client.py @@ -0,0 +1,178 @@ +from github import Github, GithubException, Auth +from typing import List, Dict, Optional + +from lib.github.serializers import ( + deserialize_datadoc_from_markdown, + serialize_datadoc_to_markdown, +) +from lib.logger import get_logger +from models.datadoc import DataDoc +from models.github import GitHubLink + +LOG = get_logger(__name__) + + +class GitHubClient: + def __init__( + self, + access_token: str, + repo_name: str, + branch: str, + github_link: Optional[GitHubLink] = None, + ): + """ + Initialize the GitHub client with an access token. + Args: + access_token (str): The GitHub access token. + repo_name (str): The GitHub repository name. + branch (str): The branch name. + github_link (Optional[GitHubLink]): The GitHub link object. 
+ """ + self.github_link = github_link + self.branch = branch + auth = Auth.Token(access_token) + self.client = Github(auth=auth, per_page=5) + self.user = self.client.get_user() + self.repo = self._get_repository(repo_name) + self.file_path = self._build_file_path() if github_link else None + + def _get_repository(self, repo_name: str): + if not repo_name: + LOG.error("Repository name is required") + raise Exception("Repository name is required") + return self.client.get_repo(repo_name) + + def _build_file_path(self) -> str: + directory = self.github_link.directory + file_name = f"datadoc_{self.github_link.datadoc.id}.md" + return f"{directory}/{file_name}" + + def commit_datadoc(self, commit_message: Optional[str] = None) -> None: + """ + Commit a DataDoc to the repository. + Args: + commit_message (Optional[str]): Commit message. Defaults to a standard message. + Raises: + Exception: If committing the DataDoc fails. + """ + if not self.github_link: + raise Exception("GitHub link is required for this operation") + + datadoc = self.github_link.datadoc + content = serialize_datadoc_to_markdown(datadoc) + if not commit_message: + commit_message = ( + f"Update DataDoc {datadoc.id}: {datadoc.title or 'Untitled'}" + ) + + try: + contents = self.repo.get_contents(self.file_path, ref=self.branch) + self._update_file(contents, content, commit_message) + LOG.info(f"Updated file {self.file_path} in repository.") + except GithubException as e: + if e.status == 404: + self._create_file(content, commit_message) + LOG.info(f"Created file {self.file_path} in repository.") + else: + LOG.error(f"GitHubException during commit: {e}") + raise Exception(f"Failed to commit DataDoc: {e}") + + def _update_file(self, contents, content: str, commit_message: str) -> None: + """ + Update an existing file in the repository. + Args: + contents: The current contents of the file. + content (str): New content for the file. + commit_message (str): Commit message. 
+ Raises: + Exception: If updating the file fails. + """ + try: + self.repo.update_file( + path=contents.path, + message=commit_message, + content=content, + sha=contents.sha, + branch=self.branch, + ) + except GithubException as e: + LOG.error(f"Error updating file {self.file_path}: {e}") + raise Exception(f"Failed to update DataDoc: {e}") + + def _create_file(self, content: str, commit_message: str) -> None: + """ + Create a new file in the repository. + Args: + content (str): Content for the new file. + commit_message (str): Commit message. + Raises: + Exception: If creating the file fails. + """ + try: + self.repo.create_file( + path=self.file_path, + message=commit_message, + content=content, + branch=self.branch, + ) + except GithubException as e: + LOG.error(f"Error creating file {self.file_path}: {e}") + raise Exception(f"Failed to create DataDoc: {e}") + + def get_datadoc_versions(self, page: int = 1) -> List[Dict]: + """ + Get the versions of a DataDoc with pagination. + Args: + page (int): Page number. + Returns: + List[Dict]: A list of commit dictionaries. + """ + if not self.github_link: + raise Exception("GitHub link is required for this operation") + + try: + commits = self.repo.get_commits( + path=self.file_path, + sha=self.branch, + ).get_page(page - 1) + return [commit.raw_data for commit in commits] + except GithubException as e: + LOG.error(f"GitHubException during get_datadoc_versions: {e}") + return [] + + def get_datadoc_at_commit(self, commit_sha: str) -> DataDoc: + """ + Get a DataDoc at a specific commit. + Args: + commit_sha (str): The commit SHA. + Returns: + DataDoc: The DataDoc object at the specified commit. + Raises: + Exception: If getting the DataDoc at the commit fails. 
+ """ + if not self.github_link: + raise Exception("GitHub link is required for this operation") + + try: + file_contents = self.repo.get_contents(path=self.file_path, ref=commit_sha) + content = file_contents.decoded_content.decode("utf-8") + return deserialize_datadoc_from_markdown(content) + except GithubException as e: + LOG.error(f"GitHubException during get_datadoc_at_commit: {e}") + raise Exception(f"Failed to get DataDoc at commit {commit_sha}: {e}") + + def get_repo_directories(self) -> List[str]: + """ + Get all directories in the repository. + Returns: + List[str]: A list of directory names. + """ + try: + contents = self.repo.get_contents("") + directories = [ + content.path for content in contents if content.type == "dir" + ] + return directories + except GithubException as e: + LOG.error(f"GitHubException during get_directories: {e}") + return [] diff --git a/querybook/server/datasources/__init__.py b/querybook/server/datasources/__init__.py index 0ba28d65f..bad09b4ef 100644 --- a/querybook/server/datasources/__init__.py +++ b/querybook/server/datasources/__init__.py @@ -18,6 +18,8 @@ from . import comment from . import survey from . import query_transform +from . 
import github + # Keep this at the end of imports to make sure the plugin APIs override the default ones try: @@ -47,3 +49,4 @@ survey query_transform api_plugin +github diff --git a/querybook/server/datasources/github.py b/querybook/server/datasources/github.py new file mode 100644 index 000000000..3dd607136 --- /dev/null +++ b/querybook/server/datasources/github.py @@ -0,0 +1,148 @@ +from functools import wraps +from app.datasource import api_assert, register +from clients.github_client import GitHubClient +from env import QuerybookSettings +from lib.github.github import github_manager +from typing import Dict, List, Optional +from lib.github.serializers import serialize_datadoc_to_markdown +from logic import github as logic +from logic import datadoc as datadoc_logic +from const.datasources import RESOURCE_NOT_FOUND_STATUS_CODE +from logic.datadoc_permission import assert_can_read, assert_can_write +from app.auth.permission import verify_data_doc_permission +from flask_login import current_user + + +def with_github_client(f): + @wraps(f) + def decorated_function(*args, **kwargs): + datadoc_id = kwargs.get("datadoc_id") + github_link = logic.get_repo_link(datadoc_id) + access_token = github_manager.get_github_token() + github_client = GitHubClient( + github_link=github_link, + access_token=access_token, + repo_name=QuerybookSettings.GITHUB_REPO_NAME, + branch=QuerybookSettings.GITHUB_BRANCH, + ) + return f(github_client, *args, **kwargs) + + return decorated_function + + +@register("/github/auth/", methods=["GET"]) +def connect_github() -> Dict[str, str]: + return github_manager.initiate_github_integration() + + +@register("/github/is_authorized/", methods=["GET"]) +def is_github_authorized() -> Dict[str, bool]: + try: + github_manager.get_github_token() + is_authorized = True + except Exception: + is_authorized = False + return {"is_authorized": is_authorized} + + +@register("/github/datadocs//link/", methods=["POST"]) +def link_datadoc_to_github( + datadoc_id: 
int, + directory: str, +) -> Dict: + datadoc = datadoc_logic.get_data_doc_by_id(datadoc_id) + api_assert( + datadoc is not None, + "DataDoc not found", + status_code=RESOURCE_NOT_FOUND_STATUS_CODE, + ) + assert_can_write(datadoc_id) + verify_data_doc_permission(datadoc_id) + + github_link = logic.create_repo_link( + datadoc_id=datadoc_id, user_id=current_user.id, directory=directory + ) + return github_link.to_dict() + + +@register("/github/datadocs//is_linked/", methods=["GET"]) +def is_datadoc_linked(datadoc_id: int) -> Dict[str, Optional[str]]: + datadoc = datadoc_logic.get_data_doc_by_id(datadoc_id) + api_assert( + datadoc is not None, + "DataDoc not found", + status_code=RESOURCE_NOT_FOUND_STATUS_CODE, + ) + assert_can_read(datadoc_id) + verify_data_doc_permission(datadoc_id) + + github_link = logic.get_repo_link(datadoc_id) + return {"linked_directory": github_link.directory if github_link else None} + + +@register("/github/datadocs//directories/", methods=["GET"]) +@with_github_client +def get_github_directories( + github_client: GitHubClient, datadoc_id: int +) -> Dict[str, List[str]]: + assert_can_read(datadoc_id) + verify_data_doc_permission(datadoc_id) + directories = github_client.get_repo_directories() + return {"directories": directories} + + +@register("/github/datadocs//commit/", methods=["POST"]) +@with_github_client +def commit_datadoc( + github_client: GitHubClient, + datadoc_id: int, + commit_message: Optional[str] = None, +) -> Dict: + assert_can_write(datadoc_id) + verify_data_doc_permission(datadoc_id) + github_client.commit_datadoc(commit_message=commit_message) + return {"message": "DataDoc committed successfully"} + + +@register("/github/datadocs//versions/", methods=["GET"]) +@with_github_client +def get_datadoc_versions( + github_client: GitHubClient, datadoc_id: int, limit: int = 5, offset: int = 0 +) -> List[Dict]: + assert_can_read(datadoc_id) + verify_data_doc_permission(datadoc_id) + page = offset // limit + 1 + versions = 
github_client.get_datadoc_versions(page=page) + return versions + + +@register("/github/datadocs//compare/", methods=["GET"]) +@with_github_client +def compare_datadoc_versions( + github_client: GitHubClient, datadoc_id: int, commit_sha: str +) -> Dict: + """ + Compare the current DataDoc with a specific commit. + """ + assert_can_read(datadoc_id) + verify_data_doc_permission(datadoc_id) + current_datadoc = datadoc_logic.get_data_doc_by_id(datadoc_id) + api_assert( + current_datadoc is not None, + "Current DataDoc not found", + status_code=RESOURCE_NOT_FOUND_STATUS_CODE, + ) + current_markdown = serialize_datadoc_to_markdown( + current_datadoc, exclude_metadata=True + ) + + # Get the DataDoc content at the specified commit and re-serialize with metadata excluded + commit_datadoc = github_client.get_datadoc_at_commit(commit_sha) + commit_markdown = serialize_datadoc_to_markdown( + commit_datadoc, exclude_metadata=True + ) + + return { + "current_content": current_markdown, + "commit_content": commit_markdown, + } diff --git a/querybook/server/datasources_socketio/datadoc.py b/querybook/server/datasources_socketio/datadoc.py index 9397162bd..d7455cd9a 100644 --- a/querybook/server/datasources_socketio/datadoc.py +++ b/querybook/server/datasources_socketio/datadoc.py @@ -216,6 +216,17 @@ def update_data_doc(id, fields): datadoc_collab.update_datadoc(id, fields, sid=request.sid) +@register_socket("restore_data_doc", namespace=DATA_DOC_NAMESPACE) +@data_doc_socket +def restore_data_doc(datadoc_id: int, commit_sha: str, commit_message: str): + datadoc_collab.restore_data_doc( + datadoc_id=datadoc_id, + commit_sha=commit_sha, + commit_message=commit_message, + sid=request.sid, + ) + + @register_socket("update_data_cell", namespace=DATA_DOC_NAMESPACE) @data_doc_socket def update_data_cell(doc_id, cell_id, fields): diff --git a/querybook/server/env.py b/querybook/server/env.py index 2128717a3..088edff76 100644 --- a/querybook/server/env.py +++ b/querybook/server/env.py @@ 
-156,3 +156,10 @@ class QuerybookSettings(object): VECTOR_STORE_CONFIG = get_env_config("VECTOR_STORE_CONFIG") or {} EMBEDDINGS_PROVIDER = get_env_config("EMBEDDINGS_PROVIDER") EMBEDDINGS_CONFIG = get_env_config("EMBEDDINGS_CONFIG") or {} + + # GitHub Integration + GITHUB_CLIENT_ID = get_env_config("GITHUB_CLIENT_ID") + GITHUB_CLIENT_SECRET = get_env_config("GITHUB_CLIENT_SECRET") + GITHUB_REPO_NAME = get_env_config("GITHUB_REPO_NAME") + GITHUB_BRANCH = get_env_config("GITHUB_BRANCH") + GITHUB_CRYPTO_SECRET = get_env_config("GITHUB_CRYPTO_SECRET") diff --git a/querybook/server/lib/github/__init__.py b/querybook/server/lib/github/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/querybook/server/lib/github/github.py b/querybook/server/lib/github/github.py new file mode 100644 index 000000000..055a93745 --- /dev/null +++ b/querybook/server/lib/github/github.py @@ -0,0 +1,126 @@ +import certifi +from flask import session as flask_session, request +from github import Github, Auth +from app.auth.github_auth import GitHubLoginManager +from app.auth.utils import AuthenticationError +from env import QuerybookSettings +from lib.logger import get_logger +from app.flask_app import flask_app +from typing import Optional, Dict, Any +from flask_login import current_user +from ..utils.token_utils import TokenManager + +LOG = get_logger(__file__) + +GITHUB_OAUTH_CALLBACK = "/github/oauth2callback" +GITHUB_ACCESS_TOKEN = "github_access_token" +OAUTH_STATE_KEY = "oauth_state" + + +class GitHubManager(GitHubLoginManager): + def __init__( + self, + additional_scopes: Optional[list] = None, + client_id: Optional[str] = None, + client_secret: Optional[str] = None, + ): + self.additional_scopes = additional_scopes or [] + self._client_id = client_id + self._client_secret = client_secret + super().__init__() + + self.token_manager = TokenManager( + token_type=GITHUB_ACCESS_TOKEN, + encryption_key=QuerybookSettings.GITHUB_CRYPTO_SECRET, + ) + + @property + def 
oauth_config(self) -> Dict[str, Any]: + config = super().oauth_config + config["scope"] = "user email " + " ".join(self.additional_scopes) + config[ + "callback_url" + ] = f"{QuerybookSettings.PUBLIC_URL}{GITHUB_OAUTH_CALLBACK}" + if self._client_id: + config["client_id"] = self._client_id + if self._client_secret: + config["client_secret"] = self._client_secret + return config + + def save_github_token(self, token: str) -> None: + self.token_manager.save_token(current_user.id, token) + + def get_github_token(self) -> str: + token = self.token_manager.get_token(current_user.id) + return self.validate_token(token) + + def validate_token(self, token: str) -> str: + try: + auth = Auth.Token(token) + github_client = Github(auth=auth) + github_user = github_client.get_user() + if github_user and github_user.login: + LOG.debug(f"Validated GitHub token for user: {github_user.login}") + return token + else: + LOG.error("GitHub token validation failed: User login not found") + self.token_manager.invalidate_token(current_user.id) + raise AuthenticationError("GitHub token validation failed.") + except Exception as e: + LOG.error(f"GitHub API error during token validation: {e}") + raise AuthenticationError("GitHub API error during token validation.") + + def initiate_github_integration(self) -> Dict[str, str]: + github = self.oauth_session + authorization_url, state = github.authorization_url( + self.oauth_config["authorization_url"] + ) + flask_session[OAUTH_STATE_KEY] = state + return {"url": authorization_url} + + def github_integration_callback(self) -> str: + try: + github = self.oauth_session + + github_state = flask_session.pop(OAUTH_STATE_KEY, None) + # Validate the state parameter to protect against CSRF attacks + if github_state is None or github_state != request.args.get("state"): + raise AuthenticationError("Invalid state parameter") + + access_token = github.fetch_token( + self.oauth_config["token_url"], + client_secret=self.oauth_config["client_secret"], + 
authorization_response=request.url, + cert=certifi.where(), + ) + token = access_token["access_token"] + self.save_github_token(token) + return self.success_response() + except Exception as e: + LOG.error(f"Failed to obtain credentials: {e}") + return self.error_response(str(e)) + + def success_response(self) -> str: + return """ +

Success! Please close the tab.

+ + """ + + def error_response(self, error_message: str) -> str: + return f""" +

Failed to obtain credentials, reason: {error_message}

+ """ + + +github_manager = GitHubManager( + additional_scopes=["repo"], + client_id=QuerybookSettings.GITHUB_CLIENT_ID, + client_secret=QuerybookSettings.GITHUB_CLIENT_SECRET, +) + + +@flask_app.route(GITHUB_OAUTH_CALLBACK) +def github_callback() -> str: + return github_manager.github_integration_callback() diff --git a/querybook/server/lib/github/serializers.py b/querybook/server/lib/github/serializers.py new file mode 100644 index 000000000..85507c189 --- /dev/null +++ b/querybook/server/lib/github/serializers.py @@ -0,0 +1,234 @@ +import yaml +import re +from typing import List, Tuple, Optional +from models.datadoc import DataDoc, DataCell +from const.data_doc import DataCellType +from datetime import datetime, timezone + + +def parse_datetime_as_utc(date_str: Optional[str]) -> datetime: + """ + Parse the given date string to a datetime object in UTC. + """ + if isinstance(date_str, datetime): + return date_str.astimezone(timezone.utc) + if date_str: + return datetime.fromisoformat(date_str).astimezone(timezone.utc) + return datetime.now(timezone.utc).replace(tzinfo=timezone.utc) + + +def serialize_datadoc_to_markdown( + datadoc: DataDoc, exclude_metadata: bool = False +) -> str: + """ + Serialize a DataDoc instance to a Markdown string with YAML front matter. 
+ """ + markdown_parts = [] + + if not exclude_metadata: + datadoc_metadata = { + "id": datadoc.id, + "environment_id": datadoc.environment_id, + "public": datadoc.public, + "archived": datadoc.archived, + "owner_uid": datadoc.owner_uid, + "created_at": ( + datadoc.created_at.isoformat() if datadoc.created_at else None + ), + "updated_at": ( + datadoc.updated_at.isoformat() if datadoc.updated_at else None + ), + "meta": datadoc.meta, + "title": datadoc.title, + } + try: + front_matter = ( + f"---\n{yaml.dump(datadoc_metadata, default_flow_style=False)}---\n\n" + ) + markdown_parts.append(front_matter) + + except yaml.YAMLError as e: + raise ValueError(f"Error serializing DataDoc metadata to YAML: {e}") + + title = f"# {datadoc.title}\n\n" + markdown_parts.append(title) + + content = serialize_datacells( + cells=datadoc.cells, exclude_metadata=exclude_metadata + ) + markdown_parts.append(content) + + return "".join(markdown_parts) + + +def serialize_datacells(cells: List[DataCell], exclude_metadata: bool = False) -> str: + """ + Serialize a list of DataCell instances to a Markdown string. 
+ """ + cell_strings = [] + for cell in cells: + cell_content = serialize_cell_content( + cell=cell, exclude_metadata=exclude_metadata + ) + + if not exclude_metadata: + # Since GitHub's Markdown renderer does not recognize multiple --- blocks as separate YAML sections, + # we serialize cell metadata in HTML comment to hide it from rendered view + cell_metadata = { + "id": cell.id, + "cell_type": cell.cell_type.name.lower(), + "created_at": cell.created_at.isoformat() if cell.created_at else None, + "updated_at": cell.updated_at.isoformat() if cell.updated_at else None, + "meta": cell.meta, + } + try: + cell_metadata_yaml = yaml.dump(cell_metadata, default_flow_style=False) + except yaml.YAMLError as e: + raise ValueError(f"Error serializing cell metadata to YAML: {e}") + + cell_metadata_comment = f"\n" + cell_strings.append(cell_metadata_comment + cell_content) + else: + cell_strings.append(cell_content) + + return "\n\n".join(cell_strings) + + +def serialize_cell_content(cell: DataCell, exclude_metadata: bool = False) -> str: + """ + Serialize a single DataCell instance to a Markdown string based on its type + """ + cell_meta = cell.meta or {} + + if cell.cell_type == DataCellType.query: + query_title = cell_meta.get("title", "Query") + header = f"## Query: {query_title}\n\n" + if exclude_metadata: # Exclude code fences + content = f"{cell.context.strip()}\n" + else: + content = f"```sql\n{cell.context.strip()}\n```\n" + return header + content + + elif cell.cell_type == DataCellType.text: + header = "## Text\n\n" + content = f"{cell.context.strip()}\n" + return header + content + + elif cell.cell_type == DataCellType.chart: + header = "## Chart\n\n" + content = "*Chart generated from the metadata.*\n" + return header + content + + else: + raise ValueError(f"Unknown cell type: {cell.cell_type}") + + +def deserialize_datadoc_from_markdown(markdown_str: str) -> DataDoc: + """ + Deserialize a Markdown string to a DataDoc instance. 
+ """ + front_matter, content = extract_front_matter(markdown_str) + datadoc = create_datadoc_from_metadata(front_matter) + datadoc.cells = deserialize_datadoc_content(content) + return datadoc + + +def extract_front_matter(markdown_str: str) -> Tuple[dict, str]: + """ + Extract YAML front matter and the remaining content from the markdown string. + """ + front_matter_pattern = re.compile(r"^---\n(.*?)\n---\n\n", re.DOTALL) + match = front_matter_pattern.match(markdown_str) + if match: + front_matter_str = match.group(1) + content = markdown_str[match.end() :] + try: + front_matter = yaml.safe_load(front_matter_str) + except yaml.YAMLError as e: + raise ValueError(f"Error parsing front matter YAML: {e}") + else: + raise ValueError("Invalid Markdown format: Missing front matter.") + return front_matter, content + + +def create_datadoc_from_metadata(metadata: dict) -> DataDoc: + """ + Create a DataDoc instance from metadata dictionary. + """ + datadoc = DataDoc( + id=metadata.get("id"), + environment_id=metadata.get("environment_id"), + public=metadata.get("public", True), + archived=metadata.get("archived", False), + owner_uid=metadata.get("owner_uid"), + created_at=parse_datetime_as_utc(metadata.get("created_at")), + updated_at=parse_datetime_as_utc(metadata.get("updated_at")), + title=metadata.get("title", ""), + ) + datadoc.meta = metadata.get("meta", {}) + return datadoc + + +def deserialize_datadoc_content(content_str: str) -> List[DataCell]: + """ + Deserialize the content part of the markdown into a list of DataCell instances. + Handles Query, Text, and Chart cell types. + """ + cells = [] + # Split the content by the HTML comment markers. 
Each cell starts with + cell_blocks = re.split(r"\n", content_str, flags=re.DOTALL) + + # The first split item is the title, skip it + cell_blocks = cell_blocks[1:] + + metadata_blocks = re.findall(r"", content_str, flags=re.DOTALL) + + if len(cell_blocks) != len(metadata_blocks): + raise ValueError("Mismatch between metadata and cell content blocks.") + + for metadata_str, cell_content in zip(metadata_blocks, cell_blocks): + try: + metadata = yaml.safe_load(metadata_str) + except yaml.YAMLError as e: + raise ValueError(f"Error parsing cell metadata YAML: {e}") + + cell_type = metadata.get("cell_type", "").lower() + try: + cell_type_enum = DataCellType[cell_type] + except KeyError: + raise ValueError(f"Unknown cell_type: {cell_type}") + + # Determine the cell content based on cell type + if cell_type_enum == DataCellType.query: + # Extract the SQL code block + sql_pattern = re.compile( + r"## Query: [^\n]+\n\n```sql\n([\s\S]*?)\n```", re.DOTALL + ) + match = sql_pattern.search(cell_content) + if not match: + raise ValueError("Query cell missing SQL code block.") + context = match.group(1).strip() + elif cell_type_enum == DataCellType.text: + # Extract text content + text_pattern = re.compile(r"## Text\n\n([\s\S]+)", re.DOTALL) + match = text_pattern.search(cell_content) + if not match: + raise ValueError("Text cell missing content.") + context = match.group(1).strip() + elif cell_type_enum == DataCellType.chart: + # Chart cells have no context since they're created via metadata + context = None + else: + raise ValueError(f"Unsupported cell type: {cell_type_enum}") + + cell = DataCell( + id=metadata.get("id"), + cell_type=cell_type_enum, + context=context if cell_type_enum != DataCellType.chart else None, + created_at=parse_datetime_as_utc(metadata.get("created_at")), + updated_at=parse_datetime_as_utc(metadata.get("updated_at")), + meta=metadata.get("meta", {}), + ) + cells.append(cell) + + return cells diff --git a/querybook/server/lib/utils/crypto_utils.py 
b/querybook/server/lib/utils/crypto_utils.py new file mode 100644 index 000000000..4aae1b24e --- /dev/null +++ b/querybook/server/lib/utils/crypto_utils.py @@ -0,0 +1,36 @@ +from cryptography.fernet import Fernet +from lib.logger import get_logger + +LOG = get_logger(__file__) + + +class EncryptionError(Exception): + pass + + +class DecryptionError(Exception): + pass + + +def get_cipher(encryption_key: bytes) -> Fernet: + return Fernet(encryption_key) + + +def encrypt_token(token: str, encryption_key: bytes) -> str: + try: + cipher = get_cipher(encryption_key) + encrypted_token = cipher.encrypt(token.encode()) + return encrypted_token.decode() + except Exception as e: + LOG.error(f"Encryption failed: {e}") + raise EncryptionError("Failed to encrypt token") + + +def decrypt_token(encrypted_token: str, encryption_key: bytes) -> str: + try: + cipher = get_cipher(encryption_key) + decrypted_token = cipher.decrypt(encrypted_token.encode()) + return decrypted_token.decode() + except Exception as e: + LOG.error(f"Token decryption failed: {e}") + raise DecryptionError("Failed to decrypt token") diff --git a/querybook/server/lib/utils/token_utils.py b/querybook/server/lib/utils/token_utils.py new file mode 100644 index 000000000..92bfd12d5 --- /dev/null +++ b/querybook/server/lib/utils/token_utils.py @@ -0,0 +1,58 @@ +from logic.user import get_user_by_id, update_user_properties +from .crypto_utils import encrypt_token, decrypt_token, DecryptionError +from app.auth.utils import AuthenticationError +from lib.logger import get_logger + +LOG = get_logger(__file__) + + +class TokenManager: + def __init__(self, token_type: str, encryption_key: str): + """ + Initializes the TokenManager with a token type and its encryption key. + + Args: + token_type (str): The key under which the token is stored (e.g., 'github_access_token'). + encryption_key (str): The encryption key as a string. 
+ """ + self.token_type = token_type + self.encryption_key = encryption_key.encode() + + def save_token(self, user_id: int, token: str) -> None: + encrypted_token = encrypt_token(token, self.encryption_key) + try: + update_user_properties(user_id, **{self.token_type: encrypted_token}) + LOG.debug( + f"Saved encrypted token '{self.token_type}' for user ID {user_id}" + ) + except Exception as e: + LOG.error(f"Failed to update user properties: {e}") + raise AuthenticationError(f"Failed to save token '{self.token_type}'") + + def get_token(self, user_id: int) -> str: + user = get_user_by_id(user_id) + if not user: + LOG.error(f"User not found when retrieving '{self.token_type}' token") + raise AuthenticationError("User not found") + + encrypted_token = user.properties.get(self.token_type) + if not encrypted_token: + LOG.error(f"Token '{self.token_type}' not found in user properties") + raise AuthenticationError(f"Token '{self.token_type}' not found") + + try: + token = decrypt_token(encrypted_token, self.encryption_key) + return token + except DecryptionError as e: + LOG.error(f"Failed to decrypt token '{self.token_type}': {e}") + self.invalidate_token(user_id) + raise AuthenticationError(f"Invalid token '{self.token_type}'") + + def invalidate_token(self, user_id: int): + try: + update_user_properties(user_id, **{self.token_type: None}) + LOG.debug(f"Removed token '{self.token_type}' for user ID {user_id}") + except Exception as e: + LOG.error( + f"Failed to remove token '{self.token_type}' for user ID {user_id}: {e}" + ) diff --git a/querybook/server/logic/datadoc.py b/querybook/server/logic/datadoc.py index 37ff2ddbf..ec7c15c03 100644 --- a/querybook/server/logic/datadoc.py +++ b/querybook/server/logic/datadoc.py @@ -248,6 +248,59 @@ def clone_data_doc(id, owner_uid, commit=True, session=None): return new_data_doc +@with_session +def restore_data_doc_from_commit( + datadoc_id: int, commit_datadoc: DataDoc, commit=True, session=None +) -> DataDoc: + data_doc = 
get_data_doc_by_id(datadoc_id, session=session) + assert data_doc is not None, "DataDoc not found" + + # Update the DataDoc's title and meta + data_doc = update_data_doc( + datadoc_id, + title=commit_datadoc.title, + meta=commit_datadoc.meta, + commit=False, + session=session, + ) + + # Delete existing DataDocCells and DataCells + for existing_cell in data_doc.cells: + delete_data_doc_cell( + data_doc_id=data_doc.id, + data_cell_id=existing_cell.id, + commit=False, + session=session, + ) + + # Create new DataCells from commit and add them to the DataDoc + for index, cell in enumerate(commit_datadoc.cells): + data_cell = create_data_cell( + cell_type=cell.cell_type.name, + context=cell.context, + meta=cell.meta, + commit=False, + session=session, + ) + insert_data_doc_cell( + data_doc_id=data_doc.id, + cell_id=data_cell.id, + index=index, + commit=False, + session=session, + ) + + if commit: + session.commit() + update_es_data_doc_by_id(data_doc.id) + update_es_queries_by_datadoc_id(data_doc.id) + else: + session.flush() + + session.refresh(data_doc) + return data_doc + + """ ---------------------------------------------------------------------------------------------------------- DATA CELL diff --git a/querybook/server/logic/datadoc_collab.py b/querybook/server/logic/datadoc_collab.py index 8e1154548..d384a40ed 100644 --- a/querybook/server/logic/datadoc_collab.py +++ b/querybook/server/logic/datadoc_collab.py @@ -4,9 +4,13 @@ ) from app.flask_app import socketio from app.db import with_session +from clients.github_client import GitHubClient from const.data_doc import DATA_DOC_NAMESPACE +from datasources.github import with_github_client from logic import datadoc as logic +from logic import user as user_logic from logic.datadoc_permission import assert_can_read, assert_can_write +from flask_login import current_user @with_session @@ -43,6 +47,41 @@ def update_datadoc(doc_id, fields, sid="", session=None): return doc_dict +@with_session +@with_github_client +def 
restore_data_doc( + github_client: GitHubClient, + datadoc_id: int, + commit_sha: str, + commit_message: str, + sid="", + session=None, +): + assert_can_write(datadoc_id, session=session) + verify_data_doc_permission(datadoc_id, session=session) + + commit_datadoc = github_client.get_datadoc_at_commit(commit_sha) + restored_datadoc = logic.restore_data_doc_from_commit( + datadoc_id, commit_datadoc, commit=True, session=session + ) + + user = user_logic.get_user_by_id(current_user.id, session=session) + assert user is not None, "User does not exist" + + # Emit the restored DataDoc to clients + socketio.emit( + "data_doc_restored", + ( + sid, + restored_datadoc.to_dict(with_cells=True), + commit_message, + user.get_name(), + ), + namespace=DATA_DOC_NAMESPACE, + room=datadoc_id, + ) + + @with_session def insert_data_cell( doc_id, index, cell_type, context=None, meta=None, sid="", session=None diff --git a/querybook/server/logic/github.py b/querybook/server/logic/github.py new file mode 100644 index 000000000..d56945225 --- /dev/null +++ b/querybook/server/logic/github.py @@ -0,0 +1,40 @@ +from app.db import with_session +from models.github import GitHubLink +from models.datadoc import DataDoc + + +@with_session +def create_repo_link( + datadoc_id: int, + user_id: int, + directory: str, + commit=True, + session=None, +): + datadoc = DataDoc.get(id=datadoc_id, session=session) + assert datadoc is not None, f"DataDoc with id {datadoc_id} not found" + + github_link = GitHubLink.get(datadoc_id=datadoc_id, session=session) + if github_link is None: + github_link = GitHubLink.create( + { + "datadoc_id": datadoc_id, + "user_id": user_id, + "directory": directory, + }, + commit=commit, + session=session, + ) + else: + github_link = GitHubLink.update( + id=github_link.id, + fields={"directory": directory}, + commit=commit, + session=session, + ) + return github_link + + +@with_session +def get_repo_link(datadoc_id: int, session=None): + return 
GitHubLink.get(datadoc_id=datadoc_id, session=session) diff --git a/querybook/server/models/__init__.py b/querybook/server/models/__init__.py index cf3dce9f2..6550df625 100644 --- a/querybook/server/models/__init__.py +++ b/querybook/server/models/__init__.py @@ -15,3 +15,4 @@ from .data_element import * from .comment import * from .survey import * +from .github import * diff --git a/querybook/server/models/github.py b/querybook/server/models/github.py new file mode 100644 index 000000000..9405c3fcb --- /dev/null +++ b/querybook/server/models/github.py @@ -0,0 +1,37 @@ +import sqlalchemy as sql +from sqlalchemy.sql import func +from lib.sqlalchemy import CRUDMixin +from sqlalchemy.orm import backref, relationship +from app import db + +Base = db.Base + + +class GitHubLink(Base, CRUDMixin): + __tablename__ = "github_link" + id = sql.Column(sql.Integer, primary_key=True, autoincrement=True) + datadoc_id = sql.Column( + sql.Integer, sql.ForeignKey("data_doc.id"), nullable=False, unique=True + ) + user_id = sql.Column(sql.Integer, sql.ForeignKey("user.id"), nullable=False) + directory = sql.Column(sql.String(255), nullable=False, default="datadocs") + created_at = sql.Column(sql.DateTime, server_default=func.now(), nullable=False) + updated_at = sql.Column( + sql.DateTime, server_default=func.now(), onupdate=func.now(), nullable=False + ) + + datadoc = relationship( + "DataDoc", + backref=backref("github_link", uselist=False, cascade="all, delete-orphan"), + ) + user = relationship("User", backref=backref("github_link", uselist=False)) + + def to_dict(self): + return { + "id": self.id, + "datadoc_id": self.datadoc_id, + "user_id": self.user_id, + "directory": self.directory, + "created_at": self.created_at, + "updated_at": self.updated_at, + } diff --git a/querybook/tests/test_lib/test_github_integration/test_github_client.py b/querybook/tests/test_lib/test_github_integration/test_github_client.py new file mode 100644 index 000000000..b05c93a23 --- /dev/null +++ 
b/querybook/tests/test_lib/test_github_integration/test_github_client.py @@ -0,0 +1,176 @@ +import pytest +from unittest.mock import MagicMock +from clients.github_client import GitHubClient +from models.datadoc import DataDoc +from models.github import GitHubLink +from github import GithubException + + +@pytest.fixture +def mock_github(monkeypatch): + mock_github = MagicMock() + monkeypatch.setattr("clients.github_client.Github", mock_github) + return mock_github + + +@pytest.fixture +def mock_github_link(): + datadoc = DataDoc(id=1, title="Test Doc", cells=[]) + return GitHubLink( + datadoc=datadoc, + user_id=1, + directory="datadocs", + ) + + +@pytest.fixture +def mock_repo(): + return MagicMock() + + +def test_initialization(mock_github, mock_github_link, mock_repo): + access_token = "fake_token" + repo_name = "test_repo" + branch = "main" + mock_github_instance = mock_github.return_value + mock_github_instance.get_repo.return_value = mock_repo + client = GitHubClient( + access_token=access_token, + repo_name=repo_name, + branch=branch, + github_link=mock_github_link, + ) + assert client.client is not None + assert client.user is not None + assert client.repo is not None + + +def test_commit_datadoc_update(mock_github, mock_github_link, mock_repo): + access_token = "fake_token" + repo_name = "test_repo" + branch = "main" + mock_github_instance = mock_github.return_value + mock_github_instance.get_repo.return_value = mock_repo + mock_repo.get_contents.return_value = MagicMock(sha="fake_sha") + client = GitHubClient( + access_token=access_token, + repo_name=repo_name, + branch=branch, + github_link=mock_github_link, + ) + client.commit_datadoc() + mock_repo.update_file.assert_called_once() + + with pytest.raises(Exception) as excinfo: + client = GitHubClient( + access_token="fake_token", repo_name=repo_name, branch=branch + ) + client.commit_datadoc() + assert "GitHub link is required for this operation" in str(excinfo.value) + + +def 
test_commit_datadoc_create(mock_github, mock_github_link, mock_repo): + access_token = "fake_token" + repo_name = "test_repo" + branch = "main" + mock_github_instance = mock_github.return_value + mock_github_instance.get_repo.return_value = mock_repo + mock_repo.get_contents.side_effect = GithubException(404, "Not Found", None) + client = GitHubClient( + access_token=access_token, + repo_name=repo_name, + branch=branch, + github_link=mock_github_link, + ) + client.commit_datadoc() + mock_repo.create_file.assert_called_once() + + with pytest.raises(Exception) as excinfo: + client = GitHubClient( + access_token="fake_token", repo_name=repo_name, branch=branch + ) + client.commit_datadoc() + assert "GitHub link is required for this operation" in str(excinfo.value) + + +def test_get_datadoc_versions(mock_github, mock_github_link, mock_repo): + access_token = "fake_token" + repo_name = "test_repo" + branch = "main" + mock_github_instance = mock_github.return_value + mock_github_instance.get_repo.return_value = mock_repo + mock_commit = MagicMock() + mock_commit.raw_data = {"sha": "123"} + mock_commits = MagicMock() + mock_commits.get_page.return_value = [mock_commit] + mock_repo.get_commits.return_value = mock_commits + client = GitHubClient( + access_token=access_token, + repo_name=repo_name, + branch=branch, + github_link=mock_github_link, + ) + versions = client.get_datadoc_versions() + assert len(versions) == 1 + assert versions[0]["sha"] == "123" + + with pytest.raises(Exception) as excinfo: + client = GitHubClient( + access_token="fake_token", repo_name=repo_name, branch=branch + ) + client.get_datadoc_versions() + assert "GitHub link is required for this operation" in str(excinfo.value) + + +def test_get_repo_directories(mock_github, mock_github_link, mock_repo): + access_token = "fake_token" + repo_name = "test_repo" + branch = "main" + mock_github_instance = mock_github.return_value + mock_github_instance.get_repo.return_value = mock_repo + mock_directory = 
MagicMock() + mock_directory.type = "dir" + mock_directory.path = "datadocs" + mock_repo.get_contents.return_value = [mock_directory] + client = GitHubClient( + access_token=access_token, + repo_name=repo_name, + branch=branch, + github_link=mock_github_link, + ) + directories = client.get_repo_directories() + assert len(directories) == 1 + assert directories[0] == "datadocs" + + +def test_get_datadoc_at_commit(mock_github, mock_github_link, mock_repo): + access_token = "fake_token" + repo_name = "test_repo" + branch = "main" + mock_github_instance = mock_github.return_value + mock_github_instance.get_repo.return_value = mock_repo + mock_file_contents = MagicMock() + mock_file_contents.decoded_content = ( + b"---\nid: 1\ntitle: DataDoc\n---\n\n" + b"\n" + b"## Text\n\nContent\n" + ) + mock_repo.get_contents.return_value = mock_file_contents + client = GitHubClient( + access_token=access_token, + repo_name=repo_name, + branch=branch, + github_link=mock_github_link, + ) + datadoc = client.get_datadoc_at_commit(commit_sha="fake_sha") + assert datadoc.title == "DataDoc" + assert datadoc.id == 1 + assert datadoc.cells[0].context == "Content" + + with pytest.raises(Exception) as excinfo: + client = GitHubClient( + access_token="fake_token", repo_name=repo_name, branch=branch + ) + client.get_datadoc_at_commit(commit_sha="fake_sha") + assert "GitHub link is required for this operation" in str(excinfo.value) diff --git a/querybook/tests/test_lib/test_github_integration/test_serializers.py b/querybook/tests/test_lib/test_github_integration/test_serializers.py new file mode 100644 index 000000000..d12877e51 --- /dev/null +++ b/querybook/tests/test_lib/test_github_integration/test_serializers.py @@ -0,0 +1,174 @@ +import pytest +from const.data_doc import DataCellType +from lib.github.serializers import ( + serialize_datadoc_to_markdown, + deserialize_datadoc_from_markdown, +) +from models.datadoc import DataCell, DataDoc +from datetime import datetime, timezone + + 
+@pytest.fixture +def mock_datadoc(): + cells = [ + DataCell( + id=1, + cell_type=DataCellType.query, + context="SELECT * FROM table;", + created_at=datetime(2023, 1, 1, 0, 0, 0, tzinfo=timezone.utc), + updated_at=datetime(2023, 1, 1, 0, 0, 0, tzinfo=timezone.utc), + meta={"title": "Sample Query"}, + ), + DataCell( + id=2, + cell_type=DataCellType.text, + context="

This is a text cell with HTML content.

", + created_at=datetime(2023, 1, 1, 0, 0, 0, tzinfo=timezone.utc), + updated_at=datetime(2023, 1, 1, 0, 0, 0, tzinfo=timezone.utc), + meta={}, + ), + DataCell( + id=3, + cell_type=DataCellType.chart, + context=None, # Context is None for chart cells + created_at=datetime(2023, 1, 1, 0, 0, 0, tzinfo=timezone.utc), + updated_at=datetime(2023, 1, 1, 0, 0, 0, tzinfo=timezone.utc), + meta={"chart_type": "line"}, + ), + ] + datadoc = DataDoc( + id=1, + environment_id=1, + public=True, + archived=False, + owner_uid="user1", + created_at=datetime(2023, 1, 1, 0, 0, 0, tzinfo=timezone.utc), + updated_at=datetime(2023, 1, 1, 0, 0, 0, tzinfo=timezone.utc), + title="Test DataDoc", + cells=cells, + ) + datadoc.meta = {"variables": []} + return datadoc + + +def test_serialize_datadoc_to_markdown(mock_datadoc): + expected_markdown = ( + "---\n" + "archived: false\n" + "created_at: '2023-01-01T00:00:00+00:00'\n" + "environment_id: 1\n" + "id: 1\n" + "meta:\n" + " variables: []\n" + "owner_uid: user1\n" + "public: true\n" + "title: Test DataDoc\n" + "updated_at: '2023-01-01T00:00:00+00:00'\n" + "---\n\n" + "# Test DataDoc\n\n" + "\n" + "## Query: Sample Query\n\n" + "```sql\nSELECT * FROM table;\n```\n\n" + "\n" + "## Text\n\n" + "

This is a text cell with HTML content.

\n\n" + "\n" + "## Chart\n\n" + "*Chart generated from the metadata.*\n" + ) + + serialized = serialize_datadoc_to_markdown(mock_datadoc) + + # Normalize line endings and strip trailing spaces for comparison + # Preserve line breaks by joining with '\n' + serialized = "".join([line.rstrip() for line in serialized.strip().splitlines()]) + expected_markdown = "".join( + [line.rstrip() for line in expected_markdown.strip().splitlines()] + ) + assert serialized == expected_markdown + + +def test_deserialize_datadoc_from_markdown(mock_datadoc): + markdown_str = serialize_datadoc_to_markdown(mock_datadoc) + deserialized_datadoc = deserialize_datadoc_from_markdown(markdown_str) + assert deserialized_datadoc.to_dict(with_cells=True) == mock_datadoc.to_dict( + with_cells=True + ) + + +def test_deserialize_with_inner_code_blocks(): + """ + Test deserialization where text/query content contains user written ``` backticks that may interfere with deserialization process. + """ + markdown_str = ( + "---\n" + "archived: false\n" + "created_at: '2023-01-01T00:00:00+00:00'\n" + "environment_id: 1\n" + "id: 2\n" + "meta:\n" + " variables: []\n" + "owner_uid: user1\n" + "public: true\n" + "title: Document with Code Blocks\n" + "updated_at: '2023-01-01T00:00:00+00:00'\n" + "---\n\n" + "# Document with Code Blocks\n\n" + "\n" + "## Text\n\n" + "Here is some text with a code block:\n" + "```python\nprint('Hello, World!')\n```\n\n" + ) + + deserialized = deserialize_datadoc_from_markdown(markdown_str) + expected_datadoc = DataDoc( + id=2, + environment_id=1, + public=True, + archived=False, + owner_uid="user1", + created_at=datetime(2023, 1, 1, tzinfo=timezone.utc), + updated_at=datetime(2023, 1, 1, tzinfo=timezone.utc), + title="Document with Code Blocks", + cells=[ + DataCell( + id=2, + cell_type=DataCellType.text, + context="Here is some text with a code block:\n```python\nprint('Hello, World!')\n```", + created_at=datetime(2023, 1, 1, tzinfo=timezone.utc), + updated_at=datetime(2023, 
1, 1, tzinfo=timezone.utc), + meta={}, + ) + ], + ) + expected_datadoc.meta = {"variables": []} + + assert deserialized.to_dict(with_cells=True) == expected_datadoc.to_dict( + with_cells=True + ) diff --git a/querybook/webapp/components/DataDocGitHub/CommitCard.tsx b/querybook/webapp/components/DataDocGitHub/CommitCard.tsx new file mode 100644 index 000000000..2d1257d18 --- /dev/null +++ b/querybook/webapp/components/DataDocGitHub/CommitCard.tsx @@ -0,0 +1,67 @@ +import React from 'react'; + +import { ComponentType, ElementType } from 'const/analytics'; +import { trackClick } from 'lib/analytics'; +import { ICommit } from 'resource/github'; +import { AsyncButton } from 'ui/AsyncButton/AsyncButton'; +import { Button } from 'ui/Button/Button'; +import { Card } from 'ui/Card/Card'; +import { Link } from 'ui/Link/Link'; +import { AccentText, StyledText } from 'ui/StyledText/StyledText'; + +import './GitHub.scss'; + +interface IProps { + version: ICommit; + onRestore: (version: ICommit) => Promise; + onCompare: (version?: ICommit) => void; +} + +export const CommitCard: React.FC = ({ + version, + onRestore, + onCompare, +}) => ( + + + {version.commit.message} + +
+ + Author:{' '} + {version.commit.author.name} + + + Date:{' '} + {new Date(version.commit.author.date).toLocaleString()} + +
+
+ + onRestore(version)} + className="ml8" + title="Restore Version" + hoverColor="var(--color-accent-dark)" + pushable + /> +
+
+); diff --git a/querybook/webapp/components/DataDocGitHub/DataDocGitHubButton.tsx b/querybook/webapp/components/DataDocGitHub/DataDocGitHubButton.tsx new file mode 100644 index 000000000..19abd26df --- /dev/null +++ b/querybook/webapp/components/DataDocGitHub/DataDocGitHubButton.tsx @@ -0,0 +1,41 @@ +import React, { useCallback, useState } from 'react'; + +import { IconButton } from 'ui/Button/IconButton'; + +import { GitHubIntegration } from './GitHubIntegration'; + +interface IProps { + docId: number; +} + +export const DataDocGitHubButton: React.FunctionComponent = ({ + docId, +}) => { + const [isGitHubModalOpen, setIsGitHubModalOpen] = useState(false); + + const handleOpenGitHubModal = useCallback(() => { + setIsGitHubModalOpen(true); + }, []); + + const handleCloseGitHubModal = useCallback(() => { + setIsGitHubModalOpen(false); + }, []); + + return ( + <> + + {isGitHubModalOpen && ( + + )} + + ); +}; diff --git a/querybook/webapp/components/DataDocGitHub/GitHub.scss b/querybook/webapp/components/DataDocGitHub/GitHub.scss new file mode 100644 index 000000000..05ea86cd9 --- /dev/null +++ b/querybook/webapp/components/DataDocGitHub/GitHub.scss @@ -0,0 +1,99 @@ +.GitHubAuth { + text-align: center; +} + +.github-tab-item .flex-center { + display: flex; + align-items: center; +} + +.github-tab-item .flex-center .Icon { + margin-right: 4px; +} + +.feature-disabled { + display: flex; + flex-direction: column; + align-items: center; + justify-content: center; + text-align: center; + color: var(--color-light); +} + +.GitHubVersions { + .commit-list-wrapper { + position: relative; + overflow: visible; + } + + .commit-list { + max-height: 600px; + overflow-y: auto; + } + + .commit-actions { + display: flex; + align-items: center; + } + + .load-more-button { + margin-top: 12px; + margin-bottom: 16px; + } + + .compare-slide-out-panel { + position: absolute; + overflow: visible; + top: 0; + right: 0; + height: 100%; + width: 60%; + background-color: var(--bg); + transition: 
transform 0.3s ease; + z-index: 1000; + transform: translateX(100%); + + &.open { + transform: translateX(0%); + } + + &.full-screen { + position: fixed; + width: 100% !important; + height: 100% !important; + top: 0; + left: 0; + transform: translateX(0%); + } + + .panel-header { + position: absolute; + top: 0; + z-index: 9; + right: 0; + + .tooltip { + z-index: 1000; + } + } + + .GitHubVersionsComparePanel { + display: flex; + flex-direction: column; + height: 100%; + background-color: var(--bg); + padding: 8px; + box-sizing: border-box; + overflow-y: auto; + } + } +} + +.GitHubSettings { + border-radius: 4px; + box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1); +} + +.GitHubSettings-section-content { + border-radius: 4px; +} diff --git a/querybook/webapp/components/DataDocGitHub/GitHubAuth.tsx b/querybook/webapp/components/DataDocGitHub/GitHubAuth.tsx new file mode 100644 index 000000000..0cb713aca --- /dev/null +++ b/querybook/webapp/components/DataDocGitHub/GitHubAuth.tsx @@ -0,0 +1,31 @@ +import React from 'react'; + +import { AsyncButton } from 'ui/AsyncButton/AsyncButton'; +import { Icon } from 'ui/Icon/Icon'; +import { Message } from 'ui/Message/Message'; + +import './GitHub.scss'; + +interface IProps { + onAuthorize: () => Promise; +} + +export const GitHubAuth: React.FunctionComponent = ({ + onAuthorize, +}) => ( +
+ + + +
+); diff --git a/querybook/webapp/components/DataDocGitHub/GitHubDirectory.tsx b/querybook/webapp/components/DataDocGitHub/GitHubDirectory.tsx new file mode 100644 index 000000000..31918c522 --- /dev/null +++ b/querybook/webapp/components/DataDocGitHub/GitHubDirectory.tsx @@ -0,0 +1,172 @@ +import { Form, Formik } from 'formik'; +import React, { useCallback, useEffect, useMemo, useState } from 'react'; +import toast from 'react-hot-toast'; +import * as Yup from 'yup'; + +import { GitHubResource } from 'resource/github'; +import { AsyncButton } from 'ui/AsyncButton/AsyncButton'; +import { FormWrapper } from 'ui/Form/FormWrapper'; +import { SimpleField } from 'ui/FormikField/SimpleField'; +import { Icon } from 'ui/Icon/Icon'; +import { Loading } from 'ui/Loading/Loading'; +import { Message } from 'ui/Message/Message'; + +interface IProps { + docId: number; + linkedDirectory?: string | null; + onLinkDirectory: (directory: string) => Promise; +} + +const validationSchema = Yup.object().shape({ + /** + * Regex Examples: + * Valid: + * - parent + * - parent/child + * - parent/child_grandchild + * + * Invalid: + * - parent/ (Trailing slash) + * - parent//child (Consecutive slashes) + * - parent/child# (Invalid character '#') + */ + directory: Yup.string() + .notRequired() + .matches( + /^(?!.*\/$)(?!.*\/\/)[A-Za-z0-9_-]+(?:\/[A-Za-z0-9_-]+)*$/, + 'Invalid directory path. Use letters, numbers, "_", or "-". No trailing or consecutive "/". 
Example: parent/child' + ), +}); + +const DEFAULT_DIRECTORY = 'datadocs'; + +export const GitHubDirectory: React.FC = ({ + docId, + linkedDirectory, + onLinkDirectory, +}) => { + const [directories, setDirectories] = useState([]); + const [errorMessage, setErrorMessage] = useState(null); + const [isLoading, setIsLoading] = useState(false); + + const fetchDirectories = useCallback(async () => { + setIsLoading(true); + try { + const { data } = await GitHubResource.getDirectories(docId); + setDirectories(data.directories); + } catch (error) { + console.error('Failed to fetch directories:', error); + setErrorMessage('Failed to fetch directories. Please try again.'); + } finally { + setIsLoading(false); + } + }, [docId]); + + useEffect(() => { + fetchDirectories(); + }, [fetchDirectories]); + + const handleSubmit = async (values: { directory: string }) => { + const directory = values.directory || DEFAULT_DIRECTORY; + try { + await onLinkDirectory(directory); + toast.success('Directory linked successfully!'); + } catch (error) { + console.error('Error linking directory:', error); + setErrorMessage('Failed to link directory. Please try again.'); + throw error; + } + }; + + const formContent = ( + + {({ submitForm, isSubmitting, isValid, setFieldValue }) => ( + +
+ ( +
+ {' '} + Create '{inputValue}' directory +
+ )} + onCreateOption={(inputValue) => { + setDirectories((prev) => [...prev, inputValue]); + setFieldValue('directory', inputValue); + }} + onChange={(option) => + setFieldValue('directory', option) + } + help={`Select or create a directory for DataDoc commits. You can input nested directory paths like 'parent/child'. Defaults to ${DEFAULT_DIRECTORY} if left empty.`} + /> +
+ +
+ +
+ )} +
+ ); + + const linkedDirectoryMessage = ( + + ); + const unlinkedDirectoryMessage = ( + + ); + + return ( +
+ {linkedDirectory + ? linkedDirectoryMessage + : unlinkedDirectoryMessage} + {isLoading ? ( + + ) : ( + formContent + )} + {errorMessage && ( + + )} +
+ ); +}; diff --git a/querybook/webapp/components/DataDocGitHub/GitHubFeatures.tsx b/querybook/webapp/components/DataDocGitHub/GitHubFeatures.tsx new file mode 100644 index 000000000..0063ac71f --- /dev/null +++ b/querybook/webapp/components/DataDocGitHub/GitHubFeatures.tsx @@ -0,0 +1,161 @@ +import React, { useCallback, useEffect, useState } from 'react'; + +import { TooltipDirection } from 'const/tooltip'; +import { GitHubResource } from 'resource/github'; +import { Loading } from 'ui/Loading/Loading'; +import { Message } from 'ui/Message/Message'; +import { Tabs } from 'ui/Tabs/Tabs'; + +import { GitHubPush } from './GitHubPush'; +import { GitHubSettings } from './GitHubSettings'; +import { GitHubVersions } from './GitHubVersions'; + +interface IProps { + docId: number; +} + +const GITHUB_TABS = [ + { + key: 'push', + name: 'Push to GitHub', + icon: 'GitPullRequest' as const, + tooltip: 'Push your changes to GitHub', + tooltipPos: 'up' as TooltipDirection, + }, + { + key: 'versions', + name: 'GitHub Versions', + icon: 'History' as const, + tooltip: 'View and manage previous versions', + tooltipPos: 'up' as TooltipDirection, + }, + { + key: 'settings', + name: 'Settings', + icon: 'Settings' as const, + tooltip: 'Configure GitHub integration settings', + tooltipPos: 'up' as TooltipDirection, + }, +]; + +type GitHubTabKey = 'push' | 'versions' | 'settings'; + +export const GitHubFeatures: React.FC = ({ docId }) => { + const [activeTab, setActiveTab] = useState('push'); + const [linkedDirectory, setLinkedDirectory] = useState(null); + const [isLoading, setIsLoading] = useState(true); + const [errorMessage, setErrorMessage] = useState(null); + + const handleTabSelect = (key: GitHubTabKey) => { + setActiveTab(key); + }; + + const fetchLinkedDirectory = useCallback(async () => { + try { + const response = await GitHubResource.isGitHubLinked(docId); + if (response.data.linked_directory) { + setLinkedDirectory(response.data.linked_directory); + } else { + 
/**
 * Links the DataDoc to `directory` on GitHub and stores the directory
 * returned by the server as the linked directory.
 *
 * On failure the error is logged, an error message is put in component
 * state, and the error is re-thrown.
 *
 * @param directory Directory path to link the DataDoc to.
 * @throws Whatever `GitHubResource.linkGitHub` rejects with.
 */
const handleLinkDirectory = useCallback(
    async (directory: string) => {
        try {
            const linkResponse = await GitHubResource.linkGitHub(
                docId,
                directory
            );
            setLinkedDirectory(linkResponse.data.directory);
            setErrorMessage(null);
        } catch (error) {
            console.error('Failed to link directory:', error);
            setErrorMessage('Failed to link directory. Please try again.');
            // Propagate the failure. The directory form awaits this callback
            // (presumably wired through as `onLinkDirectory`) and shows a
            // success toast once it resolves; swallowing the error here made
            // a failed link look successful.
            throw error;
        }
    },
    [docId]
);
+ + {activeTab === 'push' && ( +
+ {directoryMessage} + +
+ )} + {activeTab === 'versions' && ( +
+ {directoryMessage} + +
+ )} + {activeTab === 'settings' && ( + + )} + {errorMessage && ( + + )} +
+ ); +}; diff --git a/querybook/webapp/components/DataDocGitHub/GitHubIntegration.tsx b/querybook/webapp/components/DataDocGitHub/GitHubIntegration.tsx new file mode 100644 index 000000000..a74fcb919 --- /dev/null +++ b/querybook/webapp/components/DataDocGitHub/GitHubIntegration.tsx @@ -0,0 +1,86 @@ +import React, { useCallback, useEffect, useState } from 'react'; + +import { ComponentType, ElementType } from 'const/analytics'; +import { trackClick } from 'lib/analytics'; +import { GitHubResource, IGitHubAuthResponse } from 'resource/github'; +import { Loading } from 'ui/Loading/Loading'; +import { Message } from 'ui/Message/Message'; +import { Modal } from 'ui/Modal/Modal'; + +import { GitHubAuth } from './GitHubAuth'; +import { GitHubFeatures } from './GitHubFeatures'; + +interface IProps { + docId: number; + onClose: () => void; +} + +export const GitHubIntegration: React.FC = ({ docId, onClose }) => { + const [isAuthorized, setIsAuthorized] = useState(false); + const [errorMessage, setErrorMessage] = useState(null); + const [isLoading, setIsLoading] = useState(true); + + useEffect(() => { + const checkStatus = async () => { + try { + const authResponse = await GitHubResource.isAuthorized(); + setIsAuthorized(authResponse.data.is_authorized); + } catch (error) { + console.error('Failed to check GitHub status:', error); + setErrorMessage( + 'Failed to check GitHub status. Please try again.' 
/**
 * Starts the GitHub OAuth flow: asks the backend for the authorization URL,
 * opens it in a new window and installs `window.receiveChildMessage`, which
 * closes that window and marks the user as authorized when it is invoked.
 *
 * Any failure (request error, missing URL, blocked popup) is logged and
 * surfaced through `errorMessage`; the user stays unauthorized.
 */
const handleAuthorizeGitHub = useCallback(async () => {
    trackClick({
        component: ComponentType.GITHUB,
        element: ElementType.GITHUB_CONNECT_BUTTON,
    });

    // Drop the message left over from a previous failed attempt.
    setErrorMessage(null);

    try {
        const { data }: { data: IGitHubAuthResponse } =
            await GitHubResource.authorizeGitHub();
        const url = data.url;
        if (!url) {
            throw new Error('Failed to get GitHub authorization URL');
        }

        // `window.open` returns null when the browser blocks the popup.
        // Fail now instead of crashing later inside the callback.
        const authWindow = window.open(url);
        if (!authWindow) {
            throw new Error('Failed to open GitHub authorization window');
        }

        // No `removeEventListener` here: this closure is never registered
        // as a 'message' listener, it is only exposed through
        // `window.receiveChildMessage`.
        const receiveMessage = () => {
            authWindow.close();
            delete window.receiveChildMessage;
            setIsAuthorized(true);
        };
        window.receiveChildMessage = receiveMessage;
    } catch (error) {
        console.error('GitHub authorization failed:', error);
        setErrorMessage('GitHub authorization failed. Please try again.');
        setIsAuthorized(false);
    }
}, []);
/**
 * Form submit handler: commits the DataDoc to GitHub with the entered
 * commit message.
 *
 * While the request is in flight `isSubmitting` is true. The outcome is
 * reported with a toast; on failure an inline error message is set as well.
 */
const handlePush = useCallback(
    async ({ commitMessage }: { commitMessage: string }) => {
        // Enter the "submitting" state and clear the previous error.
        setIsSubmitting(true);
        setErrorMessage(null);

        try {
            await GitHubResource.commitDataDoc(docId, commitMessage);
            toast.success('Commit pushed successfully!');
        } catch (pushError) {
            console.error('Failed to push commit:', pushError);
            setErrorMessage(
                'Failed to push commit. Please ensure the file path exists.'
            );
            toast.error('Failed to push commit');
        } finally {
            setIsSubmitting(false);
        }
    },
    [docId, setErrorMessage]
);
+ +
+
+ + {errorMessage && ( +
+ +
+ )} +
+ )} +
+ ); +}; diff --git a/querybook/webapp/components/DataDocGitHub/GitHubSettings.tsx b/querybook/webapp/components/DataDocGitHub/GitHubSettings.tsx new file mode 100644 index 000000000..214cc2996 --- /dev/null +++ b/querybook/webapp/components/DataDocGitHub/GitHubSettings.tsx @@ -0,0 +1,63 @@ +import React from 'react'; + +import { Card } from 'ui/Card/Card'; +import { Icon } from 'ui/Icon/Icon'; +import { Link } from 'ui/Link/Link'; +import { Message } from 'ui/Message/Message'; +import { StyledText } from 'ui/StyledText/StyledText'; + +import { GitHubDirectory } from './GitHubDirectory'; + +import './GitHub.scss'; + +interface IProps { + docId: number; + linkedDirectory?: string | null; + onLinkDirectory: (directory: string) => Promise; +} + +export const GitHubSettings: React.FC = ({ + docId, + linkedDirectory, + onLinkDirectory, +}) => { + const authorizationCardDom = ( +
+ + + Your GitHub account is successfully authorized. Manage your + GitHub authorized OAuth apps{' '} + + here{' '} + + + . + + +
+ ); + + const directoryCardDom = ( +
+ +
+ ); + + return ( + + {authorizationCardDom} + {directoryCardDom} + + ); +}; diff --git a/querybook/webapp/components/DataDocGitHub/GitHubVersions.tsx b/querybook/webapp/components/DataDocGitHub/GitHubVersions.tsx new file mode 100644 index 000000000..0a7214c48 --- /dev/null +++ b/querybook/webapp/components/DataDocGitHub/GitHubVersions.tsx @@ -0,0 +1,233 @@ +import React, { useCallback, useState } from 'react'; + +import { QueryComparison } from 'components/TranspileQueryModal/QueryComparison'; +import { ComponentType, ElementType } from 'const/analytics'; +import { useRestoreDataDoc } from 'hooks/dataDoc/useRestoreDataDoc'; +import { usePaginatedResource } from 'hooks/usePaginatedResource'; +import { useResource } from 'hooks/useResource'; +import { trackClick } from 'lib/analytics'; +import { GitHubResource, ICommit } from 'resource/github'; +import { AsyncButton } from 'ui/AsyncButton/AsyncButton'; +import { IconButton } from 'ui/Button/IconButton'; +import { FeatureDisabledMessage } from 'ui/DisabledSection/FeatureDisabledMessage'; +import { ErrorPage } from 'ui/ErrorPage/ErrorPage'; +import { Link } from 'ui/Link/Link'; +import { Loading } from 'ui/Loading/Loading'; +import { Message } from 'ui/Message/Message'; + +import { CommitCard } from './CommitCard'; + +import './GitHub.scss'; + +interface IProps { + docId: number; + linkedDirectory: string | null; +} + +export const GitHubVersions: React.FunctionComponent = ({ + docId, + linkedDirectory, +}) => { + const [isCompareOpen, setIsCompareOpen] = useState(false); + const [selectedCommit, setSelectedCommit] = useState(null); + const [isFullScreen, setIsFullScreen] = useState(false); + + const { + data: commitVersions, + isLoading, + isError, + fetchMore, + hasMore, + } = usePaginatedResource( + useCallback( + (limit, offset) => + GitHubResource.getDataDocVersions(docId, limit, offset), + [docId] + ), + { batchSize: 5 } + ); + + const { + data: comparisonData, + isLoading: isComparisonLoading, + isError: 
/** Asks the restore hook to restore this DataDoc to the given commit. */
const handleRestore = useCallback(
    async (commit: ICommit) => {
        await restoreDataDoc(docId, commit.sha, commit.commit.message);
    },
    [docId, restoreDataDoc]
);

/**
 * Opens the compare panel for `version`, or closes it.
 *
 * The panel is closed (and full screen is reset) when no version is passed,
 * or when the passed version is the one already open.
 */
const toggleCompare = useCallback(
    (version?: ICommit) => {
        const isAlreadyShown =
            isCompareOpen && selectedCommit?.sha === version?.sha;

        if (version && !isAlreadyShown) {
            setSelectedCommit(version);
            setIsCompareOpen(true);
            return;
        }

        // Either an explicit close or a second click on the open commit.
        setIsCompareOpen(false);
        setSelectedCommit(null);
        setIsFullScreen(false);
    },
    [isCompareOpen, selectedCommit]
);

/** Flips full-screen mode; only the switch into full screen is tracked. */
const toggleFullScreen = useCallback(() => {
    const enteringFullScreen = !isFullScreen;
    if (enteringFullScreen) {
        trackClick({
            component: ComponentType.GITHUB,
            element: ElementType.GITHUB_COMPARE_FULLSCREEN_BUTTON,
        });
    }
    setIsFullScreen((current) => !current);
}, [isFullScreen]);
+ +
Loading versions...
+
+ ); + } + + if (isError) { + return ( + + ); + } + + if (commitVersions.length === 0) { + return ( +
+ +
+ ); + } + + const commitListDOM = ( +
+ {commitVersions.map((version) => ( + + ))} +
+ ); + + const loadMoreButtonDOM = hasMore ? ( + + Load More + + ) : null; + + const comparePanelDOM = ( +
+ {selectedCommit && ( +
+
+ + + + + toggleCompare()} + size={16} + tooltip="Close Compare Panel" + tooltipPos="left" + /> +
+ {isComparisonLoading ? ( + + ) : isComparisonError || !comparisonData ? ( + + ) : ( + + )} +
+ )} +
+ ); + + return ( +
+
+ {commitListDOM} + {loadMoreButtonDOM} + {comparePanelDOM} +
+
+ ); +}; diff --git a/querybook/webapp/components/DataDocRightSidebar/DataDocRightSidebar.tsx b/querybook/webapp/components/DataDocRightSidebar/DataDocRightSidebar.tsx index c22e012f4..68538fd53 100644 --- a/querybook/webapp/components/DataDocRightSidebar/DataDocRightSidebar.tsx +++ b/querybook/webapp/components/DataDocRightSidebar/DataDocRightSidebar.tsx @@ -3,8 +3,10 @@ import { useDispatch, useSelector } from 'react-redux'; import { DataDocBoardsButton } from 'components/DataDocBoardsButton/DataDocBoardsButton'; import { DataDocDAGExporterButton } from 'components/DataDocDAGExporter/DataDocDAGExporterButton'; +import { DataDocGitHubButton } from 'components/DataDocGitHub/DataDocGitHubButton'; import { DataDocTemplateButton } from 'components/DataDocTemplateButton/DataDocTemplateButton'; import { DataDocUIGuide } from 'components/UIGuide/DataDocUIGuide'; +import PublicConfig from 'config/querybook_public_config.yaml'; import { ComponentType, ElementType } from 'const/analytics'; import { IDataDoc, IDataDocMeta } from 'const/datadoc'; import { useAnnouncements } from 'hooks/redux/useAnnouncements'; @@ -48,6 +50,7 @@ export const DataDocRightSidebar: React.FunctionComponent = ({ }) => { const numAnnouncements = useAnnouncements().length; const exporterExists = useExporterExists(); + const githubIntegrationEnabled = PublicConfig.github_integration.enabled; const selfRef = React.useRef(); const { showScrollToTop, scrollToTop } = useScrollToTop({ @@ -83,6 +86,10 @@ export const DataDocRightSidebar: React.FunctionComponent = ({ ); + const githubButtonDOM = githubIntegrationEnabled && ( + + ); + const buttonSection = (
@@ -131,6 +138,7 @@ export const DataDocRightSidebar: React.FunctionComponent = ({
{runAllButtonDOM} + {githubButtonDOM} {isEditable && exporterExists && ( )} diff --git a/querybook/webapp/components/TranspileQueryModal/QueryComparison.scss b/querybook/webapp/components/TranspileQueryModal/QueryComparison.scss index 6ee8654e2..2f6b53a7a 100644 --- a/querybook/webapp/components/TranspileQueryModal/QueryComparison.scss +++ b/querybook/webapp/components/TranspileQueryModal/QueryComparison.scss @@ -14,3 +14,10 @@ width: 100%; } } + +.truncate { + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; + max-width: 100%; +} diff --git a/querybook/webapp/components/TranspileQueryModal/QueryComparison.tsx b/querybook/webapp/components/TranspileQueryModal/QueryComparison.tsx index 4a72d46f8..8b3c941d0 100644 --- a/querybook/webapp/components/TranspileQueryModal/QueryComparison.tsx +++ b/querybook/webapp/components/TranspileQueryModal/QueryComparison.tsx @@ -68,7 +68,7 @@ export const QueryComparison: React.FC<{ {fromQueryTitle && (
{typeof fromQueryTitle === 'string' ? ( - {fromQueryTitle} + {fromQueryTitle} ) : ( fromQueryTitle )} @@ -87,7 +87,7 @@ export const QueryComparison: React.FC<{ {toQueryTitle && (
{typeof toQueryTitle === 'string' ? ( - {toQueryTitle} + {toQueryTitle} ) : ( toQueryTitle )} diff --git a/querybook/webapp/config.d.ts b/querybook/webapp/config.d.ts index 672524d93..69069788c 100644 --- a/querybook/webapp/config.d.ts +++ b/querybook/webapp/config.d.ts @@ -126,6 +126,9 @@ declare module 'config/querybook_public_config.yaml' { sample_user_guide_link: string; sampling_tool_tip_delay: number; }; + github_integration: { + enabled: boolean; + }; }; export default data; } diff --git a/querybook/webapp/const/analytics.ts b/querybook/webapp/const/analytics.ts index 7d923b14a..3ea663fbf 100644 --- a/querybook/webapp/const/analytics.ts +++ b/querybook/webapp/const/analytics.ts @@ -21,6 +21,7 @@ export enum ComponentType { TABLE_DETAIL_VIEW = 'TABLE_DETAIL_VIEW', TABLE_NAVIGATOR_SEARCH = 'TABLE_NAVIGATOR_SEARCH', AI_ASSISTANT = 'AI_ASSISTANT', + GITHUB = 'GITHUB', SURVEY = 'SURVEY', LIST_PAGE = 'LIST_PAGE', } @@ -115,6 +116,12 @@ export enum ElementType { QUERY_GENERATION_REJECT_BUTTON = 'QUERY_GENERATION_REJECT_BUTTON', QUERY_GENERATION_APPLY_BUTTON = 'QUERY_GENERATION_APPLY_BUTTON', QUERY_GENERATION_APPLY_AND_RUN_BUTTON = 'QUERY_GENERATION_APPLY_AND_RUN_BUTTON', + + // Github Integration + GITHUB_CONNECT_BUTTON = 'GITHUB_CONNECT_BUTTON', + GITHUB_RESTORE_DATADOC_BUTTON = 'GITHUB_RESTORE_DATADOC_BUTTON', + GITHUB_COMPARE_BUTTON = 'GITHUB_COMPARE_BUTTON', + GITHUB_COMPARE_FULLSCREEN_BUTTON = 'GITHUB_COMPARE_FULLSCREEN_BUTTON', } export interface EventData { diff --git a/querybook/webapp/hooks/dataDoc/useRestoreDataDoc.ts b/querybook/webapp/hooks/dataDoc/useRestoreDataDoc.ts new file mode 100644 index 000000000..a8136e787 --- /dev/null +++ b/querybook/webapp/hooks/dataDoc/useRestoreDataDoc.ts @@ -0,0 +1,52 @@ +import { useCallback } from 'react'; +import toast from 'react-hot-toast'; +import { useDispatch } from 'react-redux'; + +import { ComponentType, ElementType } from 'const/analytics'; +import { trackClick } from 'lib/analytics'; +import { sendConfirm } 
/**
 * Hook returning a function that restores a DataDoc to a GitHub commit
 * after asking the user to confirm.
 *
 * The returned function only calls `sendConfirm`; its promise resolves as
 * soon as that call returns, not when the restore finishes. The restore
 * runs from the confirm callback, and its progress and outcome are
 * reported through a toast.
 *
 * @returns `(docId, commitId, commitMessage) => Promise<void>`
 */
export function useRestoreDataDoc() {
    const dispatch: Dispatch = useDispatch();

    // Builds the `onConfirm` callback for one specific restore request.
    const handleConfirm = useCallback(
        (docId: number, commitId: string, commitMessage: string) => () => {
            trackClick({
                component: ComponentType.GITHUB,
                element: ElementType.GITHUB_RESTORE_DATADOC_BUTTON,
            });

            // Fire-and-forget: the toast reports loading/success/error, so
            // the returned promise is deliberately not awaited.
            void toast.promise(
                dispatch(restoreDataDoc(docId, commitId, commitMessage)),
                {
                    loading: 'Restoring DataDoc...',
                    success: 'DataDoc has been successfully restored!',
                    error: 'Failed to restore DataDoc. Please try again.',
                }
            );
        },
        [dispatch]
    );

    return useCallback(
        async (
            docId: number,
            commitId: string,
            commitMessage: string
        ): Promise<void> => {
            sendConfirm({
                header: 'Restore DataDoc?',
                message:
                    'You are about to restore this DataDoc to the selected commit. Restoring will overwrite your current work. Please ensure you have committed any ongoing changes before proceeding.',
                onConfirm: handleConfirm(docId, commitId, commitMessage),
                confirmColor: 'cancel',
                cancelColor: 'default',
                confirmText: 'Confirm Restore',
                confirmIcon: 'AlertOctagon',
            });
        },
        [handleConfirm]
    );
}
/**
 * Thunk that restores a DataDoc to a GitHub commit.
 *
 * Sends the restore request over the DataDoc socket, waits for it to
 * settle, then re-fetches the DataDoc.
 *
 * @param docId Id of the DataDoc to restore.
 * @param commitId SHA of the commit to restore to.
 * @param commitMessage Message of that commit, forwarded with the request.
 */
export function restoreDataDoc(
    docId: number,
    commitId: string,
    commitMessage: string
): ThunkResult<Promise<void>> {
    return async (dispatch) => {
        await dataDocSocket.restoreDataDoc(docId, commitId, commitMessage);
        await dispatch(fetchDataDoc(docId));
    };
}
/** Payload of `/github/auth/`: the URL to open to start authorization. */
export interface IGitHubAuthResponse {
    url: string;
}

/** Author block of a commit. */
export interface ICommitAuthor {
    date: string;
    email: string;
    name: string;
}

/** Commit metadata: who authored it and its message. */
export interface ICommitData {
    author: ICommitAuthor;
    message: string;
}

/** One commit (version) of a DataDoc, identified by its `sha`. */
export interface ICommit {
    html_url: string;
    commit: ICommitData;
    sha: string;
}

/**
 * Client for the `/github/...` backend endpoints.
 *
 * Every call declares its response payload type so callers get typed
 * `data` without annotating it themselves.
 */
export const GitHubResource = {
    /** Fetches the URL that starts the GitHub authorization flow. */
    authorizeGitHub: () => ds.fetch<IGitHubAuthResponse>('/github/auth/'),

    /** Reports whether the current user has authorized GitHub. */
    isAuthorized: () =>
        ds.fetch<{ is_authorized: boolean }>('/github/is_authorized/'),

    /** Links a DataDoc to a directory; responds with the linked directory. */
    linkGitHub: (docId: number, directory: string) =>
        ds.save<{ directory: string }>(`/github/datadocs/${docId}/link/`, {
            directory,
        }),

    /** Fetches the directory a DataDoc is linked to, or `null` if none. */
    isGitHubLinked: (docId: number) =>
        ds.fetch<{ linked_directory: string | null }>(
            `/github/datadocs/${docId}/is_linked/`
        ),

    /** Lists the directories that can be selected for a DataDoc. */
    getDirectories: (docId: number) =>
        ds.fetch<{ directories: string[] }>(
            `/github/datadocs/${docId}/directories/`
        ),

    /** Commits a DataDoc with the given commit message. */
    commitDataDoc: (docId: number, commitMessage: string) =>
        ds.save<{ message: string }>(`/github/datadocs/${docId}/commit/`, {
            commit_message: commitMessage,
        }),

    /** Fetches one page (`limit` / `offset`) of a DataDoc's commits. */
    getDataDocVersions: (docId: number, limit: number, offset: number) =>
        ds.fetch<ICommit[]>(`/github/datadocs/${docId}/versions/`, {
            limit,
            offset,
        }),

    /** Fetches the current content and the content at `commitSha`. */
    compareDataDocVersions: (docId: number, commitSha: string) =>
        ds.fetch<{
            current_content: string;
            commit_content: string;
        }>(`/github/datadocs/${docId}/compare/`, {
            commit_sha: commitSha,
        }),
};
c3670d69d..b8c9f6048 100644 --- a/querybook/webapp/ui/Button/Button.tsx +++ b/querybook/webapp/ui/Button/Button.tsx @@ -20,6 +20,7 @@ export type ButtonProps = React.HTMLAttributes & className?: string; color?: ButtonColorType; + hoverColor?: string; theme?: ButtonThemeType; disabled?: boolean; diff --git a/querybook/webapp/ui/DisabledSection/FeatureDisabledMessage.tsx b/querybook/webapp/ui/DisabledSection/FeatureDisabledMessage.tsx new file mode 100644 index 000000000..cbf84498f --- /dev/null +++ b/querybook/webapp/ui/DisabledSection/FeatureDisabledMessage.tsx @@ -0,0 +1,22 @@ +import React from 'react'; + +import { Icon } from 'ui/Icon/Icon'; +import { Message } from 'ui/Message/Message'; + +interface FeatureDisabledMessageProps { + message?: string; +} + +export const FeatureDisabledMessage: React.FunctionComponent< + FeatureDisabledMessageProps +> = ({ message = 'This feature is currently disabled.' }) => ( +
+ + +
+); diff --git a/querybook/webapp/ui/Icon/LucideIcons.ts b/querybook/webapp/ui/Icon/LucideIcons.ts index bac6f3be9..f3abd82d2 100644 --- a/querybook/webapp/ui/Icon/LucideIcons.ts +++ b/querybook/webapp/ui/Icon/LucideIcons.ts @@ -7,6 +7,7 @@ */ import { Activity, + AlertCircle, AlertOctagon, AlertTriangle, AlignCenterHorizontal, @@ -52,9 +53,12 @@ import { FileText, Filter, FormInput, + Github, + GitPullRequest, GripVertical, Hash, HelpCircle, + History, Home, Info, Italic, @@ -107,6 +111,7 @@ import { Trash2, Type, Underline, + Unlink, Unlock, Upload, UserMinus, @@ -122,6 +127,7 @@ import { const AllLucideIcons = { Activity, + AlertCircle, AlertOctagon, AlertTriangle, AlignCenterVertical, @@ -167,10 +173,13 @@ const AllLucideIcons = { FileText, Filter, FormInput, + Github, + GitPullRequest, GripVertical, Hash, HelpCircle, Home, + History, Info, Italic, Key, @@ -223,6 +232,7 @@ const AllLucideIcons = { Trash2, Type, Underline, + Unlink, Unlock, Upload, UserMinus, diff --git a/querybook/webapp/ui/SimpleReactSelect/SimpleReactSelect.tsx b/querybook/webapp/ui/SimpleReactSelect/SimpleReactSelect.tsx index e37baad18..ed6284294 100644 --- a/querybook/webapp/ui/SimpleReactSelect/SimpleReactSelect.tsx +++ b/querybook/webapp/ui/SimpleReactSelect/SimpleReactSelect.tsx @@ -18,6 +18,8 @@ export interface ISimpleReactSelectProps { withDeselect?: boolean; isDisabled?: boolean; creatable?: boolean; + formatCreateLabel?: (inputValue: string) => React.ReactNode; + onCreateOption?: (inputValue: string) => void; selectProps?: Partial>; closeMenuOnSelect?: boolean; hideSelectedOptions?: boolean; @@ -37,6 +39,8 @@ export function SimpleReactSelect({ onChange, isDisabled, creatable, + formatCreateLabel, + onCreateOption, selectProps = {}, withDeselect = false, @@ -93,7 +97,11 @@ export function SimpleReactSelect({ return ( {creatable ? ( - + ) : (