diff --git a/tap_github/authenticator.py b/tap_github/authenticator.py index 13b20bde..8f5acf1e 100644 --- a/tap_github/authenticator.py +++ b/tap_github/authenticator.py @@ -3,10 +3,10 @@ import logging import time from copy import deepcopy -from datetime import datetime +from datetime import datetime, timedelta from os import environ from random import choice, shuffle -from typing import Any, Dict, List, Optional, Set +from typing import Any, Dict, List, Optional, Set, Tuple import jwt import requests @@ -15,7 +15,9 @@ class TokenManager: - """A class to store a token's attributes and state.""" + """A class to store a token's attributes and state. + This parent class should not be used directly, use a subclass instead. + """ DEFAULT_RATE_LIMIT = 5000 # The DEFAULT_RATE_LIMIT_BUFFER buffer serves two purposes: @@ -25,7 +27,7 @@ class TokenManager: def __init__( self, - token: str, + token: Optional[str], rate_limit_buffer: Optional[int] = None, logger: Optional[Any] = None, ): @@ -50,6 +52,9 @@ def update_rate_limit(self, response_headers: Any) -> None: def is_valid_token(self) -> bool: """Try making a request with the current token. If the request succeeds return True, else False.""" + if not self.token: + return False + try: response = requests.get( url="https://api.github.com/rate_limit", @@ -61,7 +66,7 @@ def is_valid_token(self) -> bool: return True except requests.exceptions.HTTPError: msg = ( - f"A token was dismissed. " + f"A token could not be validated. " f"{response.status_code} Client Error: " f"{str(response.content)} (Reason: {response.reason})" ) @@ -70,7 +75,7 @@ def is_valid_token(self) -> bool: return False def has_calls_remaining(self) -> bool: - """Check if token is valid. + """Check if a token has capacity to make more calls. Returns: True if the token is valid and has enough api calls remaining. @@ -85,6 +90,14 @@ def has_calls_remaining(self) -> bool: return True +class PersonalTokenManager(TokenManager): + """A class to store token rate limiting information.""" + + def __init__(self, token: str, rate_limit_buffer: Optional[int] = None, **kwargs): + """Init PersonalTokenRateLimit info.""" + super().__init__(token, rate_limit_buffer=rate_limit_buffer, **kwargs) + + def generate_jwt_token( github_app_id: str, github_private_key: str, @@ -110,7 +123,8 @@ def generate_app_access_token( github_app_id: str, github_private_key: str, github_installation_id: Optional[str] = None, -) -> str: +) -> Tuple[str, datetime]: + produced_at = datetime.now() jwt_token = generate_jwt_token(github_app_id, github_private_key) headers = {"Authorization": f"Bearer {jwt_token}"} @@ -135,14 +149,71 @@ def generate_app_access_token( if resp.status_code != 201: resp.raise_for_status() - return resp.json()["token"] + expires_at = produced_at + timedelta(hours=1) + return resp.json()["token"], expires_at + + +class AppTokenManager(TokenManager): + """A class to store an app token's attributes and state, and handle token refreshing""" + + DEFAULT_RATE_LIMIT = 15000 + DEFAULT_EXPIRY_BUFFER_MINS = 10 + + def __init__(self, env_key: str, rate_limit_buffer: Optional[int] = None, **kwargs): + if rate_limit_buffer is None: + rate_limit_buffer = self.DEFAULT_RATE_LIMIT_BUFFER + super().__init__(None, rate_limit_buffer=rate_limit_buffer, **kwargs) + + parts = env_key.split(";;") + self.github_app_id = parts[0] + self.github_private_key = (parts[1:2] or [""])[0].replace("\\n", "\n") + self.github_installation_id: Optional[str] = (parts[2:3] or [""])[0] + + self.token_expires_at: Optional[datetime] = None + self.claim_token() + + def claim_token(self): + """Updates the TokenManager's token and token_expires_at attributes. + + The outcome will be _either_ that self.token is updated to a newly claimed valid token and + self.token_expires_at is updated to the anticipated expiry time (erring on the side of an early estimate) + _or_ self.token and self.token_expires_at are both set to None. + """ + self.token = None + self.token_expires_at = None + + # Make sure we have the details we need + if not self.github_app_id or not self.github_private_key: + raise ValueError( + "GITHUB_APP_PRIVATE_KEY could not be parsed. The expected format is " + '":app_id:;;-----BEGIN RSA PRIVATE KEY-----\\n_YOUR_P_KEY_\\n-----END RSA PRIVATE KEY-----"' + ) + + self.token, self.token_expires_at = generate_app_access_token( + self.github_app_id, self.github_private_key, self.github_installation_id + ) + + # Check if the token isn't valid. If not, overwrite it with None + if not self.is_valid_token(): + if self.logger: + self.logger.warning( + "An app token was generated but could not be validated." + ) + self.token = None + self.token_expires_at = None class GitHubTokenAuthenticator(APIAuthenticatorBase): """Base class for offloading API auth.""" + @staticmethod + def get_env(): + return dict(environ) + def prepare_tokens(self) -> List[TokenManager]: - # Save GitHub tokens + """Prep GitHub tokens""" + + env_dict = self.get_env() rate_limit_buffer = self._config.get("rate_limit_buffer", None) personal_tokens: Set[str] = set() @@ -156,52 +227,42 @@ def prepare_tokens(self) -> List[TokenManager]: # Accept multiple tokens using environment variables GITHUB_TOKEN* env_tokens = { value - for key, value in environ.items() + for key, value in env_dict.items() if key.startswith("GITHUB_TOKEN") } if len(env_tokens) > 0: self.logger.info( f"Found {len(env_tokens)} 'GITHUB_TOKEN' environment variables for authentication." ) - personal_tokens = env_tokens + personal_tokens = personal_tokens.union(env_tokens) token_managers: List[TokenManager] = [] for token in personal_tokens: - token_manager = TokenManager( + token_manager = PersonalTokenManager( token, rate_limit_buffer=rate_limit_buffer, logger=self.logger ) if token_manager.is_valid_token(): token_managers.append(token_manager) + else: + logging.warn("A token was dismissed.") # Parse App level private key and generate a token - if "GITHUB_APP_PRIVATE_KEY" in environ.keys(): + if "GITHUB_APP_PRIVATE_KEY" in env_dict.keys(): # To simplify settings, we use a single env-key formatted as follows: # "{app_id};;{-----BEGIN RSA PRIVATE KEY-----\n_YOUR_PRIVATE_KEY_\n-----END RSA PRIVATE KEY-----}" - parts = environ["GITHUB_APP_PRIVATE_KEY"].split(";;") - github_app_id = parts[0] - github_private_key = (parts[1:2] or [""])[0].replace("\\n", "\n") - github_installation_id = (parts[2:3] or [""])[0] - - if not (github_private_key): - self.logger.warning( - "GITHUB_APP_PRIVATE_KEY could not be parsed. The expected format is " - '":app_id:;;-----BEGIN RSA PRIVATE KEY-----\n_YOUR_P_KEY_\n-----END RSA PRIVATE KEY-----"' + env_key = env_dict["GITHUB_APP_PRIVATE_KEY"] + try: + app_token_manager = AppTokenManager( + env_key, rate_limit_buffer=rate_limit_buffer, logger=self.logger ) - - else: - app_token = generate_app_access_token( - github_app_id, github_private_key, github_installation_id or None - ) - token_manager = TokenManager( - app_token, rate_limit_buffer=rate_limit_buffer, logger=self.logger + if app_token_manager.is_valid_token(): + token_managers.append(app_token_manager) + except ValueError as e: + self.logger.warn( + f"An error was thrown while preparing an app token: {e}" ) - if token_manager.is_valid_token(): - token_managers.append(token_manager) self.logger.info(f"Tap will run with {len(token_managers)} auth tokens") - - # Create a dict of TokenManager - # TODO - separate app_token and add logic to refresh the token using generate_app_access_token. return token_managers def __init__(self, stream: RESTStream) -> None: diff --git a/tap_github/tests/test_authenticator.py b/tap_github/tests/test_authenticator.py index c63853ca..68227815 100644 --- a/tap_github/tests/test_authenticator.py +++ b/tap_github/tests/test_authenticator.py @@ -1,10 +1,17 @@ +import re from datetime import datetime, timedelta from unittest.mock import MagicMock, patch import pytest import requests +from singer_sdk.streams import RESTStream -from tap_github.authenticator import TokenManager +from tap_github.authenticator import ( + AppTokenManager, + GitHubTokenAuthenticator, + PersonalTokenManager, + TokenManager, +) class TestTokenManager: @@ -112,3 +119,308 @@ def test_has_calls_remaining_fails_if_few_calls_remaining_and_reset_time_not_rea token_manager.update_rate_limit(mock_response_headers) assert not token_manager.has_calls_remaining() + + +class TestAppTokenManager: + + def test_initialization_with_3_part_env_key(self): + with patch.object(AppTokenManager, "claim_token", return_value=None): + token_manager = AppTokenManager("12345;;key\\ncontent;;67890") + assert token_manager.github_app_id == "12345" + assert token_manager.github_private_key == "key\ncontent" + assert token_manager.github_installation_id == "67890" + + def test_initialization_with_2_part_env_key(self): + with patch.object(AppTokenManager, "claim_token", return_value=None): + token_manager = AppTokenManager("12345;;key\\ncontent") + assert token_manager.github_app_id == "12345" + assert token_manager.github_private_key == "key\ncontent" + assert token_manager.github_installation_id == "" + + def test_initialization_with_malformed_env_key(self): + expected_error_expression = re.escape( + "GITHUB_APP_PRIVATE_KEY could not be parsed. The expected format is " + '":app_id:;;-----BEGIN RSA PRIVATE KEY-----\\n_YOUR_P_KEY_\\n-----END RSA PRIVATE KEY-----"' + ) + with pytest.raises(ValueError, match=expected_error_expression): + AppTokenManager("12345key\\ncontent") + + def test_generate_token_with_invalid_credentials(self): + with patch.object(AppTokenManager, "is_valid_token", return_value=False), patch( + "tap_github.authenticator.generate_app_access_token", + return_value=("some_token", MagicMock()), + ): + token_manager = AppTokenManager("12345;;key\\ncontent;;67890") + assert token_manager.token is None + assert token_manager.token_expires_at is None + + def test_successful_token_generation(self): + token_time = MagicMock() + with patch.object(AppTokenManager, "is_valid_token", return_value=True), patch( + "tap_github.authenticator.generate_app_access_token", + return_value=("valid_token", token_time), + ): + token_manager = AppTokenManager("12345;;key\\ncontent;;67890") + token_manager.claim_token() + assert token_manager.token == "valid_token" + assert token_manager.token_expires_at == token_time + + +@pytest.fixture +def mock_stream(): + stream = MagicMock(spec=RESTStream) + stream.logger = MagicMock() + stream.tap_name = "tap_github" + stream.config = {"rate_limit_buffer": 5} + return stream + + +class TestGitHubTokenAuthenticator: + + def test_prepare_tokens_returns_empty_if_none_found(self, mock_stream): + with patch.object( + GitHubTokenAuthenticator, "get_env", return_value={"GITHUB_TLJKJFDS": "gt1"} + ), patch.object(PersonalTokenManager, "is_valid_token", return_value=True): + + auth = GitHubTokenAuthenticator(stream=mock_stream) + token_managers = auth.prepare_tokens() + + assert len(token_managers) == 0 + + def test_config_auth_token_only(self, mock_stream): + with patch.object( + GitHubTokenAuthenticator, + "get_env", + return_value={"OTHER_TOKEN": "blah", "NOT_THE_RIGHT_TOKEN": "meh"}, + ), patch.object(PersonalTokenManager, "is_valid_token", return_value=True): + + stream = mock_stream + stream.config.update({"auth_token": "gt5"}) + auth = GitHubTokenAuthenticator(stream=stream) + token_managers = auth.prepare_tokens() + + assert len(token_managers) == 1 + assert token_managers[0].token == "gt5" + + def test_config_additional_auth_tokens_only(self, mock_stream): + with patch.object( + GitHubTokenAuthenticator, + "get_env", + return_value={"OTHER_TOKEN": "blah", "NOT_THE_RIGHT_TOKEN": "meh"}, + ), patch.object(PersonalTokenManager, "is_valid_token", return_value=True): + + stream = mock_stream + stream.config.update({"additional_auth_tokens": ["gt7", "gt8", "gt9"]}) + auth = GitHubTokenAuthenticator(stream=stream) + token_managers = auth.prepare_tokens() + + assert len(token_managers) == 3 + assert sorted({tm.token for tm in token_managers}) == ["gt7", "gt8", "gt9"] + + def test_env_personal_tokens_only(self, mock_stream): + with patch.object( + GitHubTokenAuthenticator, + "get_env", + return_value={ + "GITHUB_TOKEN1": "gt1", + "GITHUB_TOKENxyz": "gt2", + "OTHER_TOKEN": "blah", + }, + ), patch.object(PersonalTokenManager, "is_valid_token", return_value=True): + + auth = GitHubTokenAuthenticator(stream=mock_stream) + token_managers = auth.prepare_tokens() + + assert len(token_managers) == 2 + assert sorted({tm.token for tm in token_managers}) == ["gt1", "gt2"] + + def test_env_app_key_only(self, mock_stream): + with patch.object( + GitHubTokenAuthenticator, + "get_env", + return_value={"GITHUB_APP_PRIVATE_KEY": "123;;key", "OTHER_TOKEN": "blah"}, + ), patch.object(AppTokenManager, "is_valid_token", return_value=True), patch( + "tap_github.authenticator.generate_app_access_token", + return_value=("installationtoken12345", MagicMock()), + ): + + auth = GitHubTokenAuthenticator(stream=mock_stream) + token_managers = auth.prepare_tokens() + + assert len(token_managers) == 1 + assert token_managers[0].token == "installationtoken12345" + + def test_all_token_types(self, mock_stream): + # Expectations: + # - the presence of additional_auth_tokens causes personal tokens in the environment to be ignored. + # - the other types all coexist + with patch.object( + GitHubTokenAuthenticator, + "get_env", + return_value={ + "GITHUB_TOKEN1": "gt1", + "GITHUB_TOKENxyz": "gt2", + "GITHUB_APP_PRIVATE_KEY": "123;;key;;install_id", + "OTHER_TOKEN": "blah", + }, + ), patch.object(TokenManager, "is_valid_token", return_value=True), patch( + "tap_github.authenticator.generate_app_access_token", + return_value=("installationtoken12345", MagicMock()), + ): + + stream = mock_stream + stream.config.update( + {"auth_token": "gt5", "additional_auth_tokens": ["gt7", "gt8", "gt9"]} + ) + auth = GitHubTokenAuthenticator(stream=stream) + token_managers = auth.prepare_tokens() + + assert len(token_managers) == 5 + assert sorted({tm.token for tm in token_managers}) == [ + "gt5", + "gt7", + "gt8", + "gt9", + "installationtoken12345", + ] + + def test_all_token_types_except_additional_auth_tokens(self, mock_stream): + # Expectations: + # - in the absence of additional_auth_tokens, all the other types can coexist + with patch.object( + GitHubTokenAuthenticator, + "get_env", + return_value={ + "GITHUB_TOKEN1": "gt1", + "GITHUB_TOKENxyz": "gt2", + "GITHUB_APP_PRIVATE_KEY": "123;;key;;install_id", + "OTHER_TOKEN": "blah", + }, + ), patch.object(TokenManager, "is_valid_token", return_value=True), patch( + "tap_github.authenticator.generate_app_access_token", + return_value=("installationtoken12345", MagicMock()), + ): + + stream = mock_stream + stream.config.update( + { + "auth_token": "gt5", + } + ) + auth = GitHubTokenAuthenticator(stream=stream) + token_managers = auth.prepare_tokens() + + assert len(token_managers) == 4 + assert sorted({tm.token for tm in token_managers}) == [ + "gt1", + "gt2", + "gt5", + "installationtoken12345", + ] + + def test_auth_token_and_additional_auth_tokens_deduped(self, mock_stream): + with patch.object( + GitHubTokenAuthenticator, + "get_env", + return_value={ + "GITHUB_TOKEN1": "gt1", + "GITHUB_TOKENxyz": "gt2", + "OTHER_TOKEN": "blah", + }, + ), patch.object(TokenManager, "is_valid_token", return_value=True), patch( + "tap_github.authenticator.generate_app_access_token", + return_value=("installationtoken12345", MagicMock()), + ): + + stream = mock_stream + stream.config.update( + { + "auth_token": "gt1", + "additional_auth_tokens": ["gt1", "gt1", "gt8", "gt8", "gt9"], + } + ) + auth = GitHubTokenAuthenticator(stream=stream) + token_managers = auth.prepare_tokens() + + assert len(token_managers) == 3 + assert sorted({tm.token for tm in token_managers}) == ["gt1", "gt8", "gt9"] + + def test_auth_token_and_env_tokens_deduped(self, mock_stream): + with patch.object( + GitHubTokenAuthenticator, + "get_env", + return_value={ + "GITHUB_TOKEN1": "gt1", + "GITHUB_TOKENa": "gt2", + "GITHUB_TOKENxyz": "gt2", + "OTHER_TOKEN": "blah", + }, + ), patch.object(TokenManager, "is_valid_token", return_value=True), patch( + "tap_github.authenticator.generate_app_access_token", + return_value=("installationtoken12345", MagicMock()), + ): + + stream = mock_stream + stream.config.update({"auth_token": "gt1"}) + auth = GitHubTokenAuthenticator(stream=stream) + token_managers = auth.prepare_tokens() + + assert len(token_managers) == 2 + assert sorted({tm.token for tm in token_managers}) == ["gt1", "gt2"] + + def test_handle_error_if_app_key_invalid(self, mock_stream): + # Confirm expected behaviour if an error is raised while setting up the app token manager: + # - don"t crash + # - print the error as a warning + # - continue with any other obtained tokens + with patch.object( + GitHubTokenAuthenticator, + "get_env", + return_value={"GITHUB_APP_PRIVATE_KEY": "123garbagekey"}, + ), patch("tap_github.authenticator.AppTokenManager") as mock_app_manager: + mock_app_manager.side_effect = ValueError("Invalid key format") + + auth = GitHubTokenAuthenticator(stream=mock_stream) + auth.prepare_tokens() + + mock_stream.logger.warn.assert_called_with( + "An error was thrown while preparing an app token: Invalid key format" + ) + + def test_exclude_generated_app_token_if_invalid(self, mock_stream): + with patch.object( + GitHubTokenAuthenticator, + "get_env", + return_value={"GITHUB_APP_PRIVATE_KEY": "123;;key"}, + ), patch.object(AppTokenManager, "is_valid_token", return_value=False), patch( + "tap_github.authenticator.generate_app_access_token", + return_value=("installationtoken12345", MagicMock()), + ): + + auth = GitHubTokenAuthenticator(stream=mock_stream) + token_managers = auth.prepare_tokens() + + assert len(token_managers) == 0 + + def test_prepare_tokens_returns_empty_if_all_tokens_invalid(self, mock_stream): + with patch.object( + GitHubTokenAuthenticator, + "get_env", + return_value={"GITHUB_TOKEN1": "gt1", "GITHUB_APP_PRIVATE_KEY": "123;;key"}, + ), patch.object( + PersonalTokenManager, "is_valid_token", return_value=False + ), patch.object( + AppTokenManager, "is_valid_token", return_value=False + ), patch( + "tap_github.authenticator.generate_app_access_token", + return_value=("installationtoken12345", MagicMock()), + ): + + stream = mock_stream + stream.config.update( + {"auth_token": "gt5", "additional_auth_tokens": ["gt7", "gt8", "gt9"]} + ) + auth = GitHubTokenAuthenticator(stream=stream) + token_managers = auth.prepare_tokens() + + assert len(token_managers) == 0