diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 00000000..3365cbe1 --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1,2 @@ +# These owners will be requested for review when someone opens a pull request. +* @ASFHyP3/platform diff --git a/.github/actions/deploy-stac/action.yml b/.github/actions/deploy-stac/action.yml new file mode 100644 index 00000000..66fec261 --- /dev/null +++ b/.github/actions/deploy-stac/action.yml @@ -0,0 +1,67 @@ +name: Deploy STAC API +description: Deploy the STAC API + +inputs: + AWS_ACCESS_KEY_ID: + required: true + AWS_SECRET_ACCESS_KEY: + required: true + CLOUDFORMATION_ROLE_ARN: + required: true + DB_ADMIN_PASSWORD: + required: true + DB_READ_PASSWORD: + required: true + CIDR_IP: + required: true + CERTIFICATE_ARN: + required: true + STACK_NAME: + required: true + GITHUB_BRANCH: + required: true + DOMAIN_NAME: + required: true + CF_TEMPLATE_BUCKET: + default: cf-templates-aubvn3i9olmk-us-west-2 # For HyP3 account + +runs: + using: composite + steps: + - uses: aws-actions/configure-aws-credentials@v1 + with: + aws-access-key-id: ${{ inputs.AWS_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ inputs.AWS_SECRET_ACCESS_KEY }} + aws-region: us-west-2 + - uses: actions/setup-python@v4 + with: + python-version: 3.9 + - name: Install Lambda dependencies + shell: bash + run: make -s install-lambda-deps + - name: Install CodeBuild dependencies + shell: bash + run: python -m pip install -r requirements-run-codebuild.txt + - name: Deploy CloudFormation stack + shell: bash + run: | + make -s deploy \ + stack_name=${{ inputs.STACK_NAME }} \ + s3_bucket=${{ inputs.CF_TEMPLATE_BUCKET }} \ + cloudformation_role_arn=${{ inputs.CLOUDFORMATION_ROLE_ARN }} \ + db_admin_password=${{ inputs.DB_ADMIN_PASSWORD }} \ + db_read_password=${{ inputs.DB_READ_PASSWORD }} \ + cidr_ip=${{ inputs.CIDR_IP }} \ + github_branch=${{ inputs.GITHUB_BRANCH }} \ + domain_name=${{ inputs.DOMAIN_NAME }} \ + certificate_arn=${{ inputs.CERTIFICATE_ARN }} + 
- name: Get CodeBuild project + shell: bash + run: | + echo "CODEBUILD_PROJECT=$(aws cloudformation describe-stacks \ + --stack-name ${{ inputs.STACK_NAME }} \ + --query 'Stacks[0].Outputs[?OutputKey==`BuildProject`].OutputValue' \ + --output text)" >> $GITHUB_ENV + - name: Run CodeBuild + shell: bash + run: python run_codebuild.py diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 00000000..383b1536 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,19 @@ +# To get started with Dependabot version updates, you'll need to specify which +# package ecosystems to update and where the package manifests are located. +# Please see the documentation for all configuration options: +# https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file + +version: 2 +updates: + - package-ecosystem: "pip" + directory: "/" + schedule: + interval: "daily" + labels: + - "bumpless" + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "daily" + labels: + - "bumpless" diff --git a/.github/workflows/changelog.yml b/.github/workflows/changelog.yml new file mode 100644 index 00000000..3f09d909 --- /dev/null +++ b/.github/workflows/changelog.yml @@ -0,0 +1,18 @@ +name: Changelog updated? 
+ +on: + pull_request: + types: + - opened + - labeled + - unlabeled + - synchronize + branches: + - main + - develop + +jobs: + call-changelog-check-workflow: + uses: ASFHyP3/actions/.github/workflows/reusable-changelog-check.yml@v0.4.0 + secrets: + USER_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/deploy-stac-prod.yml b/.github/workflows/deploy-stac-prod.yml new file mode 100644 index 00000000..c4be785d --- /dev/null +++ b/.github/workflows/deploy-stac-prod.yml @@ -0,0 +1,34 @@ +name: Deploy STAC API prod + +on: + push: + branches: + - main + +jobs: + deploy-stac-api: + runs-on: ubuntu-latest + + environment: + name: asf-stac + + steps: + - uses: actions/checkout@v3 + - uses: ./.github/actions/deploy-stac + with: + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + CLOUDFORMATION_ROLE_ARN: ${{ secrets.CLOUDFORMATION_ROLE_ARN }} + DB_ADMIN_PASSWORD: ${{ secrets.DB_ADMIN_PASSWORD }} + DB_READ_PASSWORD: ${{ secrets.DB_READ_PASSWORD }} + CIDR_IP: ${{ secrets.CIDR_IP }} + CERTIFICATE_ARN: ${{ secrets.CERTIFICATE_ARN }} + STACK_NAME: asf-stac + GITHUB_BRANCH: main + DOMAIN_NAME: stac.asf.alaska.edu + + call-bump-version-workflow: + needs: deploy-stac-api + uses: ASFHyP3/actions/.github/workflows/reusable-bump-version.yml@v0.4.0 + secrets: + USER_TOKEN: ${{ secrets.TOOLS_BOT_PAK }} diff --git a/.github/workflows/deploy-stac-test.yml b/.github/workflows/deploy-stac-test.yml new file mode 100644 index 00000000..1221b33b --- /dev/null +++ b/.github/workflows/deploy-stac-test.yml @@ -0,0 +1,28 @@ +name: Deploy STAC API test + +on: + push: + branches: + - develop + +jobs: + deploy-stac-api: + runs-on: ubuntu-latest + + environment: + name: asf-stac-test + + steps: + - uses: actions/checkout@v3 + - uses: ./.github/actions/deploy-stac + with: + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + CLOUDFORMATION_ROLE_ARN: ${{ 
secrets.CLOUDFORMATION_ROLE_ARN }} + DB_ADMIN_PASSWORD: ${{ secrets.DB_ADMIN_PASSWORD }} + DB_READ_PASSWORD: ${{ secrets.DB_READ_PASSWORD }} + CIDR_IP: ${{ secrets.CIDR_IP }} + CERTIFICATE_ARN: ${{ secrets.CERTIFICATE_ARN }} + STACK_NAME: asf-stac-test + GITHUB_BRANCH: develop + DOMAIN_NAME: stac-test.asf.alaska.edu diff --git a/.github/workflows/labeled-pr.yml b/.github/workflows/labeled-pr.yml new file mode 100644 index 00000000..a0a3ecf8 --- /dev/null +++ b/.github/workflows/labeled-pr.yml @@ -0,0 +1,15 @@ +name: Is PR labeled? + +on: + pull_request: + types: + - opened + - labeled + - unlabeled + - synchronize + branches: + - main + +jobs: + call-labeled-pr-check-workflow: + uses: ASFHyP3/actions/.github/workflows/reusable-labeled-pr-check.yml@v0.4.0 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 00000000..b5b9dc16 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,14 @@ +name: Create Release + +on: + push: + tags: + - 'v*' + +jobs: + call-release-workflow: + uses: ASFHyP3/actions/.github/workflows/reusable-release.yml@v0.4.0 + with: + release_prefix: ASF STAC + secrets: + USER_TOKEN: ${{ secrets.TOOLS_BOT_PAK }} diff --git a/.github/workflows/static-analysis.yml b/.github/workflows/static-analysis.yml new file mode 100644 index 00000000..38f8ecbc --- /dev/null +++ b/.github/workflows/static-analysis.yml @@ -0,0 +1,24 @@ +name: Static code analysis + +on: push + +jobs: + flake8: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3.1.0 + - uses: actions/setup-python@v4 + with: + python-version: 3.9 + - run: make install + - run: make flake8 + + cfn-lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3.1.0 + - uses: actions/setup-python@v4 + with: + python-version: 3.9 + - run: make install + - run: make cfn-lint diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 00000000..02b708bc --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,9 @@ +# Changelog +All notable 
changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [0.1.0] +### Added +- Initial release of STAC API endpoint backed by a PostgreSQL database diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..4d2aabc7 --- /dev/null +++ b/LICENSE @@ -0,0 +1,29 @@ +BSD 3-Clause License + +Copyright (c) 2022, Alaska Satellite Facility +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/Makefile b/Makefile new file mode 100644 index 00000000..d6098103 --- /dev/null +++ b/Makefile @@ -0,0 +1,59 @@ +install: + python -m pip install --upgrade pip && \ + python -m pip install -r requirements.txt + +install-pypgstac: + python -m pip install --upgrade pip && \ + python -m pip install $$(grep pypgstac requirements.txt) + +install-lambda-deps: + python -m pip install --upgrade pip && \ + python -m pip install -r requirements-apps-api.txt -t apps/api/src/ + +deploy: + aws cloudformation package \ + --template-file apps/cloudformation.yml \ + --s3-bucket ${s3_bucket} \ + --output-template-file packaged.yml \ + && aws cloudformation deploy \ + --template-file packaged.yml \ + --stack-name ${stack_name} \ + --capabilities CAPABILITY_NAMED_IAM \ + --role-arn ${cloudformation_role_arn} \ + --parameter-overrides \ + DatabaseAdminPassword=${db_admin_password} \ + DatabaseReadPassword=${db_read_password} \ + CidrIp=${cidr_ip} \ + GithubBranch=${github_branch} \ + DomainName=${domain_name} \ + CertificateArn=${certificate_arn} + +psql: + PGHOST=${db_host} PGPORT=5432 PGDATABASE=postgres PGUSER=${db_user} PGPASSWORD=${db_password} psql + +configure-database: install-or-upgrade-postgis pypgstac-migrate configure-database-roles + +install-or-upgrade-postgis: + PGHOST=${db_host} PGPORT=5432 PGDATABASE=postgres PGUSER=postgres PGPASSWORD=${db_admin_password} psql \ + -f install-or-upgrade-postgis.sql + +pypgstac-migrate: + PGHOST=${db_host} 
PGPORT=5432 PGDATABASE=postgres PGUSER=postgres PGPASSWORD=${db_admin_password} pypgstac migrate + +configure-database-roles: + PGHOST=${db_host} PGPORT=5432 PGDATABASE=postgres PGUSER=postgres PGPASSWORD=${db_admin_password} psql \ + --set=db_read_password=${db_read_password} \ + -f configure-database-roles.sql + +run-api: + POSTGRES_HOST_READER=${db_host} POSTGRES_HOST_WRITER=${db_host} POSTGRES_PORT=5432 \ + POSTGRES_DBNAME=postgres POSTGRES_USER=postgres POSTGRES_PASS=${db_admin_password} \ + python -m stac_fastapi.pgstac.app + +static: flake8 cfn-lint + +flake8: + flake8 --max-line-length=120 + +cfn-lint: + cfn-lint --template `find . -name cloudformation.yml` --info --ignore-checks W3002 diff --git a/README.md b/README.md index 19db3705..71ae1869 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,97 @@ -# ASF_STAC +# asf-stac -A repository containing code related to the creation and hosting of STAC catalogs by the ASF tools team. +Creation and hosting of STAC catalogs by the ASF Tools team. + +The test API is available at +and the Swagger UI is available at . + +TODO: document prod URL + +## Developer setup + +Clone the repository, create the environment, and install the developer dependencies: + +``` +git clone git@github.com:ASFHyP3/asf-stac.git +cd asf-stac + +conda create -c conda-forge -n asf-stac python=3.9 postgresql +conda activate asf-stac + +make install +``` + +## Requirements for connecting to the database + +Refer to this section when manually connecting to the database or when running the API locally. + +The database only accepts connections from within the ASF Full VPN or from clients +with the client security group attached. See the ingress rules for the database security group in the +[database CloudFormation template](apps/database/cloudformation.yml). + +The database host and database user credentials are available via the AWS Secrets Manager console +in the AWS account where the CloudFormation stack was deployed. 
+ +## Manually connecting to the database + +We shouldn't need to manually connect to the database during normal operations, +as the API will connect automatically, but we can if we need to (e.g. for debugging purposes). + +Confirm that you have [PostgreSQL](https://www.postgresql.org/download/) installed, then run: + +``` +make psql db_host= db_user= db_password= +``` + +## Running the API locally + +You can run the STAC API frontend locally and it will automatically connect to the AWS-hosted database. + +The local API provides access to the Transaction extension (which provides create/update/delete endpoints), +while the publicly available API does not. Therefore, if you need access to the Transaction endpoints, you +must run the API locally: + +``` +make run-api db_host= db_admin_password= +``` + +You should see something like `Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)` in the output; you can +query the API at that URL. + +You can confirm that the Transaction extension is enabled by opening the local API URL in a web browser +and appending `/api.html` to open the Swagger UI. You should see various create/update/delete endpoints +under the "Transaction Extension" heading. You should be able to successfully query these endpoints via +the local API, but not via the publicly available API. (TODO: after removing those endpoints completely +from the public API, update this paragraph to reflect that they will no longer appear in the Swagger UI.) + +## Ingesting a STAC dataset + +Run `python ingest_data.py -h` for usage instructions. You must run the ingest script against +a locally running API, as the script requires access to the Transaction endpoints. + +## Upgrading the database + +The initial AWS deployment creates a Postgres database, installs the PostGIS extension, and then installs +[PgSTAC](https://stac-utils.github.io/pgstac). Follow these steps to upgrade the database: + +1. 
Run the following command to list the Postgres versions supported by Amazon RDS: + ``` + aws rds describe-db-engine-versions --engine postgres + ``` + Identify the entry that corresponds to the current version of the database. + Then identify the newest available version from the list of valid upgrade targets for the current version. + This will be the new version for the database. + +2. Change the Postgres version specified in the [database CloudFormation template](apps/database/cloudformation.yml) + to the new version. + +3. Next, refer to the tables shown + [here](https://docs.aws.amazon.com/AmazonRDS/latest/PostgreSQLReleaseNotes/postgresql-extensions.html) + to determine which version of the PostGIS extension is supported by the new Postgres version. + +4. Change the PostGIS version specified in the [install/upgrade script](install-or-upgrade-postgis.sql). + +5. Deploy to AWS. + +PgSTAC upgrades are handled automatically: the deployment pipeline migrates the database to the installed +version of `pypgstac`. See for more information about migrations. 
diff --git a/apps/api/cloudformation.yml b/apps/api/cloudformation.yml new file mode 100644 index 00000000..7eeb9e4a --- /dev/null +++ b/apps/api/cloudformation.yml @@ -0,0 +1,134 @@ +AWSTemplateFormatVersion: 2010-09-09 + +Parameters: + + DatabaseHost: + Type: String + NoEcho: true + + DatabaseReadPassword: + Type: String + NoEcho: true + + SubnetIds: + Type: CommaDelimitedList + + SecurityGroupId: + Type: AWS::EC2::SecurityGroup::Id + + DomainName: + Type: String + + CertificateArn: + Type: String + +Resources: + Lambda: + Type: AWS::Lambda::Function + Properties: + Environment: + Variables: + POSTGRES_HOST_READER: !Ref DatabaseHost + POSTGRES_HOST_WRITER: !Ref DatabaseHost + POSTGRES_PORT: 5432 + POSTGRES_DBNAME: postgres + POSTGRES_USER: pgstac_read + POSTGRES_PASS: !Ref DatabaseReadPassword + Code: src/ + Handler: api.handler + MemorySize: 2000 + Role: !GetAtt LambdaRole.Arn + Runtime: python3.9 + Timeout: 30 + VpcConfig: + SecurityGroupIds: + - !Ref SecurityGroupId + SubnetIds: !Ref SubnetIds + + LambdaLogGroup: + Type: AWS::Logs::LogGroup + Properties: + LogGroupName: !Sub "/aws/lambda/${Lambda}" + RetentionInDays: 90 + + LambdaRole: + Type: AWS::IAM::Role + Properties: + AssumeRolePolicyDocument: + Version: 2012-10-17 + Statement: + Action: sts:AssumeRole + Principal: + Service: lambda.amazonaws.com + Effect: Allow + ManagedPolicyArns: + - !Ref LambdaPolicy + - arn:aws:iam::aws:policy/service-role/AWSLambdaVPCAccessExecutionRole + + LambdaPolicy: + Type: AWS::IAM::ManagedPolicy + Properties: + PolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Action: + - logs:CreateLogStream + - logs:PutLogEvents + Resource: !Sub "arn:aws:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/lambda/*" + + Api: + Type: AWS::ApiGatewayV2::Api + Properties: + Name: !Ref AWS::StackName + ProtocolType: HTTP + Target: !GetAtt Lambda.Arn + CredentialsArn: !GetAtt ApiRole.Arn + + ApiOverrides: + Type: AWS::ApiGatewayV2::ApiGatewayManagedOverrides + Properties: + 
ApiId: !Ref Api + Stage: + AccessLogSettings: + DestinationArn: !GetAtt ApiLogGroup.Arn + Format: '{"sourceIp":"$context.identity.sourceIp","httpMethod":"$context.httpMethod","path":"$context.path","status":"$context.status","responseLength":"$context.responseLength","responseLatency":"$context.responseLatency","requestTime":"$context.requestTime","protocol":"$context.protocol","userAgent":"$context.identity.userAgent","requestId":"$context.requestId"}' + + ApiLogGroup: + Type: AWS::Logs::LogGroup + Properties: + RetentionInDays: 180 + + ApiRole: + Type: AWS::IAM::Role + Properties: + AssumeRolePolicyDocument: + Version: 2012-10-17 + Statement: + Action: sts:AssumeRole + Principal: + Service: apigateway.amazonaws.com + Effect: Allow + Policies: + - PolicyName: policy + PolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Action: lambda:InvokeFunction + Resource: !GetAtt Lambda.Arn + + CustomDomainName: + Type: AWS::ApiGatewayV2::DomainName + Properties: + DomainName: !Ref DomainName + DomainNameConfigurations: + - CertificateArn: !Ref CertificateArn + EndpointType: REGIONAL + + ApiMapping: + Type: AWS::ApiGatewayV2::ApiMapping + Properties: + ApiId: !Ref Api + DomainName: !Ref CustomDomainName + Stage: $default diff --git a/apps/api/src/api.py b/apps/api/src/api.py new file mode 100644 index 00000000..a537b4b0 --- /dev/null +++ b/apps/api/src/api.py @@ -0,0 +1 @@ +from stac_fastapi.pgstac.app import handler # noqa: F401 diff --git a/apps/cloudformation.yml b/apps/cloudformation.yml new file mode 100644 index 00000000..83d8a833 --- /dev/null +++ b/apps/cloudformation.yml @@ -0,0 +1,62 @@ +AWSTemplateFormatVersion: 2010-09-09 + +Parameters: + + GithubBranch: + Type: String + + CidrIp: + Type: String + + DatabaseAdminPassword: + Type: String + NoEcho: true + + DatabaseReadPassword: + Type: String + NoEcho: true + + DomainName: + Type: String + + CertificateArn: + Type: String + +Outputs: + + BuildProject: + Value: !GetAtt 
Database.Outputs.BuildProject + +Resources: + + VPC: + Type: AWS::CloudFormation::Stack + Properties: + Parameters: + EnvironmentName: !Ref AWS::StackName + TemplateURL: vpc/cloudformation.yml + + Api: + Type: AWS::CloudFormation::Stack + Properties: + Parameters: + DatabaseHost: !GetAtt Database.Outputs.DatabaseHost + DatabaseReadPassword: !Ref DatabaseReadPassword + SecurityGroupId: !GetAtt Database.Outputs.ClientSecurityGroupId + SubnetIds: !GetAtt VPC.Outputs.PublicSubnets + DomainName: !Ref DomainName + CertificateArn: !Ref CertificateArn + TemplateURL: api/cloudformation.yml + + Database: + Type: AWS::CloudFormation::Stack + Properties: + Parameters: + DatabaseAdminPassword: !Ref DatabaseAdminPassword + DatabaseReadPassword: !Ref DatabaseReadPassword + VpcId: !GetAtt VPC.Outputs.VPC + PublicSubnetIds: !GetAtt VPC.Outputs.PublicSubnets + PrivateSubnetIds: !GetAtt VPC.Outputs.PrivateSubnets + CidrIp: !Ref CidrIp + GithubBranch: !Ref GithubBranch + TemplateURL: database/cloudformation.yml diff --git a/apps/database/buildspec.yml b/apps/database/buildspec.yml new file mode 100644 index 00000000..c3b03f31 --- /dev/null +++ b/apps/database/buildspec.yml @@ -0,0 +1,14 @@ +version: 0.2 + +phases: + install: + runtime-versions: + python: 3.9 + commands: + - apt update + - apt install -y postgresql-12 + - make install-pypgstac + - pyenv rehash + build: + commands: + - make configure-database db_host=$PGHOST db_admin_password=$PGPASSWORD db_read_password=$READ_PASSWORD diff --git a/apps/database/cloudformation.yml b/apps/database/cloudformation.yml new file mode 100644 index 00000000..c5b76c9f --- /dev/null +++ b/apps/database/cloudformation.yml @@ -0,0 +1,170 @@ +AWSTemplateFormatVersion: 2010-09-09 + +Parameters: + + DatabaseAdminPassword: + Type: String + NoEcho: true + + DatabaseReadPassword: + Type: String + NoEcho: true + + VpcId: + Type: AWS::EC2::VPC::Id + + PublicSubnetIds: + Type: CommaDelimitedList + + PrivateSubnetIds: + Type: CommaDelimitedList + + CidrIp: 
+ Type: String + + GithubBranch: + Type: String + +Outputs: + + BuildProject: + Value: !Ref CodeBuildProject + + ClientSecurityGroupId: + Value: !Ref ClientSecurityGroup + + DatabaseHost: + Value: !GetAtt DatabaseInstance.Endpoint.Address + +Resources: + + DatabaseInstance: + Type: AWS::RDS::DBInstance + Properties: + AllocatedStorage: '20' # TODO depends on StorageType? + DBInstanceClass: db.t3.micro # TODO different class for prod? + DBSubnetGroupName: !Ref DatabaseSubnetGroup + VPCSecurityGroups: + - !Ref DatabaseSecurityGroup + Engine: postgres + EngineVersion: '14.4' + MasterUsername: postgres + MasterUserPassword: !Ref DatabaseAdminPassword + # TODO: MaxAllocatedStorage: https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-rds-dbinstance.html#cfn-rds-dbinstance-maxallocatedstorage + Port: '5432' + # TODO: ProcessorFeatures: https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-rds-dbinstance.html#cfn-rds-dbinstance-processorfeatures + PubliclyAccessible: true + # TODO: StorageType: https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-rds-dbinstance.html#cfn-rds-dbinstance-storagetype + + DatabaseSubnetGroup: + Type: AWS::RDS::DBSubnetGroup + Properties: + DBSubnetGroupDescription: !Sub "Subnet Group for ${AWS::StackName} database" + SubnetIds: !Ref PublicSubnetIds + + DatabaseSecurityGroup: + Type: AWS::EC2::SecurityGroup + Properties: + GroupDescription: !Sub "Security group for ${AWS::StackName} database" + VpcId: !Ref VpcId + SecurityGroupIngress: + - IpProtocol: tcp + CidrIp: !Ref CidrIp + FromPort: 5432 + ToPort: 5432 + - IpProtocol: tcp + SourceSecurityGroupId: !Ref ClientSecurityGroup + FromPort: 5432 + ToPort: 5432 + + ClientSecurityGroup: + Type: AWS::EC2::SecurityGroup + Properties: + GroupDescription: !Sub "Security group for clients of ${AWS::StackName} database" + VpcId: !Ref VpcId + + DatabaseSecret: + Type: AWS::SecretsManager::Secret + Properties: + Description: !Sub 
"${AWS::StackName} database credentials" + SecretString: !Sub '{"database_host": "${DatabaseInstance.Endpoint.Address}", "admin_user": "postgres", "admin_password": "${DatabaseAdminPassword}", "read_user": "pgstac_read", "read_password": "${DatabaseReadPassword}"}' + + CodeBuildProject: + Type: AWS::CodeBuild::Project + Properties: + Environment: + ComputeType: BUILD_GENERAL1_SMALL + Type: LINUX_CONTAINER + Image: aws/codebuild/standard:5.0 + EnvironmentVariables: + - Name: PGHOST + Type: SECRETS_MANAGER + Value: !Sub "${DatabaseSecret}:database_host" + - Name: PGPASSWORD + Type: SECRETS_MANAGER + Value: !Sub "${DatabaseSecret}:admin_password" + - Name: READ_PASSWORD + Type: SECRETS_MANAGER + Value: !Sub "${DatabaseSecret}:read_password" + ServiceRole: !Ref CodeBuildServiceRole + Source: + Type: GITHUB + Location: https://github.com/ASFHyP3/asf-stac.git + GitCloneDepth: 1 + BuildSpec: apps/database/buildspec.yml + SourceVersion: !Ref GithubBranch + Artifacts: + Type: NO_ARTIFACTS + VpcConfig: + VpcId: !Ref VpcId + Subnets: !Ref PrivateSubnetIds + SecurityGroupIds: + - !Ref ClientSecurityGroup + + CodeBuildLogGroup: + Type: AWS::Logs::LogGroup + Properties: + LogGroupName: !Sub "/aws/codebuild/${CodeBuildProject}" + RetentionInDays: 90 + + CodeBuildServiceRole: + Type: AWS::IAM::Role + Properties: + AssumeRolePolicyDocument: + Version: 2012-10-17 + Statement: + Action: sts:AssumeRole + Principal: + Service: codebuild.amazonaws.com + Effect: Allow + Policies: + - PolicyName: policy + PolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Action: + - logs:CreateLogStream + - logs:PutLogEvents + Resource: !Sub "arn:aws:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/codebuild/*" + - Effect: Allow + Action: + - ec2:CreateNetworkInterface + - ec2:DescribeDhcpOptions + - ec2:DescribeNetworkInterfaces + - ec2:DeleteNetworkInterface + - ec2:DescribeSubnets + - ec2:DescribeSecurityGroups + - ec2:DescribeVpcs + Resource: "*" + - Effect: Allow + Action: 
secretsmanager:GetSecretValue + Resource: !Ref DatabaseSecret + - Effect: Allow + Action: ec2:CreateNetworkInterfacePermission + Resource: !Sub "arn:aws:ec2:${AWS::Region}:${AWS::AccountId}:network-interface/*" + Condition: + StringEquals: + ec2:AuthorizedService: codebuild.amazonaws.com + StringLike: + ec2:Subnet: !Sub "arn:aws:ec2:${AWS::Region}:${AWS::AccountId}:subnet/*" diff --git a/apps/vpc/cloudformation.yml b/apps/vpc/cloudformation.yml new file mode 100644 index 00000000..6f3af552 --- /dev/null +++ b/apps/vpc/cloudformation.yml @@ -0,0 +1,227 @@ +# https://docs.aws.amazon.com/codebuild/latest/userguide/cloudformation-vpc-template.html +Description: This template deploys a VPC, with a pair of public and private subnets spread + across two Availability Zones. It deploys an internet gateway, with a default + route on the public subnets. It deploys a pair of NAT gateways (one in each AZ), + and default routes for them in the private subnets. + +Parameters: + EnvironmentName: + Description: An environment name that is prefixed to resource names + Type: String + + VpcCIDR: + Description: Please enter the IP range (CIDR notation) for this VPC + Type: String + Default: 10.192.0.0/16 + + PublicSubnet1CIDR: + Description: Please enter the IP range (CIDR notation) for the public subnet in the first Availability Zone + Type: String + Default: 10.192.10.0/24 + + PublicSubnet2CIDR: + Description: Please enter the IP range (CIDR notation) for the public subnet in the second Availability Zone + Type: String + Default: 10.192.11.0/24 + + PrivateSubnet1CIDR: + Description: Please enter the IP range (CIDR notation) for the private subnet in the first Availability Zone + Type: String + Default: 10.192.20.0/24 + + PrivateSubnet2CIDR: + Description: Please enter the IP range (CIDR notation) for the private subnet in the second Availability Zone + Type: String + Default: 10.192.21.0/24 + +Resources: + VPC: + Type: AWS::EC2::VPC + Properties: + CidrBlock: !Ref VpcCIDR + 
EnableDnsSupport: true + EnableDnsHostnames: true + Tags: + - Key: Name + Value: !Ref EnvironmentName + + InternetGateway: + Type: AWS::EC2::InternetGateway + Properties: + Tags: + - Key: Name + Value: !Ref EnvironmentName + + InternetGatewayAttachment: + Type: AWS::EC2::VPCGatewayAttachment + Properties: + InternetGatewayId: !Ref InternetGateway + VpcId: !Ref VPC + + PublicSubnet1: + Type: AWS::EC2::Subnet + Properties: + VpcId: !Ref VPC + AvailabilityZone: !Select [ 0, !GetAZs '' ] + CidrBlock: !Ref PublicSubnet1CIDR + MapPublicIpOnLaunch: true + Tags: + - Key: Name + Value: !Sub ${EnvironmentName} Public Subnet (AZ1) + + PublicSubnet2: + Type: AWS::EC2::Subnet + Properties: + VpcId: !Ref VPC + AvailabilityZone: !Select [ 1, !GetAZs '' ] + CidrBlock: !Ref PublicSubnet2CIDR + MapPublicIpOnLaunch: true + Tags: + - Key: Name + Value: !Sub ${EnvironmentName} Public Subnet (AZ2) + + PrivateSubnet1: + Type: AWS::EC2::Subnet + Properties: + VpcId: !Ref VPC + AvailabilityZone: !Select [ 0, !GetAZs '' ] + CidrBlock: !Ref PrivateSubnet1CIDR + MapPublicIpOnLaunch: false + Tags: + - Key: Name + Value: !Sub ${EnvironmentName} Private Subnet (AZ1) + + PrivateSubnet2: + Type: AWS::EC2::Subnet + Properties: + VpcId: !Ref VPC + AvailabilityZone: !Select [ 1, !GetAZs '' ] + CidrBlock: !Ref PrivateSubnet2CIDR + MapPublicIpOnLaunch: false + Tags: + - Key: Name + Value: !Sub ${EnvironmentName} Private Subnet (AZ2) + + NatGateway1EIP: + Type: AWS::EC2::EIP + DependsOn: InternetGatewayAttachment + Properties: + Domain: vpc + + NatGateway2EIP: + Type: AWS::EC2::EIP + DependsOn: InternetGatewayAttachment + Properties: + Domain: vpc + + NatGateway1: + Type: AWS::EC2::NatGateway + Properties: + AllocationId: !GetAtt NatGateway1EIP.AllocationId + SubnetId: !Ref PublicSubnet1 + + NatGateway2: + Type: AWS::EC2::NatGateway + Properties: + AllocationId: !GetAtt NatGateway2EIP.AllocationId + SubnetId: !Ref PublicSubnet2 + + PublicRouteTable: + Type: AWS::EC2::RouteTable + Properties: + VpcId: 
!Ref VPC + Tags: + - Key: Name + Value: !Sub ${EnvironmentName} Public Routes + + DefaultPublicRoute: + Type: AWS::EC2::Route + DependsOn: InternetGatewayAttachment + Properties: + RouteTableId: !Ref PublicRouteTable + DestinationCidrBlock: 0.0.0.0/0 + GatewayId: !Ref InternetGateway + + PublicSubnet1RouteTableAssociation: + Type: AWS::EC2::SubnetRouteTableAssociation + Properties: + RouteTableId: !Ref PublicRouteTable + SubnetId: !Ref PublicSubnet1 + + PublicSubnet2RouteTableAssociation: + Type: AWS::EC2::SubnetRouteTableAssociation + Properties: + RouteTableId: !Ref PublicRouteTable + SubnetId: !Ref PublicSubnet2 + + + PrivateRouteTable1: + Type: AWS::EC2::RouteTable + Properties: + VpcId: !Ref VPC + Tags: + - Key: Name + Value: !Sub ${EnvironmentName} Private Routes (AZ1) + + DefaultPrivateRoute1: + Type: AWS::EC2::Route + Properties: + RouteTableId: !Ref PrivateRouteTable1 + DestinationCidrBlock: 0.0.0.0/0 + NatGatewayId: !Ref NatGateway1 + + PrivateSubnet1RouteTableAssociation: + Type: AWS::EC2::SubnetRouteTableAssociation + Properties: + RouteTableId: !Ref PrivateRouteTable1 + SubnetId: !Ref PrivateSubnet1 + + PrivateRouteTable2: + Type: AWS::EC2::RouteTable + Properties: + VpcId: !Ref VPC + Tags: + - Key: Name + Value: !Sub ${EnvironmentName} Private Routes (AZ2) + + DefaultPrivateRoute2: + Type: AWS::EC2::Route + Properties: + RouteTableId: !Ref PrivateRouteTable2 + DestinationCidrBlock: 0.0.0.0/0 + NatGatewayId: !Ref NatGateway2 + + PrivateSubnet2RouteTableAssociation: + Type: AWS::EC2::SubnetRouteTableAssociation + Properties: + RouteTableId: !Ref PrivateRouteTable2 + SubnetId: !Ref PrivateSubnet2 + +Outputs: + VPC: + Description: A reference to the created VPC + Value: !Ref VPC + + PublicSubnets: + Description: A list of the public subnets + Value: !Join [ ",", [ !Ref PublicSubnet1, !Ref PublicSubnet2 ]] + + PrivateSubnets: + Description: A list of the private subnets + Value: !Join [ ",", [ !Ref PrivateSubnet1, !Ref PrivateSubnet2 ]] + + PublicSubnet1: 
+ Description: A reference to the public subnet in the 1st Availability Zone + Value: !Ref PublicSubnet1 + + PublicSubnet2: + Description: A reference to the public subnet in the 2nd Availability Zone + Value: !Ref PublicSubnet2 + + PrivateSubnet1: + Description: A reference to the private subnet in the 1st Availability Zone + Value: !Ref PrivateSubnet1 + + PrivateSubnet2: + Description: A reference to the private subnet in the 2nd Availability Zone + Value: !Ref PrivateSubnet2 diff --git a/configure-database-roles.sql b/configure-database-roles.sql new file mode 100644 index 00000000..f2eb6981 --- /dev/null +++ b/configure-database-roles.sql @@ -0,0 +1,4 @@ +\set ON_ERROR_STOP TRUE + +ALTER ROLE pgstac_read LOGIN PASSWORD :'db_read_password'; +GRANT SELECT ON pgstac.collections TO pgstac_read; diff --git a/data/france_urls.txt b/data/france_urls.txt new file mode 100644 index 00000000..eb1c9bd9 --- /dev/null +++ b/data/france_urls.txt @@ -0,0 +1,150 @@ +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N42E000/N42E000_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N42E001/N42E001_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N42E002/N42E002_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N42E003/N42E003_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N42E004/N42E004_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N42E005/N42E005_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N42E006/N42E006_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N42E007/N42E007_summer_vv_COH12.tif 
+https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N42E008/N42E008_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N42E009/N42E009_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N42W001/N42W001_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N42W002/N42W002_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N42W003/N42W003_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N42W004/N42W004_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N42W005/N42W005_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N43E000/N43E000_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N43E001/N43E001_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N43E002/N43E002_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N43E003/N43E003_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N43E004/N43E004_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N43E005/N43E005_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N43E006/N43E006_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N43E007/N43E007_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N43E008/N43E008_summer_vv_COH12.tif 
+https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N43E009/N43E009_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N43W001/N43W001_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N43W002/N43W002_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N43W003/N43W003_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N43W004/N43W004_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N43W005/N43W005_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N44E000/N44E000_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N44E001/N44E001_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N44E002/N44E002_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N44E003/N44E003_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N44E004/N44E004_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N44E005/N44E005_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N44E006/N44E006_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N44E007/N44E007_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N44E008/N44E008_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N44E009/N44E009_summer_vv_COH12.tif 
+https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N44W001/N44W001_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N44W002/N44W002_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N44W003/N44W003_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N44W004/N44W004_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N44W005/N44W005_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N45E000/N45E000_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N45E001/N45E001_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N45E002/N45E002_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N45E003/N45E003_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N45E004/N45E004_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N45E005/N45E005_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N45E006/N45E006_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N45E007/N45E007_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N45E008/N45E008_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N45E009/N45E009_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N45W001/N45W001_summer_vv_COH12.tif 
+https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N45W002/N45W002_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N45W003/N45W003_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N45W004/N45W004_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N45W005/N45W005_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N46E000/N46E000_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N46E001/N46E001_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N46E002/N46E002_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N46E003/N46E003_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N46E004/N46E004_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N46E005/N46E005_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N46E006/N46E006_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N46E007/N46E007_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N46E008/N46E008_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N46E009/N46E009_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N46W001/N46W001_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N46W002/N46W002_summer_vv_COH12.tif 
+https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N46W003/N46W003_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N46W004/N46W004_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N46W005/N46W005_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N47E000/N47E000_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N47E001/N47E001_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N47E002/N47E002_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N47E003/N47E003_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N47E004/N47E004_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N47E005/N47E005_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N47E006/N47E006_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N47E007/N47E007_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N47E008/N47E008_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N47E009/N47E009_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N47W001/N47W001_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N47W002/N47W002_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N47W003/N47W003_summer_vv_COH12.tif 
+https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N47W004/N47W004_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N47W005/N47W005_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N48E000/N48E000_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N48E001/N48E001_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N48E002/N48E002_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N48E003/N48E003_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N48E004/N48E004_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N48E005/N48E005_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N48E006/N48E006_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N48E007/N48E007_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N48E008/N48E008_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N48E009/N48E009_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N48W001/N48W001_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N48W002/N48W002_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N48W003/N48W003_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N48W004/N48W004_summer_vv_COH12.tif 
+https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N48W005/N48W005_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N49E000/N49E000_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N49E001/N49E001_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N49E002/N49E002_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N49E003/N49E003_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N49E004/N49E004_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N49E005/N49E005_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N49E006/N49E006_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N49E007/N49E007_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N49E008/N49E008_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N49E009/N49E009_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N49W001/N49W001_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N49W002/N49W002_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N49W003/N49W003_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N49W004/N49W004_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N49W005/N49W005_summer_vv_COH12.tif 
+https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N50E000/N50E000_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N50E001/N50E001_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N50E002/N50E002_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N50E003/N50E003_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N50E004/N50E004_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N50E005/N50E005_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N50E006/N50E006_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N50E007/N50E007_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N50E008/N50E008_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N50E009/N50E009_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N50W001/N50W001_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N50W002/N50W002_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N50W003/N50W003_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N50W004/N50W004_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N50W005/N50W005_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N51E000/N51E000_summer_vv_COH12.tif 
+https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N51E001/N51E001_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N51E002/N51E002_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N51E003/N51E003_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N51E004/N51E004_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N51E005/N51E005_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N51E006/N51E006_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N51E007/N51E007_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N51E008/N51E008_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N51E009/N51E009_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N51W001/N51W001_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N51W002/N51W002_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N51W003/N51W003_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N51W004/N51W004_summer_vv_COH12.tif +https://sentinel-1-global-coherence-earthbigdata.s3.us-west-2.amazonaws.com/data/tiles/N51W005/N51W005_summer_vv_COH12.tif diff --git a/ingest_data.py b/ingest_data.py new file mode 100644 index 00000000..b5c02529 --- /dev/null +++ b/ingest_data.py @@ -0,0 +1,70 @@ +""" +Adds a STAC dataset to a STAC API application. Skips objects that +already exist. 
+
+Assumes that the dataset is arranged as a tree (connected acyclic
+graph). This means that all objects should be reachable from the root
+object and there should be no cycles.
+
+Assumes that links to child objects are specified as relative
+filesystem paths.
+
+Assumes that the STAC API supports the Transaction extension.
+"""
+
+import argparse
+import json
+from pathlib import Path
+from urllib.parse import urljoin
+
+import requests
+
+
+def traverse(stac_object_path: Path, api_url: str) -> None:
+    # Depth-first walk of the STAC tree: POST this object to the API, then
+    # recurse into every child/item it links to.
+    with open(stac_object_path, 'r') as f:
+        stac_object: dict = json.load(f)
+
+    print(f'Adding STAC object {stac_object_path}')
+    add_stac_object(stac_object, api_url)
+
+    for child_path in get_child_paths(stac_object, stac_object_path.parent):
+        traverse(child_path, api_url)
+
+
+def get_child_paths(stac_object: dict, parent_path: Path) -> list[Path]:
+    # 'child' and 'item' link hrefs are relative filesystem paths (see module
+    # docstring), resolved against the directory of the linking object.
+    return [
+        parent_path / link_object['href']
+        for link_object in stac_object['links'] if link_object['rel'] in ('child', 'item')
+    ]
+
+
+def add_stac_object(stac_object: dict, api_url: str) -> None:
+    # Catalogs and Collections are both posted to the /collections endpoint;
+    # Features (items) go under their parent collection.
+    if stac_object['type'] in ('Catalog', 'Collection'):
+        endpoint = '/collections'
+    else:
+        assert stac_object['type'] == 'Feature'
+        endpoint = f'/collections/{stac_object["collection"]}/items'
+
+    # NOTE(review): urljoin with a leading-slash endpoint drops any path
+    # component of api_url — assumes api_url is a bare scheme://host origin.
+    url = urljoin(api_url, endpoint)
+    response = requests.post(url, json=stac_object)
+
+    # 409 Conflict means the object already exists; skip it (module docstring
+    # promises skip-on-existing). Any other error status is raised.
+    if response.status_code == 409:
+        print('Skipping existing object')
+    else:
+        response.raise_for_status()
+
+
+def parse_args() -> argparse.Namespace:
+    parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
+    parser.add_argument('root_object_path', help='Filesystem path to the root STAC object.')
+    parser.add_argument('api_url', help='URL for the STAC API.')
+    return parser.parse_args()
+
+
+def main() -> None:
+    args = parse_args()
+    traverse(Path(args.root_object_path), args.api_url)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/install-or-upgrade-postgis.sql
b/install-or-upgrade-postgis.sql
new file mode 100644
index 00000000..f701aa06
--- /dev/null
+++ b/install-or-upgrade-postgis.sql
@@ -0,0 +1,5 @@
+\set ON_ERROR_STOP TRUE
+\set postgis_version '3.1.5'
+
+CREATE EXTENSION IF NOT EXISTS postgis WITH VERSION :'postgis_version';
+ALTER EXTENSION postgis UPDATE TO :'postgis_version';
diff --git a/requirements-apps-api.txt b/requirements-apps-api.txt
new file mode 100644
index 00000000..799a607a
--- /dev/null
+++ b/requirements-apps-api.txt
@@ -0,0 +1,6 @@
+mangum==0.16.0
+pygeofilter==0.2.0
+stac-fastapi.api==2.4.1
+stac-fastapi.extensions==2.4.1
+stac-fastapi.pgstac==2.3.0
+stac-fastapi.types==2.4.1
diff --git a/requirements-run-codebuild.txt b/requirements-run-codebuild.txt
new file mode 100644
index 00000000..c366ded5
--- /dev/null
+++ b/requirements-run-codebuild.txt
@@ -0,0 +1 @@
+boto3==1.26.12
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 00000000..3265b9ce
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,11 @@
+-r requirements-apps-api.txt
+-r requirements-run-codebuild.txt
+boto3==1.26.12
+cfn-lint==0.71.1
+flake8==5.0.4
+pypgstac[psycopg]==0.6.10
+pystac==1.6.1
+requests==2.28.1
+shapely==1.8.5.post1
+tqdm==4.64.1
+uvicorn==0.19.0
diff --git a/run_codebuild.py b/run_codebuild.py
new file mode 100644
index 00000000..ff59412b
--- /dev/null
+++ b/run_codebuild.py
@@ -0,0 +1,35 @@
+import os
+import sys
+import time
+
+import boto3
+
+CLIENT = boto3.client('codebuild')
+
+
+def main() -> None:
+    # Start a build of the CodeBuild project named by the CODEBUILD_PROJECT
+    # environment variable and poll its status until it reaches a terminal state.
+    project_name = os.environ['CODEBUILD_PROJECT']
+    print(f'Starting CodeBuild for project {project_name}')
+    response = CLIENT.start_build(projectName=project_name)
+
+    build_id = response['build']['id']
+    print(f'Build ID: {build_id}')
+
+    build_status = response['build']['buildStatus']
+    print(f'Build status: {build_status}')
+
+    while build_status == 'IN_PROGRESS':
+        time.sleep(5)
+
+        # Exactly one build id is requested, so exactly one build is expected back.
+        response = CLIENT.batch_get_builds(ids=[build_id])
+        assert len(response['builds']) == 1
+
+        build_status = response['builds'][0]['buildStatus']
+        print(f'Build status: {build_status}')
+
+    # Any terminal status other than SUCCEEDED (FAILED, STOPPED, ...) makes
+    # this script exit non-zero so the calling CI job fails.
+    if build_status != 'SUCCEEDED':
+        sys.exit(f'CodeBuild failed with status {build_status}')
+
+
+if __name__ == '__main__':
+    main()
diff --git a/src/coherence_stac.py b/src/coherence_stac.py
new file mode 100644
index 00000000..52d198e6
--- /dev/null
+++ b/src/coherence_stac.py
@@ -0,0 +1,261 @@
+from concurrent.futures import ThreadPoolExecutor
+from datetime import datetime
+from itertools import repeat
+from pathlib import Path
+
+import boto3
+import pystac
+from pystac.extensions import sar
+from shapely import geometry
+from tqdm import tqdm
+
+'''
+Design:
+
+[x] connect to aws open bucket containing coherence data
+[x] ls the prefixes we're interested in
+[x] parse names (N00E014_007D_inc.tif) -> bbox, season, polarization, dataset
+[x] create base sentinel-1 stac item using https://github.com/stac-extensions/sar
+[x] add coherence metadata to base
+[x] add to catalog
+[x] save catalog locally
+
+structure:
+    item (tile + season)
+        inc
+        lsmap
+        COH(all 4)
+        rho
+        rmse
+        tau
+'''
+
+# Sentinel-1 C-band center frequency in GHz, used for the STAC SAR extension.
+SENTINEL1_CENTER_FREQUENCY = 5.405
+# Season name -> (start, end) datetimes of the Dec-2019..Nov-2020 campaign.
+SEASONS = {
+    'WINTER': (datetime(2019, 12, 1), datetime(2020, 2, 28)),
+    'SPRING': (datetime(2020, 3, 1), datetime(2020, 5, 31)),
+    'SUMMER': (datetime(2020, 6, 1), datetime(2020, 8, 31)),
+    'FALL': (datetime(2020, 9, 1), datetime(2020, 11, 30)),
+}
+LICENSE = 'Creative Commons Attribution 4.0 International Public License'
+
+DATA_CITATION = (
+    'Kellndorfer, J. , O. Cartus, M. Lavalle, C. Magnard, P. Milillo, S. Oveisgharan, B. Osmanoglu, '
+    'P. Rosen, and U. Wegmuller. 2022. Global seasonal Sentinel-1 interferometric coherence and backscatter data set. '
+    '[Indicate subset used]. Fairbanks, Alaska USA. NASA Alaska Satellite Facility Synthetic Aperture Radar '
+    'Distributed Active Archive Center. doi: https://doi.org/10.5067/8W33RRS6S2RV. [Date Accessed].'
+)
+
+LITERATURE_CITATION = (
+    'Kellndorfer, J. , O. Cartus, M. Lavalle, C. Magnard, P. Milillo, S. Oveisgharan, B. '
+    'Osmanoglu, P. Rosen, and U. Wegmuller. 2022. Global seasonal Sentinel-1 interferometric coherence and '
+    'backscatter data set., Scientific Data. https://doi.org/10.1038/s41597-022-01189-6'
+)
+
+DESCRIPTION = (
+    'This data set is the first-of-its-kind spatial representation of multi-seasonal, global C-band '
+    'Synthetic Aperture Radar (SAR) interferometric repeat-pass coherence and backscatter signatures. Coverage '
+    'comprises land masses and ice sheets from 82° Northern to 79° Southern latitudes. The data set is derived from '
+    'multi-temporal repeat-pass interferometric processing of about 205,000 Sentinel-1 C-band SAR images acquired in '
+    'Interferometric Wide-Swath Mode from 1-Dec-2019 to 30-Nov-2020. The data set encompasses three sets of seasonal '
+    '(December-February, March-May, June-August, September-November) metrics produced with a pixel spacing of three '
+    'arcseconds: 1) Median 6-, 12-, 18-, 24-, 36-, and 48-days repeat-pass coherence at VV or HH polarizations, 2) '
+    'Mean radiometrically terrain corrected backscatter (γ0) at VV and VH, or HH and HV polarizations, and 3) '
+    'Estimated parameters of an exponential coherence decay model. The data set has been produced to obtain global, '
+    'spatially detailed information on how decorrelation affects interferometric measurements of surface displacement '
+    'and is rich in spatial and temporal information for a variety of mapping applications.'
+)
+
+
+def construct_url(s3_client, bucket, key):
+    # Build a regional virtual-hosted-style HTTPS URL for an S3 object.
+    # NOTE(review): get_bucket_location returns LocationConstraint=None for
+    # us-east-1 buckets, which would produce 's3.None' here — presumably all
+    # buckets used live in a named region; confirm.
+    location = s3_client.get_bucket_location(Bucket=bucket)['LocationConstraint']
+    url = f'https://{bucket}.s3.{location}.amazonaws.com/{key}'
+    return url
+
+
+def get_object_urls(s3_client, bucket, prefix, requester_pays=False):
+    # List every object under the prefix and return its public HTTPS URL.
+    # NOTE(review): list_objects_v2 returns at most 1000 keys per call and no
+    # pagination is done — assumes each tile prefix holds fewer objects.
+    kwargs = {'RequestPayer': 'requester'} if requester_pays else {}
+    response = s3_client.list_objects_v2(Bucket=bucket, Prefix=prefix, **kwargs)
+    keys = [x['Key'] for x in response['Contents']]
+    urls = [construct_url(s3_client, bucket, x) for x in keys]
+    return urls
+
+
+# TODO why is there an extra zero in N48W090, will this cause issues?
+def tileid_to_bbox(tileid):
+    # Tile IDs look like 'N48W005': hemisphere letter + 2-digit latitude, then
+    # hemisphere letter + 3-digit longitude of the tile's NW corner; each tile
+    # spans 1x1 degree, so south/east are one degree from north/west.
+    north = int(tileid[1:3])
+    if tileid[0] == 'S':
+        north *= -1
+    south = north - 1
+
+    west = int(tileid[4:7])
+    if tileid[3] == 'W':
+        west *= -1
+    east = west + 1
+    bbox = geometry.box(west, south, east, north)
+    return bbox
+
+
+def parse_url(url):
+    # Filename stems have either 3 parts (yearly products,
+    # TILEID_ORBIT_PRODUCT, e.g. N00E014_007D_inc) or 4 parts (seasonal
+    # products, TILEID_SEASON_POLARIZATION_PRODUCT, e.g. N48W005_summer_vv_COH12).
+    parts = Path(url.upper()).stem.split('_')
+    if len(parts) == 3:
+        tileid, orbit, product = parts
+        bbox = tileid_to_bbox(tileid)
+        metadata = {'url': url, 'bbox': bbox, 'tileid': tileid, 'product': product}
+        return metadata
+
+    tileid, season, polarization, product = parts
+    bbox = tileid_to_bbox(tileid)
+    date_range = SEASONS[season]
+    metadata = {
+        'url': url,
+        'bbox': bbox,
+        'tileid': tileid,
+        'product': product,
+        'date_range': date_range,
+        'season': season,
+        'polarization': polarization,
+    }
+    return metadata
+
+
+def create_stac_item(yearly_assets, seasonal_assets):
+    # One STAC item per (tile, season). The first seasonal asset supplies the
+    # tile id, season, bbox and date range shared by all assets of the item;
+    # the item datetime is the midpoint of the seasonal date range.
+    ex_asset = seasonal_assets[0]
+    item_id = f'{ex_asset["tileid"]}_{ex_asset["season"]}'
+    start_date = ex_asset['date_range'][0]
+    end_date = ex_asset['date_range'][1]
+    mid_date = start_date + (end_date - start_date) / 2
+    properties = {
+        'tileid': ex_asset['tileid'],
+        'season': ex_asset['season'],
+        'start_datetime': start_date.isoformat(),
+        'end_datetime': end_date.isoformat(),
+    }
+    item = pystac.Item(
+        id=item_id,
+        geometry=geometry.mapping(ex_asset['bbox']),
+        bbox=ex_asset['bbox'].bounds,
+        datetime=mid_date,
+        properties=properties,
+    )
+
+    # Attach the STAC SAR extension with Sentinel-1 IW acquisition parameters.
+    ext_sar = sar.SarExtension.ext(item, add_if_missing=True)
+    ext_sar.apply(
+        'IW',
+        sar.FrequencyBand('C'),
+        [sar.Polarization('VV'), sar.Polarization('VH')],
+        'COH',
+        SENTINEL1_CENTER_FREQUENCY,
+        looks_range=12,
+        looks_azimuth=3,
+        observation_direction=sar.ObservationDirection('right'),
+    )
+
+    # Yearly products (inc, lsmap) are keyed by product name alone.
+    for asset in yearly_assets:
+        item.add_asset(
+            key=asset['product'],
+            asset=pystac.Asset(href=asset['url'], media_type=pystac.MediaType.GEOTIFF),
+        )
+
+    for asset in seasonal_assets:
+        key = f'{asset["product"]}_{asset["polarization"]}'
+        asset_properties = {'polarization': asset['polarization']}
+        # COH products encode the repeat-pass separation in their name
+        # (e.g. COH12 -> '12 days').
+        if 'COH' in asset['product']:
+            asset_properties['product'] = 'COH'
+            asset_properties['temporal_separation'] = f'{int(asset["product"][-2:])} days'
+        else:
+            asset_properties['product'] = asset['product']
+
+        item.add_asset(
+            key=key,
+            asset=pystac.Asset(href=asset['url'], media_type=pystac.MediaType.GEOTIFF, extra_fields=asset_properties),
+        )
+    return item
+
+
+def create_tile_stac_collection(
+    s3_client, bucket, prefix, date_interval=(datetime(2019, 12, 1), datetime(2020, 11, 30))
+):
+    # Build one collection per tile: four seasonal items, each also carrying
+    # the tile's yearly (inc/lsmap) assets.
+    urls = get_object_urls(s3_client, bucket, prefix, requester_pays=True)
+    asset_dicts = [parse_url(x) for x in urls]
+    items = []
+
+    yearly_assets = [x for x in asset_dicts if ('inc' in x['url']) or ('lsmap' in x['url'])]
+    for season in ('spring', 'summer', 'fall', 'winter'):
+        seasonal_assets = [x for x in asset_dicts if season in x['url']]
+        item = create_stac_item(yearly_assets, seasonal_assets)
+        items.append(item)
+
+    # All items share the tile's bbox, so the first item's bbox covers the
+    # whole collection.
+    tileid = asset_dicts[0]['tileid']
+    spatial_extent = pystac.SpatialExtent(items[0].bbox)
+    temporal_extent = pystac.TemporalExtent(intervals=[date_interval])
+    collection_extent = pystac.Extent(spatial=spatial_extent, temporal=temporal_extent)
+    collection = pystac.Collection(
+        id=tileid,
+        description=f'Sentinel-1 Coherence Tile {tileid}',
+        extent=collection_extent,
+    )
+    collection.add_items(items)
+    return collection
+
+
+def create_stac_catalog():
+    # Root catalog carrying the license and citation metadata as extra fields.
+    extra_fields = {'License': LICENSE, 'Data Citation': DATA_CITATION, 'Literature Citation': LITERATURE_CITATION}
+    catalog = pystac.Catalog(
+        id='sentinel-1-global-coherence-earthbigdata',
+        description=DESCRIPTION,
+        catalog_type=pystac.CatalogType.RELATIVE_PUBLISHED,
+        extra_fields=extra_fields,
+    )
+    return catalog
+
+
+def save_stac_catalog_locally(catalog, catalog_name: str):
+    # Write the catalog tree under a local directory; returns the root json path.
+    catalog_name = Path(catalog_name)
+    if not catalog_name.exists():
+        catalog_name.mkdir()
+    catalog.normalize_hrefs(str(catalog_name))
+    catalog.save()
+    return catalog_name / 'catalog.json'
+
+
+def save_stac_catalog_s3(catalog, s3_client, bucket, key):
+    # Normalize hrefs against the S3 HTTPS URL, but write the files locally
+    # under the key's basename (the __main__ block uploads them afterwards).
+    # NOTE(review): Path() on a URL collapses 'https://' to 'https:/' — the
+    # hrefs written into the catalog may be malformed; confirm against output.
+    base_url = Path(construct_url(s3_client, bucket, key))
+    catalog_name = base_url.name
+    catalog.normalize_hrefs(str(base_url))
+    catalog.save(dest_href=catalog_name)
+    return catalog_name
+
+
+def parse_france_list(data_path):
+    # Derive the unique tile IDs from a file of tile URLs, one URL per line.
+    with open(data_path, 'r') as f:
+        urls = [x.strip() for x in f.readlines()]
+    tileids = [parse_url(x)['tileid'] for x in urls]
+    return list(set(tileids))
+
+
+if __name__ == '__main__':
+    bucket = 'sentinel-1-global-coherence-earthbigdata'
+    upload_bucket = 'ffwilliams2-shenanigans'
+    upload_key = 'stac/coherence_stac'
+    # tiles = ['N48W005', 'N49W005']
+    tiles = parse_france_list('data/france_urls.txt')
+    prefixes = [f'data/tiles/{x}/' for x in tiles]
+    s3 = boto3.client('s3')
+
+    # Multi-thread: build one collection per tile prefix concurrently.
+    print('creating...')
+    with ThreadPoolExecutor(max_workers=20) as executor:
+        results = list(
+            tqdm(executor.map(create_tile_stac_collection, repeat(s3), repeat(bucket), prefixes), total=len(prefixes))
+        )
+    catalog = create_stac_catalog()
+    for collection in results:
+        catalog.add_child(collection)
+
+    # Save locally (hrefs point at S3), then upload every json to the bucket.
+    catalog_name = save_stac_catalog_s3(catalog, s3, upload_bucket, upload_key)
+    jsons = [str(x) for x in Path(catalog_name).glob('**/*json')]
+    json_keys = [str(Path(upload_key).parent / x) for x in jsons]
+    print('uploading...')
+    with ThreadPoolExecutor(max_workers=20) as executor:
+        _ = list(tqdm(executor.map(s3.upload_file, jsons, repeat(upload_bucket), json_keys), total=len(jsons)))