diff --git a/scripts/generate_apache_file.py b/scripts/generate_apache_file.py deleted file mode 100644 index 56641ca..0000000 --- a/scripts/generate_apache_file.py +++ /dev/null @@ -1,198 +0,0 @@ -""" -This file (re)generates the apache hosts file for the active site -with all needed redirects for the various existing containers. - -requirements: the `docker` python package -""" -import logging -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) - -### CONFIGURABLE CONFIGURATION -SERVER_NAME = "archive-live-optimade.materialscloud.org" # Used to generate the Apache file -DEMO_MODE = False # If True, do not check docker containers but just return fake data - -# Only filter containers from these images; must be a set -VALID_IMAGE_TAGS = {'optimade-python-tools-server:latest'} # TODO REPLACE HERE WITH CORRECT (SET OF) BASE IMAGE NAME(S) -# Check the exposed port pointing to this internal port in the container; skip if this port is not exposed, or if -# it is not exposed to 0.0.0.0 in the Host -INTERNAL_WEB_PORT = '5000/tcp' - - -### STATIC CONFIGURATION - -main_template = """ -DirectoryIndex index.php index.html - - - ServerName {SERVER_NAME} - DocumentRoot "/var/www/html" - - - AllowOverride All - Options -Indexes +FollowSymLinks - Require all granted - - Alias /.well-known/acme-challenge /var/www/letsencrypt/.well-known/acme-challenge - - Order allow,deny - Allow from all - - - # redirect all traffic to SSL, unless a specific VirtualHost for *:80 is specified, - # which would take precedence over the default virtual host. 
- # Make an exception for the location required for the Letsencrypt/ACME client challenge file - RewriteEngine on - RewriteCond %{{HTTPS}} !=on - RewriteCond %{{REQUEST_URI}} !/.well-known/acme-challenge - RewriteRule .* https://%{{SERVER_NAME}}%{{REQUEST_URI}} [R=301,L] - - - - - ServerName {SERVER_NAME} - DocumentRoot "/var/www/html" - - SSLEngine on - SSLCipherSuite AES256+EECDH:AES256+EDH - SSLProtocol All -SSLv2 -SSLv3 -TLSv1 - SSLHonorCipherOrder On - SSLCompression off - SSLCertificateFile /etc/letsencrypt/live/{SERVER_NAME}/fullchain.pem - SSLCertificateKeyFile /etc/letsencrypt/live/{SERVER_NAME}/privkey.pem - - - AllowOverride All - Options -Indexes +FollowSymLinks - Require all granted - - - SSLProxyEngine on - RewriteEngine on - - {CONTAINER_REDIRECTS} - - -""" - -template_container_redirect = """ - RewriteRule ^/{URL}$ /{URL}/ [R=301,L] - - ProxyPreserveHost On - ProxyPass http://localhost:{PORT}/ upgrade=ANY - ProxyPassReverse http://localhost:{PORT}/ - RequestHeader set X-Script-Name /{URL} - RequestHeader set X-Scheme https - -""" - -#### -# FUNCTIONALITY - -def get_url_from_container_name(name, url_prefix=''): - """Given the container name, extract back the part of the URL. - - This URL part will be used in apache redirects. - - The rule is the following. For a DOI like: - - https://doi.org/10.24435/materialscloud:qt-4b - - potential urls: - - 10.24435/materialscloud:qt-4b - 10.24435/materialscloud:2017.0008/v1 - - because container name doesn't allow '/', ':', ... - consider a mapping from url to container name by - - _ -> _u_ - : -> _c_ - / -> _s_ - - (Note: currently not implemented!) 
- - We then accept only container names starting with `10.24435_` and the URL - is obtained from the rest, possibly prepending an URL prefix (from the - kwargs of this function) - """ - valid_prefix = '10.24435_' - if not name.startswith(valid_prefix): - raise ValueError("Invalid container name") - - return f'{url_prefix}{name.replace("=", ":")[len(valid_prefix):]}' - -def get_container_metas(demo=False): - if demo: - return [ - { - "URL": "url1", - "PORT": "12341", - }, - { - "URL": "url2", - "PORT": "12342", - }, - ] - - import docker - - container_metas = [] - - client = docker.from_env() - # Only running ones - containers = client.containers.list() - for container in containers: - if not VALID_IMAGE_TAGS.intersection(container.image.tags): - # Probably there is a better way to filter in list(), for now I - # just skip anything that does not start from one of the provided tags - logger.info(f"[INCOMPATIBLE IMAGE] Skipping {container.name=} with {container.image.tags=}") - continue - - try: - host_ports = container.ports[INTERNAL_WEB_PORT] - except KeyError: - logger.info(f"[NO VALID PORT] Skipping {container.name=} with {container.image.tags=}, {container.ports=}") - continue - - host_port = None - for host_port_meta in host_ports: - if host_port_meta['HostIp'] == '0.0.0.0': - host_port = host_port_meta['HostPort'] - break - if host_port is None: - logger.info(f"[NO PORT EXPOSED ON 0.0.0.0] Skipping {container.name=} with {container.image.tags=}, {container.ports=}") - break - - try: - url = get_url_from_container_name(container.name) - except ValueError as exc: - logger.info(f'[INVALID CONTAINER NAME] Skipping {container.name=}, invalid prefix') - break - - logger.info(f'>> {container.short_id=}, {container.name=}, {host_port=}') - - container_metas.append({ - "URL": url, - "PORT": host_port - }) - - return container_metas - - -def generate_vhosts(demo): - container_redirects = [] - for container_meta in get_container_metas(demo=demo): - 
container_redirects.append(template_container_redirect.format( - **container_meta - )) - - vhosts_file = main_template.format( - SERVER_NAME = SERVER_NAME, - CONTAINER_REDIRECTS = "\n\n".join(container_redirects) - ) - return vhosts_file - -if __name__ == "__main__": - #print(generate_vhosts(demo=DEMO_MODE)) - print(get_container_metas(demo=DEMO_MODE)) diff --git a/src/mcloud_implementation/apache-vhosts-sockets.conf b/src/mcloud_implementation/apache-vhosts-sockets.conf new file mode 100644 index 0000000..dd8b6f0 --- /dev/null +++ b/src/mcloud_implementation/apache-vhosts-sockets.conf @@ -0,0 +1,87 @@ + + ServerName dev-optimade.materialscloud.org + + # Let's encrypt certbot ACME challenge for certificate renewal + Alias /.well-known/acme-challenge /var/www/letsencrypt/.well-known/acme-challenge + + Order allow,deny + Allow from all + + + # redirect all traffic to SSL, unless a specific VirtualHost for *:80 is specified, + # which would take precedence over the default virtual host. + # Make an exception for the location required for the Letsencrypt/ACME client challenge file + RewriteEngine on + RewriteCond %{HTTPS} !=on + RewriteCond %{REQUEST_URI} !/.well-known/acme-challenge + RewriteRule .* https://%{SERVER_NAME}%{REQUEST_URI} [R=301,L] + + + + + ServerName dev-optimade.materialscloud.org + + # Let's encrypt SSL certificate + SSLEngine on + SSLCertificateFile /etc/letsencrypt/live/dev-optimade.materialscloud.org/fullchain.pem + SSLCertificateKeyFile /etc/letsencrypt/live/dev-optimade.materialscloud.org/privkey.pem + + + DocumentRoot /var/www/html + + Options Indexes FollowSymLinks + AllowOverride All + Require all granted + + + ProxyPreserveHost On + + # Log redirection + LogLevel alert rewrite:trace3 + + # Define a custom log format including Apache variables (%U is REQUEST_URI) + LogFormat "%h %l %u %t \"%r\" %>s %b %{Host}i %U" custom_log + CustomLog /var/log/apache2/access.log custom_log + + # ---- + # index metadb + ProxyPass /index http://localhost:3214 
+ ProxyPassReverse /index http://localhost:3214 + # ---- + + RewriteEngine On + + # --------------------------- + # Direct /archive/$1/$2 to the corresponding unix socket $1.sock + # This seems to work... + + # always append a trailing slash + RewriteRule ^/{URL}$ /{URL}/ [R=301,L] + + # don't process files or directories + RewriteCond %{REQUEST_FILENAME} !-f + RewriteCond %{REQUEST_FILENAME} !-d + + # don't process the root path + RewriteCond %{REQUEST_URI} !^/$ + RewriteCond %{REQUEST_URI} !^/index.html$ + + # The following could be used to check if files corresponding to the URI exist, but + # unfortunately this only works with regular files (not unix sockets) + #RewriteCond %{REQUEST_URI} ^/([^/]+)/?(.*)$ + # check that socket corresponding to $1 exists + #RewriteCond /home/ubuntu/sockets/$1.sock -f + + # redirect to the socket + RewriteRule ^/archive/([^/]+)/?(.*)$ unix:/home/ubuntu/optimade-sockets/$1.sock|http://127.0.0.1/$2 [QSA,P] + + # PROBLEM: + # The RewriteRule above doesn't pass through the query parameters (although QSA is specified) + # and I didn't find a way to fix it. Interestingly, when using a normal proxy, the + # query parameters are passed correctly, e.g. + # ProxyPass /test unix:/home/ubuntu/optimade-sockets/test.sock|http://127.0.0.1/v1/structures + # ProxyPassReverse /test unix:/home/ubuntu/optimade-sockets/test.sock|http://127.0.0.1/v1/structures + + # --------------------------- + + diff --git a/src/mcloud_implementation/generate_apache_file.py b/src/mcloud_implementation/generate_apache_file.py new file mode 100755 index 0000000..f602da5 --- /dev/null +++ b/src/mcloud_implementation/generate_apache_file.py @@ -0,0 +1,153 @@ +#!/usr/bin/env python3 + +""" +This file (re)generates the apache hosts file for the active site +with all needed redirects for the various existing containers. 
+ +requirements: the `docker` python package +""" +import logging + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +import docker + +### STATIC CONFIGURATION + +main_template = """ +DirectoryIndex index.php index.html + + + ServerName {SERVER_NAME} + + DocumentRoot "/var/www/html" + + AllowOverride All + Options -Indexes +FollowSymLinks + Require all granted + + + Alias /.well-known/acme-challenge /var/www/letsencrypt/.well-known/acme-challenge + + Order allow,deny + Allow from all + + + # redirect all traffic to SSL, unless a specific VirtualHost for *:80 is specified, + # which would take precedence over the default virtual host. + # Make an exception for the location required for the Letsencrypt/ACME client challenge file + RewriteEngine on + RewriteCond %{{HTTPS}} !=on + RewriteCond %{{REQUEST_URI}} !/.well-known/acme-challenge + RewriteRule .* https://%{{SERVER_NAME}}%{{REQUEST_URI}} [R=301,L] + + + + + ServerName {SERVER_NAME} + + SSLEngine on + SSLCipherSuite AES256+EECDH:AES256+EDH + SSLProtocol All -SSLv2 -SSLv3 -TLSv1 + SSLHonorCipherOrder On + SSLCompression off + SSLCertificateFile /etc/letsencrypt/live/{SERVER_NAME}/fullchain.pem + SSLCertificateKeyFile /etc/letsencrypt/live/{SERVER_NAME}/privkey.pem + + DocumentRoot "/var/www/html" + + AllowOverride All + Options -Indexes +FollowSymLinks + Require all granted + + + SSLProxyEngine on + RewriteEngine on + + # --------------------------- + # index metadb + ProxyPass /index http://localhost:{INDEX_METADB_PORT} + ProxyPassReverse /index http://localhost:{INDEX_METADB_PORT} + # --------------------------- + + {CONTAINER_REDIRECTS} + + +""" + +template_container_redirect = """ + RewriteRule ^/{URL}$ /{URL}/ [R=301,L] + + ProxyPreserveHost On + ProxyPass http://localhost:{PORT} upgrade=ANY + ProxyPassReverse http://localhost:{PORT} + RequestHeader set X-Script-Name /{URL} + RequestHeader set X-Scheme https + +""" + + +def _get_url_from_container_name(name): + doi_id = 
name.split("optimade_")[1] + return f"archive/{doi_id}" + + +def get_container_metas(): + container_metas = [] + running_containers = docker.DockerClient().containers.list() + for container in running_containers: + if not container.name.startswith("optimade_"): + logger.info(f"Skipping {container.name}, not an OPTIMADE container!") + continue + + try: + host_ports = container.ports["5000/tcp"] + except KeyError: + logger.info( + f"[NO VALID PORT] Skipping {container.name=} with {container.image.tags=}, {container.ports=}" + ) + continue + + host_port = None + if host_ports is not None: + for host_port_meta in host_ports: + if host_port_meta["HostIp"] == "0.0.0.0": + host_port = host_port_meta["HostPort"] + break + if host_port is None: + logger.info( + f"[NO PORT EXPOSED ON 0.0.0.0] Skipping {container.name=} with {container.image.tags=}, {container.ports=}" + ) + continue + + try: + url = _get_url_from_container_name(container.name) + except ValueError: + logger.info( + f"[INVALID CONTAINER NAME] Skipping {container.name=}, invalid prefix" + ) + continue + + logger.info(f">> {container.short_id=}, {container.name=}, {host_port=}") + + container_metas.append({"URL": url, "PORT": host_port}) + + return container_metas + + +def generate_vhosts(server_name="optimade.materialscloud.org", index_port=3214): + container_redirects = [] + for container_meta in get_container_metas(): + container_redirects.append(template_container_redirect.format(**container_meta)) + + vhosts_file = main_template.format( + SERVER_NAME=server_name, + INDEX_METADB_PORT=index_port, + CONTAINER_REDIRECTS="\n\n".join(container_redirects), + ) + return vhosts_file + + +if __name__ == "__main__": + print(generate_vhosts()) diff --git a/src/mcloud_implementation/mcloud_master.py b/src/mcloud_implementation/mcloud_master.py index 84895c4..cb84cb5 100755 --- a/src/mcloud_implementation/mcloud_master.py +++ b/src/mcloud_implementation/mcloud_master.py @@ -12,6 +12,9 @@ from tqdm import tqdm import os 
+import socket +import json +import subprocess from pathlib import Path @@ -22,7 +25,10 @@ from datetime import datetime from urllib.parse import urljoin -BASE_URL = "https://dev-optimade.materialscloud.org" +import generate_apache_file + +SERVER_NAME = "dev-optimade.materialscloud.org" +BASE_URL = f"https://{SERVER_NAME}" BASE_URL_INDEX = urljoin(BASE_URL, "/index") ARCHIVE_URL = "https://staging-archive.materialscloud.org/" @@ -36,6 +42,12 @@ mongo_client = MongoClient("localhost", 27017) +def _get_random_empty_port(): + with socket.socket() as sock: + sock.bind(("", 0)) + return sock.getsockname()[1] + + def _mongodb_name(doi_id): return f"optimade_{doi_id}" @@ -93,6 +105,7 @@ def _download_entries_from_archive(): * the subfolder already exists * the mongodb has a database with name "ab-cd" """ + print() print("#### ---------------------------------------------") print("#### Checking MC archive") print("#### ---------------------------------------------") @@ -146,6 +159,7 @@ def _convert_entries_to_jsonl(): * mongodb exists """ + print() print("#### ---------------------------------------------") print("#### Converting downloaded entries to jsonl") print("#### ---------------------------------------------") @@ -197,6 +211,7 @@ def _populate_mongodbs(): * mongodb exists """ + print() print("#### ---------------------------------------------") print("#### Injecting the jsonl data to mongoDB") print("#### ---------------------------------------------") @@ -246,6 +261,7 @@ def _start_containers(): container running) """ + print() print("#### ---------------------------------------------") print("#### Starting containers") print("#### ---------------------------------------------") @@ -257,17 +273,17 @@ def _start_containers(): existing_dbs = _get_optimade_mongodbs(mongo_client) print("Existing MongoDBs: ", existing_dbs) - import subprocess - OLAUNCH_CONFIG_DIR = "/home/ubuntu/optimade-launch-configs" # Note: don't add the JSONL files here, as data was already 
injected separately + # Note: specifying :latest tag for the image doesn't always give the latest version olaunch_config_template = """ --- +image: ghcr.io/materials-consortia/optimade:0.25.3 name: {DOI_ID} mongo_uri: mongodb://localhost:27017 db_name: {DB_NAME} -unix_sock: {SOCKET_PATH} +port: {PORT} optimade_base_url: {BASE_URL} optimade_index_base_url: {BASE_URL_INDEX} optimade_provider: @@ -291,7 +307,7 @@ def _start_containers(): olaunch_config_template.format( DOI_ID=doi_id, DB_NAME=_mongodb_name(doi_id), - SOCKET_PATH=os.path.join(SOCKET_DIR, doi_id + ".sock"), + PORT=_get_random_empty_port(), BASE_URL=urljoin(BASE_URL, f"archive/{doi_id}"), BASE_URL_INDEX=BASE_URL_INDEX, ARCHIVE_URL=ARCHIVE_URL, @@ -321,7 +337,29 @@ def _start_containers(): print(traceback.format_exc()) +def _update_apache_config(): + print() + print("#### ---------------------------------------------") + print("#### Updating apache config") + print("#### ---------------------------------------------") + vhosts_loc = "/etc/apache2/sites-enabled/optimade-vhosts.conf" + with open(vhosts_loc, "w") as f: + f.write( + generate_apache_file.generate_vhosts( + server_name=SERVER_NAME, index_port=3214 + ) + ) + print(f"Updated {vhosts_loc}!") + try: + # Reload apache so it picks up the regenerated vhosts file; check=True makes a failed reload raise + subprocess.run(["sudo", "systemctl", "reload", "apache2"], check=True) + print("Apache reloaded.") + except subprocess.CalledProcessError as e: + print(f"An error occurred: {e}") + + def _update_index(): + print() print("#### ---------------------------------------------") print("#### Updating the index metadb") print("#### ---------------------------------------------") @@ -338,8 +376,8 @@ def _update_index(): }, ] - for socket_file in Path(SOCKET_DIR).glob("*"): - doi_id = socket_file.stem + for container_name in _get_optimade_containers(): + doi_id = container_name.split("optimade_")[1] entry = { "id": doi_id, "type": "links", @@ -353,9 +391,6 @@ def _update_index(): index.append(entry) - import json - 
import subprocess - INDEX_METADB_PATH = "/home/ubuntu/index-metadb" with open(os.path.join(INDEX_METADB_PATH, "index_links.json"), "w") as f: @@ -382,10 +417,10 @@ def _update_index(): def _update_landing_page(): """ - Update the landing page served at the root of the website based on the - unix socket files + Update the landing page served at the root of the website """ + print() print("#### ---------------------------------------------") print("#### update landing page") print("#### ---------------------------------------------") @@ -397,14 +432,14 @@ def _update_landing_page(): landing_page_template = Template(f.read()) rows = [] - for socket_file in Path(SOCKET_DIR).glob("*"): - doi_id = socket_file.stem + for container_name in _get_optimade_containers(): + doi_id = container_name.split("optimade_")[1] metadata = _get_record_metadata_processed(doi_id) rows.append([metadata.get("publication_date"), doi_id, metadata]) # sort in ascending order by publication date # (entries without a date go to the end) - rows.sort(key=lambda x: (x[0] is None, x)) + rows.sort(key=lambda x: (x[0] is not None, x), reverse=True) db_list_html = "" for row in rows: @@ -439,6 +474,7 @@ def _update_landing_page(): @click.option("--skip_convert", is_flag=True) @click.option("--skip_mongo_inject", is_flag=True) @click.option("--skip_containers", is_flag=True) +@click.option("--skip_apache", is_flag=True) @click.option("--skip_index", is_flag=True) @click.option("--skip_landing", is_flag=True) def cli( @@ -446,6 +482,7 @@ def cli( skip_convert, skip_mongo_inject, skip_containers, + skip_apache, skip_index, skip_landing, ): @@ -461,6 +498,8 @@ def cli( _populate_mongodbs() if not skip_containers: _start_containers() + if not skip_apache: + _update_apache_config() if not skip_index: _update_index() if not skip_landing: diff --git a/src/mcloud_implementation/remove_api.sh b/src/mcloud_implementation/remove_api.sh index 2ecade4..0645e79 100755 --- a/src/mcloud_implementation/remove_api.sh +++ 
b/src/mcloud_implementation/remove_api.sh @@ -4,9 +4,9 @@ docker stop optimade_$1 docker rm optimade_$1 -optimade-launch profile remove $1 +optimade-launch profile remove $1 --yes -docker exec -it ubuntu_mongo_1 mongosh optimade_$1 --eval "db.dropDatabase()" +#docker exec -it ubuntu_mongo_1 mongosh optimade_$1 --eval "db.dropDatabase()" sudo unlink /home/ubuntu/optimade-sockets/$1.sock