diff --git a/scripts/generate_apache_file.py b/scripts/generate_apache_file.py
deleted file mode 100644
index 56641ca..0000000
--- a/scripts/generate_apache_file.py
+++ /dev/null
@@ -1,198 +0,0 @@
-"""
-This file (re)generates the apache hosts file for the active site
-with all needed redirects for the various existing containers.
-
-requirements: the `docker` python package
-"""
-import logging
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
-
-### CONFIGURABLE CONFIGURATION
-SERVER_NAME = "archive-live-optimade.materialscloud.org" # Used to generate the Apache file
-DEMO_MODE = False # If True, do not check docker containers but just return fake data
-
-# Only filter containers from these images; must be a set
-VALID_IMAGE_TAGS = {'optimade-python-tools-server:latest'} # TODO REPLACE HERE WITH CORRECT (SET OF) BASE IMAGE NAME(S)
-# Check the exposed port pointing to this internal port in the container; skip if this port is not exposed, or if
-# it is not exposed to 0.0.0.0 in the Host
-INTERNAL_WEB_PORT = '5000/tcp'
-
-
-### STATIC CONFIGURATION
-
-main_template = """
-DirectoryIndex index.php index.html
-
-
- ServerName {SERVER_NAME}
- DocumentRoot "/var/www/html"
-
-
- AllowOverride All
- Options -Indexes +FollowSymLinks
- Require all granted
-
- Alias /.well-known/acme-challenge /var/www/letsencrypt/.well-known/acme-challenge
-
- Order allow,deny
- Allow from all
-
-
- # redirect all traffic to SSL, unless a specific VirtualHost for *:80 is specified,
- # which would take precedence over the default virtual host.
- # Make an exception for the location required for the Letsencrypt/ACME client challenge file
- RewriteEngine on
- RewriteCond %{{HTTPS}} !=on
- RewriteCond %{{REQUEST_URI}} !/.well-known/acme-challenge
- RewriteRule .* https://%{{SERVER_NAME}}%{{REQUEST_URI}} [R=301,L]
-
-
-
-
- ServerName {SERVER_NAME}
- DocumentRoot "/var/www/html"
-
- SSLEngine on
- SSLCipherSuite AES256+EECDH:AES256+EDH
- SSLProtocol All -SSLv2 -SSLv3 -TLSv1
- SSLHonorCipherOrder On
- SSLCompression off
- SSLCertificateFile /etc/letsencrypt/live/{SERVER_NAME}/fullchain.pem
- SSLCertificateKeyFile /etc/letsencrypt/live/{SERVER_NAME}/privkey.pem
-
-
- AllowOverride All
- Options -Indexes +FollowSymLinks
- Require all granted
-
-
- SSLProxyEngine on
- RewriteEngine on
-
- {CONTAINER_REDIRECTS}
-
-
-"""
-
-template_container_redirect = """
- RewriteRule ^/{URL}$ /{URL}/ [R=301,L]
-
- ProxyPreserveHost On
- ProxyPass http://localhost:{PORT}/ upgrade=ANY
- ProxyPassReverse http://localhost:{PORT}/
- RequestHeader set X-Script-Name /{URL}
- RequestHeader set X-Scheme https
-
-"""
-
-####
-# FUNCTIONALITY
-
-def get_url_from_container_name(name, url_prefix=''):
- """Given the container name, extract back the part of the URL.
-
- This URL part will be used in apache redirects.
-
- The rule is the following. For a DOI like:
-
- https://doi.org/10.24435/materialscloud:qt-4b
-
- potential urls:
-
- 10.24435/materialscloud:qt-4b
- 10.24435/materialscloud:2017.0008/v1
-
- because container name doesn't allow '/', ':', ...
- consider a mapping from url to container name by
-
- _ -> _u_
- : -> _c_
- / -> _s_
-
- (Note: currently not implemented!)
-
- We then accept only container names starting with `10.24435_` and the URL
- is obtained from the rest, possibly prepending an URL prefix (from the
- kwargs of this function)
- """
- valid_prefix = '10.24435_'
- if not name.startswith(valid_prefix):
- raise ValueError("Invalid container name")
-
- return f'{url_prefix}{name.replace("=", ":")[len(valid_prefix):]}'
-
-def get_container_metas(demo=False):
- if demo:
- return [
- {
- "URL": "url1",
- "PORT": "12341",
- },
- {
- "URL": "url2",
- "PORT": "12342",
- },
- ]
-
- import docker
-
- container_metas = []
-
- client = docker.from_env()
- # Only running ones
- containers = client.containers.list()
- for container in containers:
- if not VALID_IMAGE_TAGS.intersection(container.image.tags):
- # Probably there is a better way to filter in list(), for now I
- # just skip anything that does not start from one of the provided tags
- logger.info(f"[INCOMPATIBLE IMAGE] Skipping {container.name=} with {container.image.tags=}")
- continue
-
- try:
- host_ports = container.ports[INTERNAL_WEB_PORT]
- except KeyError:
- logger.info(f"[NO VALID PORT] Skipping {container.name=} with {container.image.tags=}, {container.ports=}")
- continue
-
- host_port = None
- for host_port_meta in host_ports:
- if host_port_meta['HostIp'] == '0.0.0.0':
- host_port = host_port_meta['HostPort']
- break
- if host_port is None:
- logger.info(f"[NO PORT EXPOSED ON 0.0.0.0] Skipping {container.name=} with {container.image.tags=}, {container.ports=}")
- break
-
- try:
- url = get_url_from_container_name(container.name)
- except ValueError as exc:
- logger.info(f'[INVALID CONTAINER NAME] Skipping {container.name=}, invalid prefix')
- break
-
- logger.info(f'>> {container.short_id=}, {container.name=}, {host_port=}')
-
- container_metas.append({
- "URL": url,
- "PORT": host_port
- })
-
- return container_metas
-
-
-def generate_vhosts(demo):
- container_redirects = []
- for container_meta in get_container_metas(demo=demo):
- container_redirects.append(template_container_redirect.format(
- **container_meta
- ))
-
- vhosts_file = main_template.format(
- SERVER_NAME = SERVER_NAME,
- CONTAINER_REDIRECTS = "\n\n".join(container_redirects)
- )
- return vhosts_file
-
-if __name__ == "__main__":
- #print(generate_vhosts(demo=DEMO_MODE))
- print(get_container_metas(demo=DEMO_MODE))
diff --git a/src/mcloud_implementation/apache-vhosts-sockets.conf b/src/mcloud_implementation/apache-vhosts-sockets.conf
new file mode 100644
index 0000000..dd8b6f0
--- /dev/null
+++ b/src/mcloud_implementation/apache-vhosts-sockets.conf
@@ -0,0 +1,87 @@
+
+ ServerName dev-optimade.materialscloud.org
+
+ # Let's encrypt certbot ACME challenge for certificate renewal
+ Alias /.well-known/acme-challenge /var/www/letsencrypt/.well-known/acme-challenge
+
+ Order allow,deny
+ Allow from all
+
+
+ # redirect all traffic to SSL, unless a specific VirtualHost for *:80 is specified,
+ # which would take precedence over the default virtual host.
+ # Make an exception for the location required for the Letsencrypt/ACME client challenge file
+ RewriteEngine on
+ RewriteCond %{HTTPS} !=on
+ RewriteCond %{REQUEST_URI} !/.well-known/acme-challenge
+ RewriteRule .* https://%{SERVER_NAME}%{REQUEST_URI} [R=301,L]
+
+
+
+
+ ServerName dev-optimade.materialscloud.org
+
+ # Let's encrypt SSL certificate
+ SSLEngine on
+ SSLCertificateFile /etc/letsencrypt/live/dev-optimade.materialscloud.org/fullchain.pem
+ SSLCertificateKeyFile /etc/letsencrypt/live/dev-optimade.materialscloud.org/privkey.pem
+
+
+ DocumentRoot /var/www/html
+
+ Options Indexes FollowSymLinks
+ AllowOverride All
+ Require all granted
+
+
+ ProxyPreserveHost On
+
+ # Log redirection
+ LogLevel alert rewrite:trace3
+
+ # Define a custom log format including Apache variables (%U is REQUEST_URI)
+ LogFormat "%h %l %u %t \"%r\" %>s %b %{Host}i %U" custom_log
+ CustomLog /var/log/apache2/access.log custom_log
+
+ # ----
+ # index metadb
+ ProxyPass /index http://localhost:3214
+ ProxyPassReverse /index http://localhost:3214
+ # ----
+
+ RewriteEngine On
+
+ # ---------------------------
+ # Direct /archive/$1/$2 to the corresponding unix socket $1.sock
+ # This seems to work...
+
+ # always append a trailing slash: /archive/<id> -> /archive/<id>/
+ # (the pattern must capture the id itself; a literal "{URL}" template
+ # placeholder is never substituted in this static file and would match nothing)
+ RewriteRule ^/archive/([^/]+)$ /archive/$1/ [R=301,L]
+
+ # don't process files or directories
+ RewriteCond %{REQUEST_FILENAME} !-f
+ RewriteCond %{REQUEST_FILENAME} !-d
+
+ # don't process the root path
+ RewriteCond %{REQUEST_URI} !^/$
+ RewriteCond %{REQUEST_URI} !^/index.html$
+
+ # The following could be used to check if files corresponding to URI exists, but
+ # unfortunately this only works with regular files (not unix sockets)
+ #RewriteCond %{REQUEST_URI} ^/([^/]+)/?(.*)$
+ # check that socket corresponding to $1 exists
+ #RewriteCond /home/ubuntu/sockets/$1.sock -f
+
+ # redirect to the socket
+ RewriteRule ^/archive/([^/]+)/?(.*)$ unix:/home/ubuntu/optimade-sockets/$1.sock|http://127.0.0.1/$2 [QSA,P]
+
+ # PROBLEM:
+ # The RewriteRule above doesn't pass through the query parameters (although QSA is specified)
+ # and I didn't find a way to fix it. Interestingly, when using a normal proxy, the
+ # query parameters are passed correctly, e.g.
+ # ProxyPass /test unix:/home/ubuntu/optimade-sockets/test.sock|http://127.0.0.1/v1/structures
+ # ProxyPassReverse /test unix:/home/ubuntu/optimade-sockets/test.sock|http://127.0.0.1/v1/structures
+
+ # ---------------------------
+
+
diff --git a/src/mcloud_implementation/generate_apache_file.py b/src/mcloud_implementation/generate_apache_file.py
new file mode 100755
index 0000000..f602da5
--- /dev/null
+++ b/src/mcloud_implementation/generate_apache_file.py
@@ -0,0 +1,153 @@
+#!/usr/bin/env python3
+
+"""
+This file (re)generates the apache hosts file for the active site
+with all needed redirects for the various existing containers.
+
+requirements: the `docker` python package
+"""
+import logging
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+import docker
+
+### STATIC CONFIGURATION
+
+main_template = """
+DirectoryIndex index.php index.html
+
+
+ ServerName {SERVER_NAME}
+
+ DocumentRoot "/var/www/html"
+
+ AllowOverride All
+ Options -Indexes +FollowSymLinks
+ Require all granted
+
+
+ Alias /.well-known/acme-challenge /var/www/letsencrypt/.well-known/acme-challenge
+
+ Order allow,deny
+ Allow from all
+
+
+ # redirect all traffic to SSL, unless a specific VirtualHost for *:80 is specified,
+ # which would take precedence over the default virtual host.
+ # Make an exception for the location required for the Letsencrypt/ACME client challenge file
+ RewriteEngine on
+ RewriteCond %{{HTTPS}} !=on
+ RewriteCond %{{REQUEST_URI}} !/.well-known/acme-challenge
+ RewriteRule .* https://%{{SERVER_NAME}}%{{REQUEST_URI}} [R=301,L]
+
+
+
+
+ ServerName {SERVER_NAME}
+
+ SSLEngine on
+ SSLCipherSuite AES256+EECDH:AES256+EDH
+ SSLProtocol All -SSLv2 -SSLv3 -TLSv1
+ SSLHonorCipherOrder On
+ SSLCompression off
+ SSLCertificateFile /etc/letsencrypt/live/{SERVER_NAME}/fullchain.pem
+ SSLCertificateKeyFile /etc/letsencrypt/live/{SERVER_NAME}/privkey.pem
+
+ DocumentRoot "/var/www/html"
+
+ AllowOverride All
+ Options -Indexes +FollowSymLinks
+ Require all granted
+
+
+ SSLProxyEngine on
+ RewriteEngine on
+
+ # ---------------------------
+ # index metadb
+ ProxyPass /index http://localhost:{INDEX_METADB_PORT}
+ ProxyPassReverse /index http://localhost:{INDEX_METADB_PORT}
+ # ---------------------------
+
+ {CONTAINER_REDIRECTS}
+
+
+"""
+
+template_container_redirect = """
+ RewriteRule ^/{URL}$ /{URL}/ [R=301,L]
+
+ ProxyPreserveHost On
+ ProxyPass http://localhost:{PORT} upgrade=ANY
+ ProxyPassReverse http://localhost:{PORT}
+ RequestHeader set X-Script-Name /{URL}
+ RequestHeader set X-Scheme https
+
+"""
+
+
+def _get_url_from_container_name(name):
+    """Return the URL path (``archive/<doi_id>``) served by a container.
+
+    The DOI id is everything after the leading ``optimade_`` prefix of the
+    container name.  Only the leading prefix is removed, so an id that itself
+    contains the text ``optimade_`` is kept intact.
+
+    Args:
+        name: Container name, expected to look like ``optimade_<doi_id>``.
+
+    Returns:
+        The string ``archive/<doi_id>`` (no leading or trailing slash).
+
+    Raises:
+        ValueError: If ``name`` does not start with ``optimade_`` or has
+            nothing after the prefix.
+    """
+    prefix = "optimade_"
+    if not name.startswith(prefix):
+        raise ValueError(f"Invalid container name {name!r}: missing {prefix!r} prefix")
+    # Slice instead of split() so later occurrences of the prefix survive.
+    doi_id = name[len(prefix):]
+    if not doi_id:
+        raise ValueError(f"Invalid container name {name!r}: empty DOI id")
+    return f"archive/{doi_id}"
+
+
+def get_container_metas(internal_port="5000/tcp"):
+    """Collect the URL path and host port of each OPTIMADE container.
+
+    Iterates over the containers returned by ``containers.list()`` and keeps
+    those whose name starts with ``optimade_``.  A container is skipped, with
+    an info-level log message, when:
+
+    * ``internal_port`` is not a key of its port map,
+    * that port has no binding on host address ``0.0.0.0``, or
+    * no URL can be derived from its name.
+
+    Skipping one container never stops the scan of the remaining ones.
+
+    Args:
+        internal_port: Key looked up in ``container.ports``.
+
+    Returns:
+        A list of ``{"URL": ..., "PORT": ...}`` dicts, one per accepted
+        container.
+    """
+    container_metas = []
+    running_containers = docker.DockerClient().containers.list()
+    for container in running_containers:
+        if not container.name.startswith("optimade_"):
+            logger.info(f"Skipping {container.name}, not an OPTIMADE container!")
+            continue
+
+        try:
+            host_ports = container.ports[internal_port]
+        except KeyError:
+            logger.info(
+                f"[NO VALID PORT] Skipping {container.name=} with {container.image.tags=}, {container.ports=}"
+            )
+            continue
+
+        # The port-map entry may be None (presumably an exposed but
+        # unpublished port -- confirm against the Docker SDK), hence `or []`.
+        host_port = next(
+            (
+                binding["HostPort"]
+                for binding in host_ports or []
+                if binding["HostIp"] == "0.0.0.0"
+            ),
+            None,
+        )
+        if host_port is None:
+            logger.info(
+                f"[NO PORT EXPOSED ON 0.0.0.0] Skipping {container.name=} with {container.image.tags=}, {container.ports=}"
+            )
+            continue
+
+        try:
+            url = _get_url_from_container_name(container.name)
+        except ValueError:
+            logger.info(
+                f"[INVALID CONTAINER NAME] Skipping {container.name=}, invalid prefix"
+            )
+            # Skip only this container; a `break` here would silently drop
+            # every container that happens to be listed after it.
+            continue
+
+        logger.info(f">> {container.short_id=}, {container.name=}, {host_port=}")
+
+        container_metas.append({"URL": url, "PORT": host_port})
+
+    return container_metas
+
+
+def generate_vhosts(server_name="optimade.materialscloud.org", index_port=3214):
+    """Render the full Apache vhosts configuration and return it as a string.
+
+    One ``template_container_redirect`` section is rendered per entry returned
+    by ``get_container_metas()``; the sections are joined with a blank line
+    and substituted into ``main_template``.
+
+    Args:
+        server_name: Value substituted for ``{SERVER_NAME}``.
+        index_port: Value substituted for ``{INDEX_METADB_PORT}``.
+
+    Returns:
+        The rendered configuration text.
+    """
+    redirect_sections = [
+        template_container_redirect.format(**meta) for meta in get_container_metas()
+    ]
+    return main_template.format(
+        SERVER_NAME=server_name,
+        INDEX_METADB_PORT=index_port,
+        CONTAINER_REDIRECTS="\n\n".join(redirect_sections),
+    )
+
+
+# Script entry point: print the generated vhosts configuration (with the
+# default server name and index port) to stdout.
+if __name__ == "__main__":
+ print(generate_vhosts())
diff --git a/src/mcloud_implementation/mcloud_master.py b/src/mcloud_implementation/mcloud_master.py
index 84895c4..cb84cb5 100755
--- a/src/mcloud_implementation/mcloud_master.py
+++ b/src/mcloud_implementation/mcloud_master.py
@@ -12,6 +12,9 @@
from tqdm import tqdm
import os
+import socket
+import json
+import subprocess
from pathlib import Path
@@ -22,7 +25,10 @@
from datetime import datetime
from urllib.parse import urljoin
-BASE_URL = "https://dev-optimade.materialscloud.org"
+import generate_apache_file
+
+SERVER_NAME = "dev-optimade.materialscloud.org"
+BASE_URL = f"https://{SERVER_NAME}"
BASE_URL_INDEX = urljoin(BASE_URL, "/index")
ARCHIVE_URL = "https://staging-archive.materialscloud.org/"
@@ -36,6 +42,12 @@
mongo_client = MongoClient("localhost", 27017)
+def _get_random_empty_port():
+    """Return a port number that the OS reported as free at call time.
+
+    A temporary socket is bound to port 0 on all interfaces so the OS picks
+    an unused port; the socket is closed before returning, so the port is
+    only known to have been free at that moment.
+    """
+    probe = socket.socket()
+    try:
+        probe.bind(("", 0))
+        _host, port = probe.getsockname()
+        return port
+    finally:
+        probe.close()
+
+
def _mongodb_name(doi_id):
return f"optimade_{doi_id}"
@@ -93,6 +105,7 @@ def _download_entries_from_archive():
* the subfolder already exists
* the mongodb has a database with name "ab-cd"
"""
+ print()
print("#### ---------------------------------------------")
print("#### Checking MC archive")
print("#### ---------------------------------------------")
@@ -146,6 +159,7 @@ def _convert_entries_to_jsonl():
* mongodb exists
"""
+ print()
print("#### ---------------------------------------------")
print("#### Converting downloaded entries to jsonl")
print("#### ---------------------------------------------")
@@ -197,6 +211,7 @@ def _populate_mongodbs():
* mongodb exists
"""
+ print()
print("#### ---------------------------------------------")
print("#### Injecting the jsonl data to mongoDB")
print("#### ---------------------------------------------")
@@ -246,6 +261,7 @@ def _start_containers():
container running)
"""
+ print()
print("#### ---------------------------------------------")
print("#### Starting containers")
print("#### ---------------------------------------------")
@@ -257,17 +273,17 @@ def _start_containers():
existing_dbs = _get_optimade_mongodbs(mongo_client)
print("Existing MongoDBs: ", existing_dbs)
- import subprocess
-
OLAUNCH_CONFIG_DIR = "/home/ubuntu/optimade-launch-configs"
# Note: don't add the JSONL files here, as data was already injected separately
+ # Note: specifying :latest tag for the image doesn't always give the latest version
olaunch_config_template = """
---
+image: ghcr.io/materials-consortia/optimade:0.25.3
name: {DOI_ID}
mongo_uri: mongodb://localhost:27017
db_name: {DB_NAME}
-unix_sock: {SOCKET_PATH}
+port: {PORT}
optimade_base_url: {BASE_URL}
optimade_index_base_url: {BASE_URL_INDEX}
optimade_provider:
@@ -291,7 +307,7 @@ def _start_containers():
olaunch_config_template.format(
DOI_ID=doi_id,
DB_NAME=_mongodb_name(doi_id),
- SOCKET_PATH=os.path.join(SOCKET_DIR, doi_id + ".sock"),
+ PORT=_get_random_empty_port(),
BASE_URL=urljoin(BASE_URL, f"archive/{doi_id}"),
BASE_URL_INDEX=BASE_URL_INDEX,
ARCHIVE_URL=ARCHIVE_URL,
@@ -321,7 +337,29 @@ def _start_containers():
print(traceback.format_exc())
+def _update_apache_config():
+    """Regenerate the OPTIMADE Apache vhosts file and reload Apache.
+
+    The configuration is rendered with
+    ``generate_apache_file.generate_vhosts`` and written to
+    ``/etc/apache2/sites-enabled/optimade-vhosts.conf``; Apache is then
+    reloaded through ``sudo systemctl reload apache2``.  A failing reload is
+    reported on stdout instead of being raised.
+    """
+    print()
+    print("#### ---------------------------------------------")
+    print("#### Updating apache config")
+    print("#### ---------------------------------------------")
+    vhosts_loc = "/etc/apache2/sites-enabled/optimade-vhosts.conf"
+    # Render before opening the file: opening with "w" truncates it, so a
+    # failure during rendering would otherwise leave an empty vhosts file.
+    vhosts_content = generate_apache_file.generate_vhosts(
+        server_name=SERVER_NAME, index_port=3214
+    )
+    with open(vhosts_loc, "w") as f:
+        f.write(vhosts_content)
+    print(f"Updated {vhosts_loc}!")
+    try:
+        # Reload Apache so the new vhosts file takes effect.  check=True is
+        # required for a non-zero exit status to raise CalledProcessError;
+        # without it the success message is printed even when reload fails.
+        subprocess.run(["sudo", "systemctl", "reload", "apache2"], check=True)
+        print("Apache reloaded.")
+    except subprocess.CalledProcessError as e:
+        print(f"An error occurred: {e}")
+
+
def _update_index():
+ print()
print("#### ---------------------------------------------")
print("#### Updating the index metadb")
print("#### ---------------------------------------------")
@@ -338,8 +376,8 @@ def _update_index():
},
]
- for socket_file in Path(SOCKET_DIR).glob("*"):
- doi_id = socket_file.stem
+ for container_name in _get_optimade_containers():
+ doi_id = container_name.split("optimade_")[1]
entry = {
"id": doi_id,
"type": "links",
@@ -353,9 +391,6 @@ def _update_index():
index.append(entry)
- import json
- import subprocess
-
INDEX_METADB_PATH = "/home/ubuntu/index-metadb"
with open(os.path.join(INDEX_METADB_PATH, "index_links.json"), "w") as f:
@@ -382,10 +417,10 @@ def _update_index():
def _update_landing_page():
"""
- Update the landing page served at the root of the website based on the
- unix socket files
+ Update the landing page served at the root of the website
"""
+ print()
print("#### ---------------------------------------------")
print("#### update landing page")
print("#### ---------------------------------------------")
@@ -397,14 +432,14 @@ def _update_landing_page():
landing_page_template = Template(f.read())
rows = []
- for socket_file in Path(SOCKET_DIR).glob("*"):
- doi_id = socket_file.stem
+ for container_name in _get_optimade_containers():
+ doi_id = container_name.split("optimade_")[1]
metadata = _get_record_metadata_processed(doi_id)
rows.append([metadata.get("publication_date"), doi_id, metadata])
# sort in ascending order by publication date
# (entries without a date go to the end)
- rows.sort(key=lambda x: (x[0] is None, x))
+ rows.sort(key=lambda x: (x[0] is not None, x), reverse=True)
db_list_html = ""
for row in rows:
@@ -439,6 +474,7 @@ def _update_landing_page():
@click.option("--skip_convert", is_flag=True)
@click.option("--skip_mongo_inject", is_flag=True)
@click.option("--skip_containers", is_flag=True)
+@click.option("--skip_apache", is_flag=True)
@click.option("--skip_index", is_flag=True)
@click.option("--skip_landing", is_flag=True)
def cli(
@@ -446,6 +482,7 @@ def cli(
skip_convert,
skip_mongo_inject,
skip_containers,
+ skip_apache,
skip_index,
skip_landing,
):
@@ -461,6 +498,8 @@ def cli(
_populate_mongodbs()
if not skip_containers:
_start_containers()
+ if not skip_apache:
+ _update_apache_config()
if not skip_index:
_update_index()
if not skip_landing:
diff --git a/src/mcloud_implementation/remove_api.sh b/src/mcloud_implementation/remove_api.sh
index 2ecade4..0645e79 100755
--- a/src/mcloud_implementation/remove_api.sh
+++ b/src/mcloud_implementation/remove_api.sh
@@ -4,9 +4,9 @@
docker stop optimade_$1
docker rm optimade_$1
-optimade-launch profile remove $1
+optimade-launch profile remove $1 --yes
-docker exec -it ubuntu_mongo_1 mongosh optimade_$1 --eval "db.dropDatabase()"
+#docker exec -it ubuntu_mongo_1 mongosh optimade_$1 --eval "db.dropDatabase()"
sudo unlink /home/ubuntu/optimade-sockets/$1.sock