Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
…esar into export_tables
  • Loading branch information
pavish committed Dec 17, 2024
2 parents f8315af + f157aed commit 838452d
Show file tree
Hide file tree
Showing 20 changed files with 518 additions and 292 deletions.
2 changes: 1 addition & 1 deletion .github/sync.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
mathesar-foundation/mathesar-ansible:
mathesar-foundation/mathesar-infrastructure:
- .github/workflows/toc.yml
- .github/workflows/stale.yml
mathesar-foundation/mathesar-data-playground:
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/staging-deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@ jobs:
staging-deploy:
runs-on: ubuntu-latest
steps:
- name: Checkout ansible repo
- name: Checkout infrastructure repo
uses: actions/checkout@v2
with:
repository: 'mathesar-foundation/mathesar-ansible'
repository: 'mathesar-foundation/mathesar-infrastructure'
token: ${{ secrets.MATHESAR_ORG_GITHUB_TOKEN }} # Repo is private, so an access token is used
# This checkout is used for getting the 'action' from the current repo
- name: Checkout mathesar repo
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/sync-github-labels-milestones.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,15 @@ jobs:
steps:
- uses: actions/checkout@v2
- run: composer global require 'vanilla/github-sync'
- run: /home/runner/.composer/vendor/bin/github-sync labels -f mathesar-foundation/mathesar -t mathesar-foundation/mathesar-ansible -d
- run: /home/runner/.composer/vendor/bin/github-sync labels -f mathesar-foundation/mathesar -t mathesar-foundation/mathesar-infrastructure -d
- run: /home/runner/.composer/vendor/bin/github-sync labels -f mathesar-foundation/mathesar -t mathesar-foundation/mathesar-data-playground -d
- run: /home/runner/.composer/vendor/bin/github-sync labels -f mathesar-foundation/mathesar -t mathesar-foundation/mathesar-design -d
- run: /home/runner/.composer/vendor/bin/github-sync labels -f mathesar-foundation/mathesar -t mathesar-foundation/mathesar-internal-crm -d
- run: /home/runner/.composer/vendor/bin/github-sync labels -f mathesar-foundation/mathesar -t mathesar-foundation/mathesar-private-notes -d
- run: /home/runner/.composer/vendor/bin/github-sync labels -f mathesar-foundation/mathesar -t mathesar-foundation/mathesar-scripts -d
- run: /home/runner/.composer/vendor/bin/github-sync labels -f mathesar-foundation/mathesar -t mathesar-foundation/mathesar-website -d
- run: /home/runner/.composer/vendor/bin/github-sync labels -f mathesar-foundation/mathesar -t mathesar-foundation/mathesar-wiki -d
- run: /home/runner/.composer/vendor/bin/github-sync milestones -f mathesar-foundation/mathesar -t mathesar-foundation/mathesar-ansible -s open
- run: /home/runner/.composer/vendor/bin/github-sync milestones -f mathesar-foundation/mathesar -t mathesar-foundation/mathesar-infrastructure -s open
- run: /home/runner/.composer/vendor/bin/github-sync milestones -f mathesar-foundation/mathesar -t mathesar-foundation/mathesar-data-playground -s open
- run: /home/runner/.composer/vendor/bin/github-sync milestones -f mathesar-foundation/mathesar -t mathesar-foundation/mathesar-design -s open
- run: /home/runner/.composer/vendor/bin/github-sync milestones -f mathesar-foundation/mathesar -t mathesar-foundation/mathesar-internal-crm -s open
Expand Down
1 change: 1 addition & 0 deletions config/settings/common_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,7 @@ def pipe_delim(pipe_string):
MATHESAR_UI_SOURCE_LOCATION = os.path.join(BASE_DIR, 'mathesar_ui/')
MATHESAR_CAPTURE_UNHANDLED_EXCEPTION = os.environ.get('CAPTURE_UNHANDLED_EXCEPTION', default=False)
MATHESAR_STATIC_NON_CODE_FILES_LOCATION = os.path.join(BASE_DIR, 'mathesar/static/non-code/')
MATHESAR_ANALYTICS_URL = os.environ.get('MATHESAR_ANALYTICS_URL', default='https://example.com')

DEFAULT_AUTO_FIELD = 'django.db.models.AutoField'

Expand Down
7 changes: 7 additions & 0 deletions conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,13 @@ def _get(db_name):
logger.debug('exit')


@pytest.fixture(autouse=True)
def disable_http_requests(monkeypatch):
    """Block all outbound HTTP during the test run.

    Autouse fixture that patches urllib3's connection-pool entry point so any
    attempted request to an external address fails loudly instead of hitting
    the network.
    """
    def _refuse_network(self, *args, **kwargs):
        raise Exception("Requests to 3rd party addresses make bad tests")

    monkeypatch.setattr(
        "urllib3.connectionpool.HTTPConnectionPool.urlopen", _refuse_network
    )


@pytest.fixture(scope="session")
def create_db(request, engine_cache):
"""
Expand Down
5 changes: 5 additions & 0 deletions db/analytics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from db import connection as db_conn


def get_object_counts(conn):
    """Return the object-count JSONB produced by the `get_object_counts` SQL function.

    Args:
        conn: An open database connection usable by `exec_msar_func`.
    """
    result_row = db_conn.exec_msar_func(conn, 'get_object_counts').fetchone()
    return result_row[0]
89 changes: 75 additions & 14 deletions db/sql/00_msar.sql
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,16 @@ SELECT msar.drop_all_msar_functions();
----------------------------------------------------------------------------------------------------
----------------------------------------------------------------------------------------------------


CREATE OR REPLACE FUNCTION msar.mathesar_system_schemas() RETURNS text[] AS $$/*
Return a text array of the Mathesar System schemas.

These schemas hold Mathesar's own internal functions and types, and are excluded
from user-facing object listings and counts.

Update this function whenever the list changes.
*/
SELECT ARRAY['msar', '__msar', 'mathesar_types']
$$ LANGUAGE SQL STABLE;


CREATE OR REPLACE FUNCTION msar.extract_smallints(v jsonb) RETURNS smallint[] AS $$/*
From the supplied JSONB value, extract all top-level JSONB array elements which can be successfully
cast to PostgreSQL smallint values. Return the resulting array of smallint values.
Expand Down Expand Up @@ -924,6 +934,46 @@ WHERE has_privilege;
$$ LANGUAGE SQL STABLE RETURNS NULL ON NULL INPUT;


CREATE OR REPLACE FUNCTION
msar.describe_column_default(tab_id regclass, col_id smallint) RETURNS jsonb AS $$/*
Return a JSONB object describing the default (if any) of the given column in the given table.

Args:
tab_id: The table containing the column.
col_id: The attnum of the column within the table.

The returned JSON will have the form:
{
"value": <any>,
"is_dynamic": <bool>,
}
If the default is possibly dynamic, i.e., if "is_dynamic" is true, then "value" will be a text SQL
expression that generates the default value if evaluated. If it is not dynamic, then "value" is the
actual default value.

Returns NULL when the column has no default expression at all.
*/
DECLARE
def_expr text;
def_json jsonb;
BEGIN
-- Deparse the stored default expression from the catalog. Identity columns
-- (attidentity <> '') carry no ordinary default, so label them 'identity'.
def_expr = CASE
WHEN attidentity='' THEN pg_catalog.pg_get_expr(adbin, tab_id)
ELSE 'identity'
END
FROM pg_catalog.pg_attribute LEFT JOIN pg_catalog.pg_attrdef ON attrelid=adrelid AND attnum=adnum
WHERE attrelid=tab_id AND attnum=col_id;
IF def_expr IS NULL THEN
-- No default defined for this column.
RETURN NULL;
ELSIF msar.is_default_possibly_dynamic(tab_id, col_id) THEN
-- Dynamic default: return the source expression as a quoted literal (%L)
-- rather than evaluating it.
EXECUTE format(
'SELECT jsonb_build_object(''value'', %L, ''is_dynamic'', true)', def_expr
) INTO def_json;
ELSE
-- Static default: evaluate the expression (%s splices it as SQL) and
-- format the resulting value for the client.
EXECUTE format(
'SELECT jsonb_build_object(''value'', msar.format_data(%s), ''is_dynamic'', false)', def_expr
) INTO def_json;
END IF;
RETURN def_json;
END;
$$ LANGUAGE plpgsql RETURNS NULL ON NULL INPUT;


CREATE OR REPLACE FUNCTION msar.get_column_info(tab_id regclass) RETURNS jsonb AS $$/*
Given a table identifier, return an array of objects describing the columns of the table.
Expand Down Expand Up @@ -955,20 +1005,7 @@ SELECT jsonb_agg(
'type_options', msar.get_type_options(atttypid, atttypmod, attndims),
'nullable', NOT attnotnull,
'primary_key', COALESCE(pgi.indisprimary, false),
'default',
nullif(
jsonb_strip_nulls(
jsonb_build_object(
'value',
CASE
WHEN attidentity='' THEN pg_get_expr(adbin, tab_id)
ELSE 'identity'
END,
'is_dynamic', msar.is_default_possibly_dynamic(tab_id, attnum)
)
),
jsonb_build_object()
),
'default', msar.describe_column_default(tab_id, attnum),
'has_dependents', msar.has_dependents(tab_id, attnum),
'description', msar.col_description(tab_id, attnum),
'current_role_priv', msar.list_column_privileges_for_current_role(tab_id, attnum),
Expand Down Expand Up @@ -1083,6 +1120,30 @@ WHERE has_privilege;
$$ LANGUAGE SQL STABLE RETURNS NULL ON NULL INPUT;


CREATE OR REPLACE FUNCTION
msar.get_object_counts() RETURNS jsonb AS $$/*
Return a JSON object with counts of some objects in the database.

We exclude the Mathesar system schemas, information_schema, and PostgreSQL's
own pg_* schemas.

The objects counted are:
- schema_count: total schemas, excluding the schemas above
- table_count: total ordinary tables in the included schemas
- record_count: estimated total rows of the included tables
*/
SELECT jsonb_build_object(
'schema_count', COUNT(DISTINCT pgn.oid),
'table_count', COUNT(pgc.oid),
-- reltuples is only an estimate, and is -1 for a table that has never been
-- vacuumed or analyzed; clamp to 0 so unanalyzed tables can't drive the sum
-- negative. COALESCE covers the no-tables case, where SUM would be NULL.
'record_count', COALESCE(SUM(GREATEST(pgc.reltuples, 0)), 0)
)
FROM pg_catalog.pg_namespace pgn
LEFT JOIN pg_catalog.pg_class pgc ON pgc.relnamespace = pgn.oid AND pgc.relkind = 'r'
WHERE pgn.nspname <> 'information_schema'
AND NOT (pgn.nspname = ANY(msar.mathesar_system_schemas()))
AND pgn.nspname NOT LIKE 'pg_%';
$$ LANGUAGE SQL STABLE;


CREATE OR REPLACE FUNCTION msar.schema_info_table() RETURNS TABLE
(
oid bigint, -- The OID of the schema.
Expand Down
20 changes: 19 additions & 1 deletion db/sql/test_00_msar.sql
Original file line number Diff line number Diff line change
Expand Up @@ -2662,7 +2662,7 @@ BEGIN
"name": "txt",
"type": "text",
"default": {
"value": "'abc'::text",
"value": "abc",
"is_dynamic": false
},
"nullable": true,
Expand Down Expand Up @@ -5907,3 +5907,21 @@ BEGIN
);
END;
$$ LANGUAGE plpgsql;


CREATE OR REPLACE FUNCTION test_get_object_counts() RETURNS SETOF TEXT AS $$
DECLARE
counts jsonb;
BEGIN
-- Build a fixture of three tables spread over two user schemas: two in the
-- default schema, and one in a freshly created schema.
CREATE TABLE tableno3 (id INTEGER);
CREATE TABLE "12345" (bleh text, bleh2 numeric);
CREATE SCHEMA anewone;
CREATE TABLE anewone.mytab (col1 text);
counts = msar.get_object_counts();
RETURN NEXT is((counts ->> 'schema_count')::integer, 2);
RETURN NEXT is((counts ->> 'table_count')::integer, 3);
-- The record count is a catalog-based estimate, which is unreliable before a
-- vacuum/analyze pass, so we only assert that the key is present.
RETURN NEXT is(counts ? 'record_count', true);
END;
$$ LANGUAGE plpgsql;
1 change: 1 addition & 0 deletions docker-compose.dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ services:
- DJANGO_SETTINGS_MODULE=${DJANGO_SETTINGS_MODULE-config.settings.development}
- ALLOWED_HOSTS=${ALLOWED_HOSTS-*}
- SECRET_KEY=${SECRET_KEY}
- MATHESAR_ANALYTICS_URL=${MATHESAR_ANALYTICS_URL-https://example.com}
- MATHESAR_DATABASES=(mathesar_tables|postgresql://mathesar:mathesar@mathesar_dev_db:5432/mathesar)
- DJANGO_SUPERUSER_PASSWORD=password
- POSTGRES_DB=mathesar_django
Expand Down
139 changes: 139 additions & 0 deletions mathesar/analytics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
"""
This module contains functions for dealing with analytics in Mathesar.
The basic principle is: If there is an installation_id, analytics are
"turned on", and can and will be collected. Otherwise they won't.
Thus, the `disable_analytics` function simply deletes that ID, if it
exists.
"""
from functools import wraps
import threading
from uuid import uuid4

from django.core.cache import cache
from django.conf import settings
from django.db.models import Q
from django.utils import timezone
import requests

from mathesar import __version__
from mathesar.models import (
AnalyticsReport,
ConfiguredRole,
Database,
Explorations,
InstallationID,
User,
)

# Cache key marking that an analytics run was triggered recently.
ANALYTICS_DONE = "analytics_done"
CACHE_TIMEOUT = 1800  # seconds; minimum gap between analytics thread launches
ACTIVE_USER_DAYS = 14  # a user counts as "active" if they logged in within this many days
ANALYTICS_REPORT_MAX_AGE = 30  # days; reports older than this are pruned regardless of upload
ANALYTICS_FREQUENCY = 1  # a report is saved at most once per day.


def wire_analytics(f):
    """Decorate a callable to opportunistically kick off analytics collection.

    Before the wrapped callable runs, start a background thread running
    ``run_analytics`` — but at most once per ``CACHE_TIMEOUT`` window, and
    never when ``settings.TEST`` is ``False``-disabled (i.e. under tests).
    The wrapped callable's own behavior is unchanged.
    """
    @wraps(f)
    def _wrapper(*args, **kwargs):
        should_run = settings.TEST is False and cache.get(ANALYTICS_DONE) is None
        if should_run:
            cache.set(ANALYTICS_DONE, True, CACHE_TIMEOUT)
            threading.Thread(target=run_analytics).start()
        return f(*args, **kwargs)

    return _wrapper


def run_analytics():
    """Save, upload, and prune analytics reports, if analytics is enabled.

    A no-op when there is no ``InstallationID`` (analytics disabled) or when
    a report was already saved within the last ``ANALYTICS_FREQUENCY`` day(s).
    """
    if InstallationID.objects.first() is None:
        return
    cutoff = timezone.now() - timezone.timedelta(days=ANALYTICS_FREQUENCY)
    if AnalyticsReport.objects.filter(created_at__gte=cutoff):
        return
    save_analytics_report()
    upload_analytics_reports()
    delete_stale_reports()


def initialize_analytics():
    """Turn analytics on by creating a fresh installation ID."""
    new_installation_id = uuid4()
    InstallationID.objects.create(value=new_installation_id)


def disable_analytics():
    """Turn analytics off by deleting every stored installation ID."""
    all_ids = InstallationID.objects.all()
    all_ids.delete()


def save_analytics_report():
    """Create and save an ``AnalyticsReport`` snapshot for this installation.

    A no-op when analytics is disabled (no ``InstallationID``). Object counts
    are gathered best-effort per database: a database whose counts can't be
    retrieved is skipped without aborting the report.
    """
    installation_id = InstallationID.objects.first()
    if installation_id is None:
        return
    connected_database_count = 0
    connected_database_schema_count = 0
    connected_database_table_count = 0
    connected_database_record_count = 0
    for d in Database.objects.all():
        try:
            object_counts = d.object_counts
            # Read every count before accumulating anything, so a missing or
            # null key can't leave the totals partially updated for this db.
            # `or 0` guards against null counts from the database layer.
            schema_count = object_counts['schema_count'] or 0
            table_count = object_counts['table_count'] or 0
            record_count = object_counts['record_count'] or 0
        except Exception:
            # Best-effort: an unreachable or misbehaving database shouldn't
            # prevent reporting on the others.
            print(f"Couldn't retrieve object counts for {d.name}")
            continue
        connected_database_count += 1
        connected_database_schema_count += schema_count
        connected_database_table_count += table_count
        connected_database_record_count += record_count

    analytics_report = AnalyticsReport(
        installation_id=installation_id,
        mathesar_version=__version__,
        user_count=User.objects.filter(is_active=True).count(),
        active_user_count=User.objects.filter(
            is_active=True,
            last_login__gte=timezone.now()
            - timezone.timedelta(days=ACTIVE_USER_DAYS)
        ).count(),
        configured_role_count=ConfiguredRole.objects.count(),
        connected_database_count=connected_database_count,
        connected_database_schema_count=connected_database_schema_count,
        connected_database_table_count=connected_database_table_count,
        connected_database_record_count=connected_database_record_count,
        exploration_count=Explorations.objects.count(),
    )
    analytics_report.save()


def upload_analytics_reports():
    """Upload all not-yet-uploaded analytics reports.

    Reports are flagged ``uploaded=True`` only when the analytics endpoint
    acknowledges the POST with a success status; on failure they stay pending
    and will be retried on a later analytics run.
    """
    reports = AnalyticsReport.objects.filter(uploaded=False)
    reports_blob = [
        {
            "id": report.id,
            "created_at": report.created_at.isoformat(),
            "installation_id": str(report.installation_id.value),
            "mathesar_version": report.mathesar_version,
            "user_count": report.user_count,
            "active_user_count": report.active_user_count,
            "configured_role_count": report.configured_role_count,
            "connected_database_count": report.connected_database_count,
            "connected_database_schema_count": report.connected_database_schema_count,
            "connected_database_table_count": report.connected_database_table_count,
            "connected_database_record_count": report.connected_database_record_count,
            "exploration_count": report.exploration_count,
        }
        for report in reports
    ]
    if not reports_blob:
        # Nothing pending; skip the network round trip entirely.
        return
    response = requests.post(settings.MATHESAR_ANALYTICS_URL, json=reports_blob)
    # Only mark reports uploaded on success (status < 400); otherwise leave
    # them pending so they aren't silently dropped.
    if response.ok:
        reports.update(uploaded=True)


def delete_stale_reports():
    """Prune old analytics reports from the database.

    Removes reports that were uploaded and are more than two days old, plus
    any report (uploaded or not) whose last update is older than
    ``ANALYTICS_REPORT_MAX_AGE`` days.
    """
    # Delete uploaded analytics objects older than 2 days
    uploaded_and_old = Q(
        uploaded=True,
        created_at__lte=timezone.now() - timezone.timedelta(days=2),
    )
    # Delete analytics reports after a time regardless of upload status
    expired = Q(
        updated_at__lte=timezone.now()
        - timezone.timedelta(days=ANALYTICS_REPORT_MAX_AGE)
    )
    AnalyticsReport.objects.filter(uploaded_and_old | expired).delete()
Loading

0 comments on commit 838452d

Please sign in to comment.