[IMPORT] [MONITORING] feat: move dataset selection to the fieldmapping step #3289

Status: Draft. Wants to merge 11 commits into base branch feat/import-monitorings.
6 changes: 3 additions & 3 deletions .github/workflows/cypress.yml
@@ -5,13 +5,13 @@ on:
- master
- hotfixes
- develop
- feat/import
- feat/import-monitorings
pull_request:
branches:
- master
- hotfixes
- develop
- feat/import
- feat/import-monitorings

jobs:
mount_app_and_run_cypress:
@@ -111,7 +111,7 @@ jobs:
cp ./config/settings.ini.sample ./config/settings.ini
./install/05_install_frontend.sh --ci
env:
GEONATURE_CONFIG_FILE: '${{ github.workspace }}/config/test_config.toml'
GEONATURE_CONFIG_FILE: "${{ github.workspace }}/config/test_config.toml"
- name: Install core modules
run: |
geonature install-gn-module contrib/occtax OCCTAX --build=false
22 changes: 12 additions & 10 deletions .github/workflows/pytest.yml
@@ -6,28 +6,30 @@ on:
- master
- hotfixes
- develop
- feat/import-monitorings
pull_request:
branches:
- master
- hotfixes
- develop
- feat/import-monitorings

jobs:
build:
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
debian-version: ['11', '12']
debian-version: ["11", "12"]
include:
- debian-version: '11'
python-version: '3.9'
postgres-version: '13'
postgis-version: '3.2'
- debian-version: '12'
python-version: '3.11'
postgres-version: '15'
postgis-version: '3.3'
- debian-version: "11"
python-version: "3.9"
postgres-version: "13"
postgis-version: "3.2"
- debian-version: "12"
python-version: "3.11"
postgres-version: "15"
postgis-version: "3.3"

name: Debian ${{ matrix.debian-version }}

@@ -68,7 +70,7 @@ jobs:
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
cache: 'pip'
cache: "pip"
- name: Install GDAL
run: |
sudo apt update
2 changes: 1 addition & 1 deletion VERSION
@@ -1 +1 @@
2.15.0
2.16.0alpha
2 changes: 1 addition & 1 deletion backend/geonature/core/gn_meta/models/datasets.py
@@ -19,7 +19,6 @@
from geonature.core.gn_permissions.tools import get_scopes_by_action
from geonature.core.gn_commons.models import cor_field_dataset, cor_module_dataset

from ref_geo.models import LAreas
from .commons import *


@@ -322,6 +321,7 @@ def filter_by_creatable(cls, module_code, *, query, user=None, object_code=None)
@qfilter(query=True)
def filter_by_areas(cls, areas, *, query):
from geonature.core.gn_synthese.models import Synthese
from ref_geo.models import LAreas

areaFilter = []
for id_area in areas:
5 changes: 5 additions & 0 deletions backend/geonature/core/gn_monitoring/models.py
@@ -169,6 +169,8 @@ class TBaseVisits(DB.Model):
foreign_keys=[id_dataset],
)

id_import = DB.Column(DB.Integer, nullable=True)


@serializable
@geoserializable(geoCol="geom", idCol="id_base_site")
@@ -212,6 +214,8 @@ class TBaseSites(DB.Model):
foreign_keys=[cor_site_module.c.id_base_site, cor_site_module.c.id_module],
)

id_import = DB.Column(DB.Integer, nullable=True)


@serializable
class TObservations(DB.Model):
@@ -226,3 +230,4 @@ class TObservations(DB.Model):
cd_nom = DB.Column(DB.Integer)
comments = DB.Column(DB.String)
uuid_observation = DB.Column(UUID(as_uuid=True), default=select(func.uuid_generate_v4()))
id_import = DB.Column(DB.Integer, nullable=True)
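
The three monitoring tables gain a nullable `id_import` column so rows created by an import can be traced back to it. For context, a column addition like this is normally accompanied by an Alembic revision; below is a minimal sketch under the assumption that these tables live in the `gn_monitoring` schema (revision identifiers are placeholders, not the actual migration shipped with this PR):

"""Sketch of an Alembic revision adding id_import to the monitoring tables."""
import sqlalchemy as sa
from alembic import op

# Placeholder identifiers, for illustration only.
revision = "xxxxxxxxxxxx"
down_revision = None

MONITORING_TABLES = ("t_base_sites", "t_base_visits", "t_observations")


def upgrade():
    # Add a nullable integer column to each table; existing rows keep NULL.
    for table in MONITORING_TABLES:
        op.add_column(
            table,
            sa.Column("id_import", sa.Integer(), nullable=True),
            schema="gn_monitoring",
        )


def downgrade():
    for table in MONITORING_TABLES:
        op.drop_column(table, "id_import", schema="gn_monitoring")
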
46 changes: 29 additions & 17 deletions backend/geonature/core/gn_synthese/imports/actions.py
@@ -88,7 +88,8 @@ def check_transient_data(task, logger, imprt: TImports):
selected_fields = {
field_name: fields[field_name]
for field_name, source_field in imprt.fieldmapping.items()
if source_field in imprt.columns
if source_field.get("column_src", None) in imprt.columns
or source_field.get("default_value", None) is not None
}
init_rows_validity(imprt)
task.update_state(state="PROGRESS", meta={"progress": 0.05})
@@ -218,7 +219,15 @@ def update_batch_progress(batch, step):
do_nomenclatures_mapping(
imprt,
entity,
selected_fields,
{
field_name: fields[field_name]
for field_name, mapping in imprt.fieldmapping.items()
if field_name in fields
and (
mapping.get("column_src", None) in imprt.columns
or mapping.get("default_value") is not None
)
},
fill_with_defaults=current_app.config["IMPORT"][
"FILL_MISSING_NOMENCLATURE_WITH_DEFAULT_VALUE"
],
@@ -238,7 +247,6 @@ def update_batch_progress(batch, step):
entity,
fields["id_nomenclature_blurring"],
fields["id_dataset"],
fields["unique_dataset_id"],
)
if current_app.config["IMPORT"]["CHECK_REF_BIBLIO_LITTERATURE"]:
check_nomenclature_source_status(
@@ -267,16 +275,20 @@ def update_batch_progress(batch, step):

if "unique_id_sinp" in selected_fields:
check_duplicate_uuid(imprt, entity, selected_fields["unique_id_sinp"])
# TODO: what is this?
if current_app.config["IMPORT"]["PER_DATASET_UUID_CHECK"]:
whereclause = Synthese.id_dataset == imprt.id_dataset
check_existing_uuid(
imprt,
entity,
selected_fields["unique_id_sinp"],
id_dataset_field=selected_fields["id_dataset"],
)
else:
whereclause = sa.true()
check_existing_uuid(
imprt,
entity,
selected_fields["unique_id_sinp"],
whereclause=whereclause,
)
check_existing_uuid(
imprt,
entity,
selected_fields["unique_id_sinp"],
)
if imprt.fieldmapping.get(
"unique_id_sinp_generate",
current_app.config["IMPORT"]["DEFAULT_GENERATE_MISSING_UUID"],
@@ -339,21 +351,22 @@ def import_data_to_destination(imprt: TImports) -> None:
if field_name not in fields: # not a destination field
continue
field = fields[field_name]
column_src = source_field.get("column_src", None)
if field.multi:
if not set(source_field).isdisjoint(imprt.columns):
if not set(column_src).isdisjoint(imprt.columns):
insert_fields |= {field}
else:
if source_field in imprt.columns:
if (
column_src in imprt.columns
or source_field.get("default_value", None) is not None
):
insert_fields |= {field}

insert_fields -= {fields["unique_dataset_id"]} # Column only used for filling `id_dataset`

select_stmt = (
sa.select(
*[transient_table.c[field.dest_field] for field in insert_fields],
sa.literal(source.id_source),
sa.literal(source.module.id_module),
sa.literal(imprt.id_dataset),
sa.literal(imprt.id_import),
sa.literal("I"),
)
@@ -363,7 +376,6 @@ def import_data_to_destination(imprt: TImports) -> None:
names = [field.dest_field for field in insert_fields] + [
"id_source",
"id_module",
"id_dataset",
"id_import",
"last_action",
]
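
These changes assume that the values of `imprt.fieldmapping` are no longer bare source-column names but small dicts carrying either a `column_src` (a column of the uploaded file) or a `default_value`. A minimal sketch of that assumed shape and of the selection rule used above, with hypothetical field and column names:

# Assumed shape of imprt.fieldmapping after this change (illustrative values):
fieldmapping = {
    "date_min": {"column_src": "date_debut"},     # filled from a file column
    "id_dataset": {"default_value": 42},          # no column, constant default
    "unique_id_sinp": {"column_src": "uuid_obs"},
}
columns = ["date_debut", "uuid_obs", "observers"]  # columns present in the uploaded file

# A destination field is kept when its source column exists in the file
# or when the mapping provides a default value.
selected = {
    name: mapping
    for name, mapping in fieldmapping.items()
    if mapping.get("column_src") in columns or mapping.get("default_value") is not None
}
assert sorted(selected) == ["date_min", "id_dataset", "unique_id_sinp"]

For multi-column fields, the diff iterates over `column_src` as a set before intersecting it with `imprt.columns`, which suggests it holds a list of source columns in that case.
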
2 changes: 1 addition & 1 deletion backend/geonature/core/gn_synthese/models.py
@@ -434,7 +434,7 @@ class Synthese(DB.Model):
meta_update_date = DB.Column(DB.DateTime, server_default=FetchedValue())
last_action = DB.Column(DB.Unicode)

areas = relationship(LAreas, secondary=corAreaSynthese, backref="synthese_obs")
# areas = relationship(LAreas, secondary=corAreaSynthese, backref="synthese_obs")
area_attachment = relationship(LAreas, foreign_keys=[id_area_attachment])
validations = relationship(TValidations, backref="attached_row")
last_validation = relationship(last_validation, uselist=False, viewonly=True)
120 changes: 59 additions & 61 deletions backend/geonature/core/imports/checks/dataframe/core.py
@@ -194,7 +194,7 @@ def check_datasets(
) -> Set[str]:
"""
Check if datasets exist and are authorized for the user and import.

It also fills the id_field based on the content of uuid_field.
Parameters
----------
imprt : TImports
@@ -222,72 +222,70 @@

"""
updated_cols = set()
uuid_col = uuid_field.dest_field
id_col = id_field.dest_field

if uuid_col in df:
has_uuid_mask = df[uuid_col].notnull()
uuid = df.loc[has_uuid_mask, uuid_col].unique().tolist()
uuid_col = uuid_field.source_column

datasets = {
ds.unique_dataset_id.hex: ds
for ds in TDatasets.query.filter(TDatasets.unique_dataset_id.in_(uuid))
.options(sa.orm.joinedload(TDatasets.nomenclature_data_origin))
.options(sa.orm.raiseload("*"))
.all()
if uuid_col not in df:
yield {
"error_code": ImportCodeError.MISSING_VALUE,
"column": uuid_field.name_field,
}
valid_ds_mask = df[uuid_col].isin(datasets.keys())
invalid_ds_mask = has_uuid_mask & ~valid_ds_mask
if invalid_ds_mask.any():
yield {
"error_code": ImportCodeError.DATASET_NOT_FOUND,
"column": uuid_field.name_field,
"invalid_rows": df[invalid_ds_mask],
}

inactive_dataset = [uuid for uuid, ds in datasets.items() if not ds.active]
inactive_dataset_mask = df[uuid_col].isin(inactive_dataset)
if inactive_dataset_mask.any():
yield {
"error_code": ImportCodeError.DATASET_NOT_ACTIVE,
"column": uuid_field.name_field,
"invalid_rows": df[inactive_dataset_mask],
}

# Warning: we check only permissions of the first author, but currently there is only one author per import.
authorized_datasets = {
ds.unique_dataset_id.hex: ds
for ds in db.session.execute(
TDatasets.filter_by_creatable(
user=imprt.authors[0], module_code=module_code, object_code=object_code
)
.where(TDatasets.unique_dataset_id.in_(uuid))
.options(sa.orm.raiseload("*"))
)
.scalars()
.all()
uuid = df[uuid_col].unique().tolist()

datasets = {
str(ds.unique_dataset_id): ds
for ds in TDatasets.query.filter(TDatasets.unique_dataset_id.in_(uuid)).options(
sa.orm.joinedload(TDatasets.nomenclature_data_origin)
)
# .options(sa.orm.raiseload("*"))
.all()
}
valid_ds_mask = df[uuid_col].isin(datasets.keys())
invalid_ds_mask = ~valid_ds_mask
if invalid_ds_mask.any():
yield {
"error_code": ImportCodeError.DATASET_NOT_FOUND,
"column": uuid_field.name_field,
"invalid_rows": df[invalid_ds_mask],
}
authorized_ds_mask = df[uuid_col].isin(authorized_datasets.keys())
unauthorized_ds_mask = valid_ds_mask & ~authorized_ds_mask
if unauthorized_ds_mask.any():
yield {
"error_code": ImportCodeError.DATASET_NOT_AUTHORIZED,
"column": uuid_field.name_field,
"invalid_rows": df[unauthorized_ds_mask],
}

if authorized_ds_mask.any():
df.loc[authorized_ds_mask, id_col] = df[authorized_ds_mask][uuid_col].apply(
lambda uuid: authorized_datasets[uuid].id_dataset
)
updated_cols = {id_col}

else:
has_uuid_mask = pd.Series(False, index=df.index)
inactive_dataset = [uuid for uuid, ds in datasets.items() if not ds.active]
inactive_dataset_mask = df[uuid_col].isin(inactive_dataset)
if inactive_dataset_mask.any():
yield {
"error_code": ImportCodeError.DATASET_NOT_ACTIVE,
"column": uuid_field.name_field,
"invalid_rows": df[inactive_dataset_mask],
}

# Warning: we check only permissions of the first author, but currently there is only one author per import.
authorized_datasets = {
str(ds.unique_dataset_id): ds
for ds in db.session.execute(
TDatasets.filter_by_creatable(
user=imprt.authors[0], module_code=module_code, object_code=object_code
)
.where(TDatasets.unique_dataset_id.in_(uuid))
.options(sa.orm.raiseload("*"))
)
.scalars()
.all()
}
authorized_ds_mask = valid_ds_mask & df[uuid_col].isin(authorized_datasets.keys())
unauthorized_ds_mask = ~authorized_ds_mask
if unauthorized_ds_mask.any():
yield {
"error_code": ImportCodeError.DATASET_NOT_AUTHORIZED,
"column": uuid_field.name_field,
"invalid_rows": df[unauthorized_ds_mask],
}

if (~has_uuid_mask).any():
# Set id_dataset from import for empty cells:
df.loc[~has_uuid_mask, id_col] = imprt.id_dataset
# compute id_col based on uuid_col
if authorized_ds_mask.any():
id_col = id_field.dest_field
df.loc[authorized_ds_mask, id_col] = df[authorized_ds_mask][uuid_col].apply(
lambda uuid: authorized_datasets[uuid].id_dataset
)
updated_cols = {id_col}

return updated_cols
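
The rewritten `check_datasets` now reads the dataset UUID from the field's source column and derives each error from a boolean mask over the dataframe, filling the destination id column only for authorized rows. A standalone sketch of that masking pattern, with hypothetical UUIDs and column names:

import pandas as pd

# Hypothetical import content: one dataset UUID per row of the uploaded file.
df = pd.DataFrame({"src_dataset_uuid": ["uuid-a", "uuid-b", "uuid-c"]})

known = {"uuid-a": 1, "uuid-b": 2}  # UUID -> id_dataset existing in the database
authorized = {"uuid-a": 1}          # subset the import author is allowed to use

valid_mask = df["src_dataset_uuid"].isin(known)
not_found_rows = df[~valid_mask]                        # -> DATASET_NOT_FOUND ("uuid-c")

authorized_mask = valid_mask & df["src_dataset_uuid"].isin(authorized)
unauthorized_rows = df[valid_mask & ~authorized_mask]   # -> DATASET_NOT_AUTHORIZED ("uuid-b")

# The destination id column is only filled where the dataset is authorized.
df.loc[authorized_mask, "id_dataset"] = df.loc[authorized_mask, "src_dataset_uuid"].map(authorized)
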
6 changes: 3 additions & 3 deletions backend/geonature/core/imports/checks/dataframe/utils.py
@@ -7,7 +7,7 @@

from geonature.utils.env import db

from geonature.core.imports.models import ImportUserError, ImportUserErrorType
from geonature.core.imports.models import ImportUserError, ImportUserErrorType, TImports
from geonature.core.imports.utils import generated_fields


@@ -101,7 +101,7 @@ def __error_replace(*args, **kwargs):
return _error_replace


def report_error(imprt, entity, df, error):
def report_error(imprt: TImports, entity, df, error):
"""
Reports an error found in the dataframe, updates the validity column and insert
the error in the `t_user_errors` table.
@@ -147,7 +147,7 @@ def report_error(imprt, entity, df, error):
# f'{error_type.name}' # FIXME comment
ordered_invalid_rows = sorted(invalid_rows["line_no"])
column = generated_fields.get(error["column"], error["column"])
column = imprt.fieldmapping.get(column, column)
column = imprt.fieldmapping.get(column, {}).get("column_src", column)
# If an error for same import, same column and of the same type already exists,
# we concat existing erroneous rows with current rows.
stmt = pg_insert(ImportUserError).values(