-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(anonymisation): POC and command for mission table
- Loading branch information
1 parent
be18158
commit bcfb7d4
Showing 5 changed files with 167 additions and 1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
from sqlalchemy import Column, Integer, String, DateTime, JSON | ||
from app import db | ||
from app.models.mission import Mission | ||
|
||
|
||
class MissionAnonymized(Mission):
    """Concrete-table variant of ``Mission`` used to store anonymized rows.

    The mapper is configured with ``concrete=True``, so every column the
    mapping needs is declared again here instead of being inherited from
    the parent table.
    """

    # NOTE(review): presumably consumed by the Mission base to name its
    # backrefs -- confirm against app.models.mission.
    backref_base_name = "mission_anonymized"
    __mapper_args__ = {"concrete": True}
    # NOTE(review): no __tablename__ is declared here; presumably a base
    # class derives it -- confirm it resolves to "mission_anonymized".

    id = Column(Integer, primary_key=True)
    name = Column(String, nullable=True)
    submitter_id = Column(Integer, nullable=True)
    company_id = Column(Integer, nullable=True)
    vehicle_id = Column(Integer, nullable=True)
    creation_time = Column(DateTime, nullable=True)
    reception_time = Column(DateTime, nullable=True)
    context = Column(JSON, nullable=True)

    def __repr__(self):
        """Return a short debugging representation (id, name, creation time)."""
        return f"<MissionAnonymized(id={self.id}, name={self.name}, creation_time={self.creation_time})>"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
import csv | ||
from io import StringIO | ||
from sqlalchemy import text | ||
from app import db | ||
import json | ||
|
||
|
||
def _rows_to_csv(rows):
    """Serialize result rows into an in-memory CSV buffer rewound to offset 0.

    The last value of each row is the ``context`` column; dict and list
    values are dumped to JSON text so the CSV carries valid JSON rather
    than a Python repr. ``None`` values are written by ``csv.writer`` as
    empty fields.
    """
    buffer = StringIO()
    writer = csv.writer(buffer)
    for row in rows:
        values = list(row)
        if isinstance(values[-1], (dict, list)):
            values[-1] = json.dumps(values[-1])
        writer.writerow(values)
    buffer.seek(0)
    return buffer


def migrate_anonymize_mission(interval):
    """Move rows from ``mission`` into ``mission_anonymized`` using COPY.

    Rows matching ``creation_time {interval}`` are selected with an
    anonymized name, NULL submitter/company ids and timestamps truncated
    to the month. They are bulk-loaded into ``mission_anonymized`` with
    ``COPY ... FROM STDIN`` and then deleted from ``mission``.

    The SELECT, COPY and DELETE all run on the session's own connection
    and are committed together, so a failure at any step rolls back the
    whole migration. Errors are reported on stdout and not re-raised; the
    session is always closed before returning.

    Args:
        interval: SQL fragment appended after ``creation_time`` in the
            WHERE clause (comparison operator included). Double quotes
            are stripped before use.
    """
    interval = interval.replace('"', "")
    # SECURITY NOTE(review): ``interval`` is interpolated verbatim into the
    # SQL text below. It must only ever come from a trusted operator, never
    # from end-user input.
    select_query = f"""
        SELECT
            id,
            anon.fake_last_name() AS name,
            NULL AS submitter_id,
            NULL AS company_id,
            vehicle_id,
            date_trunc('month', creation_time) AS creation_time,
            date_trunc('month', reception_time) AS reception_time,
            context::jsonb AS context -- Convertir context en JSON valide
        FROM mission
        WHERE creation_time {interval};
    """

    try:
        rows = db.session.execute(text(select_query)).fetchall()

        if not rows:
            print("No data to migrate.")
            return

        csv_buffer = _rows_to_csv(rows)

        # Reuse the DBAPI connection that backs the session's transaction so
        # the COPY and the DELETE below are committed (or rolled back) as one
        # unit, instead of running COPY on a separate, uncommitted connection.
        dbapi_connection = db.session.connection().connection
        cursor = dbapi_connection.cursor()
        try:
            cursor.copy_expert(
                """
                COPY mission_anonymized (id, name, submitter_id, company_id, vehicle_id, creation_time, reception_time, context)
                FROM STDIN WITH (FORMAT CSV)
                """,
                csv_buffer,
            )
        except Exception as e:
            print(f"Error when copying mass data: {e}")
            raise
        finally:
            cursor.close()
            csv_buffer.close()

        # Delete exactly the rows that were copied. Re-evaluating the time
        # predicate here could match rows that were never anonymized.
        copied_ids = [row[0] for row in rows]
        db.session.execute(
            text("DELETE FROM mission WHERE id = ANY(:ids)"),
            {"ids": copied_ids},
        )

        db.session.commit()
        print("Anonymized data migration successful.")

    except Exception as e:
        db.session.rollback()
        print(f"Transaction failed, rolling back changes: {e}")

    finally:
        db.session.close()
39 changes: 39 additions & 0 deletions
39
migrations/versions/c8870f7b9399_add_mission_anonymized_table.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
"""Add mission_anonymized table | ||
Revision ID: c8870f7b9399 | ||
Revises: bd643a8d5269 | ||
Create Date: 2024-11-07 12:34:16.339041 | ||
""" | ||
from alembic import op | ||
import sqlalchemy as sa | ||
|
||
|
||
# revision identifiers, used by Alembic.
revision = "c8870f7b9399"
down_revision = "bd643a8d5269"
branch_labels = None
depends_on = None
|
||
|
||
def upgrade():
    """Create the ``mission_anonymized`` table with ``id`` as primary key."""
    # ### commands auto generated by Alembic - please adjust! ###
    op.create_table(
        "mission_anonymized",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("name", sa.String(), nullable=True),
        sa.Column("submitter_id", sa.Integer(), nullable=True),
        sa.Column("company_id", sa.Integer(), nullable=True),
        sa.Column("vehicle_id", sa.Integer(), nullable=True),
        sa.Column("creation_time", sa.DateTime(), nullable=True),
        sa.Column("reception_time", sa.DateTime(), nullable=True),
        sa.Column("context", sa.JSON(), nullable=True),
        sa.PrimaryKeyConstraint("id"),
    )
    # ### end Alembic commands ###
|
||
|
||
def downgrade():
    """Drop the ``mission_anonymized`` table created by ``upgrade``."""
    # ### commands auto generated by Alembic - please adjust! ###
    op.drop_table("mission_anonymized")
    # ### end Alembic commands ###