Skip to content

Commit

Permalink
Move user identification heuristics to lib.user.member_request
Browse files Browse the repository at this point in the history
  • Loading branch information
lukasjuhrich committed Sep 17, 2024
1 parent 0f2c310 commit 18e7015
Show file tree
Hide file tree
Showing 3 changed files with 97 additions and 104 deletions.
14 changes: 7 additions & 7 deletions pycroft/lib/user/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,7 @@
membership_end_date,
membership_beginning_task,
membership_begin_date,
get_similar_users_in_room,
check_similar_user_in_room,
get_user_by_swdd_person_id,
get_name_from_first_last,
get_user_by_id_or_login,
send_password_reset_mail,
find_similar_users,
are_names_similar,
)
from .user_id import (
encode_type1_user_id,
Expand Down Expand Up @@ -59,6 +52,13 @@
get_possible_existing_users_for_pre_member,
check_new_user_data,
check_new_user_data_unused,
get_similar_users_in_room,
check_similar_user_in_room,
get_user_by_swdd_person_id,
get_name_from_first_last,
get_user_by_id_or_login,
find_similar_users,
are_names_similar,
)
from .mail import (
format_user_mail,
Expand Down
94 changes: 2 additions & 92 deletions pycroft/lib/user/_old.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,10 @@
:copyright: (c) 2012 by AG DSN.
"""
import os
import re
import typing
import typing as t
from datetime import date
from difflib import SequenceMatcher
from collections.abc import Iterable

from sqlalchemy import exists, func, select, Boolean, String, ColumnElement
from sqlalchemy import exists, select, Boolean, String, ColumnElement
from sqlalchemy.orm import Session

from pycroft import config, property
Expand Down Expand Up @@ -51,18 +47,11 @@
from pycroft.model.traffic import traffic_history as func_traffic_history
from pycroft.model.user import (
User,
BaseUser,
RoomHistoryEntry,
PropertyGroup,
)
from pycroft.model.unix_account import UnixAccount, UnixTombstone

from .exc import LoginTakenException, UserExistsInRoomException
from .user_id import (
decode_type1_user_id,
decode_type2_user_id,
check_user_id,
)
from .exc import LoginTakenException
from .passwords import generate_wifi_password
from .mail import user_send_mail, send_confirmation_email

Expand Down Expand Up @@ -657,66 +646,6 @@ def membership_begin_date(user: User) -> date | None:
return end_date


def get_similar_users_in_room(name: str, room: Room, ratio: float = 0.75) -> list[User]:
    """Return the inhabitants of ``room`` whose name resembles ``name``.

    A user counts as similar when
    ``difflib.SequenceMatcher(None, name, user.name).ratio()`` is strictly
    greater than ``ratio``.  When ``room`` is ``None`` the result is empty.

    Eagerloading hints:
    - `room.users`
    """
    similar: list[User] = []
    if room is not None:
        for inhabitant in room.users:
            score = SequenceMatcher(None, name, inhabitant.name).ratio()
            if score > ratio:
                similar.append(inhabitant)
    return similar


def check_similar_user_in_room(name: str, room: Room) -> None:
    """Ensure that no inhabitant of ``room`` has a name similar to ``name``.

    Similarity is decided by :func:`get_similar_users_in_room`, called with
    its default ratio.

    :raises UserExistsInRoomException: if at least one similar user is found
    """
    similar_users = get_similar_users_in_room(name, room)
    if not similar_users:
        return
    raise UserExistsInRoomException


def get_user_by_swdd_person_id(swdd_person_id: int | None) -> User | None:
    """Look up a user by ``swdd_person_id``.

    :param swdd_person_id: the person id to filter users by; may be ``None``
    :returns: the first user with that ``swdd_person_id``, or ``None`` if
        ``swdd_person_id`` is ``None`` or the query yields no user
    """
    if swdd_person_id is not None:
        matching_users = User.q.filter_by(swdd_person_id=swdd_person_id)
        return typing.cast(User | None, matching_users.first())
    return None


def get_name_from_first_last(first_name: str, last_name: str) -> str:
    """Combine a first and a last name into a single name.

    :returns: ``"<first_name> <last_name>"``, or just ``first_name`` when
        ``last_name`` is falsy (e.g. the empty string)
    """
    if not last_name:
        return first_name
    return f"{first_name} {last_name}"


def get_user_by_id_or_login(ident: str, email: str) -> User | None:
    """Find the user designated by ``ident`` whose e-mail matches ``email``.

    ``ident`` is interpreted, in this order, as

    - a user id with a one-digit suffix (decoded by ``decode_type1_user_id``),
    - a user id with a two-digit suffix (decoded by ``decode_type2_user_id``),
    - a login matching ``BaseUser.login_regex``.

    The e-mail address is compared after lowercasing both sides.

    :returns: the matching user, or ``None`` if ``ident`` has none of the
        formats above, fails ``check_user_id``, or no user matches
    """
    by_email = User.q.filter(func.lower(User.email) == email.lower())

    # Both user id flavours are handled alike; only the decoder differs.
    uid_decoders = (
        (r"^\d{4,6}-\d{1}$", decode_type1_user_id),
        (r"^\d{4,6}-\d{2}$", decode_type2_user_id),
    )
    for pattern, decode in uid_decoders:
        if not re.match(pattern, ident):
            continue
        if not check_user_id(ident):
            return None
        user_id, _ = decode(ident)
        return t.cast(User | None, by_email.filter_by(id=user_id).one_or_none())

    if re.match(BaseUser.login_regex, ident):
        return t.cast(User | None, by_email.filter_by(login=ident).one_or_none())
    return None


@with_transaction
def send_password_reset_mail(user: User) -> bool:
user.password_reset_token = generate_random_str(64)
Expand All @@ -733,22 +662,3 @@ def send_password_reset_mail(user: User) -> bool:
return False

return True



def find_similar_users(name: str, room: Room, ratio: float) -> Iterable[User]:
    """Find users associated with a room whose name is similar to ``name``.

    The candidates are all users joined to a ``RoomHistoryEntry`` of ``room``.

    :param name: The potential user's name
    :param room: the room whose room history entries select the candidates
    :param ratio: the threshold passed to :func:`are_names_similar`; only
        candidates for which that function returns true are included.
    """
    candidates = (
        session.session.query(User)
        .join(RoomHistoryEntry)
        .filter(RoomHistoryEntry.room == room)
    )
    matches = []
    for candidate in candidates:
        if are_names_similar(name, candidate.name, threshold=ratio):
            matches.append(candidate)
    return matches


def are_names_similar(one: str, other: str, threshold: float) -> bool:
    """Tell whether two names are similar enough.

    :param one: the first name
    :param other: the second name
    :param threshold: the value the similarity has to exceed
    :returns: ``True`` iff ``difflib.SequenceMatcher(a=one, b=other).ratio()``
        is strictly greater than ``threshold``
    """
    matcher = SequenceMatcher(a=one, b=other)
    similarity = matcher.ratio()
    return similarity > threshold
93 changes: 88 additions & 5 deletions pycroft/lib/user/member_request.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
import re
import typing as t
from datetime import timedelta, date
from difflib import SequenceMatcher

from sqlalchemy import func

Expand All @@ -14,18 +17,17 @@
from pycroft.model.facilities import Room
from pycroft.model.session import with_transaction
from pycroft.model.user import (
BaseUser,
User,
PreMember,
RoomHistoryEntry,
)

from ._old import (
create_user,
login_available,
move_in,
move,
get_user_by_swdd_person_id,
get_similar_users_in_room,
check_similar_user_in_room,
login_available,
send_confirmation_email,
user_send_mail,
)
Expand All @@ -39,10 +41,16 @@
LoginTakenException,
EmailTakenException,
UserExistsException,
UserExistsInRoomException,
NoTenancyForRoomException,
MoveInDateInvalidException,
)
from .user_id import encode_type2_user_id
from .user_id import (
check_user_id,
decode_type1_user_id,
decode_type2_user_id,
encode_type2_user_id,
)


@with_transaction
Expand Down Expand Up @@ -336,3 +344,78 @@ def check_new_user_data_unused(login: str, email: str, swdd_person_id: int) -> N
raise EmailTakenException

return


def get_similar_users_in_room(name: str, room: Room, ratio: float = 0.75) -> list[User]:
    """Return the inhabitants of ``room`` whose name resembles ``name``.

    A user counts as similar when
    ``difflib.SequenceMatcher(None, name, user.name).ratio()`` is strictly
    greater than ``ratio``.  When ``room`` is ``None`` the result is empty.

    Eagerloading hints:
    - `room.users`
    """
    similar: list[User] = []
    if room is not None:
        for inhabitant in room.users:
            score = SequenceMatcher(None, name, inhabitant.name).ratio()
            if score > ratio:
                similar.append(inhabitant)
    return similar


def check_similar_user_in_room(name: str, room: Room) -> None:
    """Ensure that no inhabitant of ``room`` has a name similar to ``name``.

    Similarity is decided by :func:`get_similar_users_in_room`, called with
    its default ratio.

    :raises UserExistsInRoomException: if at least one similar user is found
    """
    similar_users = get_similar_users_in_room(name, room)
    if not similar_users:
        return
    raise UserExistsInRoomException


def get_user_by_swdd_person_id(swdd_person_id: int | None) -> User | None:
    """Look up a user by ``swdd_person_id``.

    :param swdd_person_id: the person id to filter users by; may be ``None``
    :returns: the first user with that ``swdd_person_id``, or ``None`` if
        ``swdd_person_id`` is ``None`` or the query yields no user
    """
    if swdd_person_id is not None:
        matching_users = User.q.filter_by(swdd_person_id=swdd_person_id)
        return t.cast(User | None, matching_users.first())
    return None


def get_name_from_first_last(first_name: str, last_name: str) -> str:
    """Combine a first and a last name into a single name.

    :returns: ``"<first_name> <last_name>"``, or just ``first_name`` when
        ``last_name`` is falsy (e.g. the empty string)
    """
    if not last_name:
        return first_name
    return f"{first_name} {last_name}"


def get_user_by_id_or_login(ident: str, email: str) -> User | None:
    """Find the user designated by ``ident`` whose e-mail matches ``email``.

    ``ident`` is interpreted, in this order, as

    - a user id with a one-digit suffix (decoded by ``decode_type1_user_id``),
    - a user id with a two-digit suffix (decoded by ``decode_type2_user_id``),
    - a login matching ``BaseUser.login_regex``.

    The e-mail address is compared after lowercasing both sides.

    :returns: the matching user, or ``None`` if ``ident`` has none of the
        formats above, fails ``check_user_id``, or no user matches
    """
    by_email = User.q.filter(func.lower(User.email) == email.lower())

    # Both user id flavours are handled alike; only the decoder differs.
    uid_decoders = (
        (r"^\d{4,6}-\d{1}$", decode_type1_user_id),
        (r"^\d{4,6}-\d{2}$", decode_type2_user_id),
    )
    for pattern, decode in uid_decoders:
        if not re.match(pattern, ident):
            continue
        if not check_user_id(ident):
            return None
        user_id, _ = decode(ident)
        return t.cast(User | None, by_email.filter_by(id=user_id).one_or_none())

    if re.match(BaseUser.login_regex, ident):
        return t.cast(User | None, by_email.filter_by(login=ident).one_or_none())
    return None


def find_similar_users(name: str, room: Room, ratio: float) -> t.Iterable[User]:
    """Find users associated with a room whose name is similar to ``name``.

    The candidates are all users joined to a ``RoomHistoryEntry`` of ``room``.

    :param name: The potential user's name
    :param room: the room whose room history entries select the candidates
    :param ratio: the threshold passed to :func:`are_names_similar`; only
        candidates for which that function returns true are included.
    """
    candidates = (
        session.session.query(User)
        .join(RoomHistoryEntry)
        .filter(RoomHistoryEntry.room == room)
    )
    matches = []
    for candidate in candidates:
        if are_names_similar(name, candidate.name, threshold=ratio):
            matches.append(candidate)
    return matches


def are_names_similar(one: str, other: str, threshold: float) -> bool:
    """Tell whether two names are similar enough.

    :param one: the first name
    :param other: the second name
    :param threshold: the value the similarity has to exceed
    :returns: ``True`` iff ``difflib.SequenceMatcher(a=one, b=other).ratio()``
        is strictly greater than ``threshold``
    """
    matcher = SequenceMatcher(a=one, b=other)
    similarity = matcher.ratio()
    return similarity > threshold

0 comments on commit 18e7015

Please sign in to comment.