Skip to content

Commit

Permalink
DRAFT: Bug Fixes (#55)
Browse files Browse the repository at this point in the history
* fix: key errors related to name splitting
  • Loading branch information
initstring authored Feb 1, 2023
1 parent f6b4dc1 commit ac6b16a
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 9 deletions.
21 changes: 14 additions & 7 deletions linkedin2username.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,12 +87,20 @@ def clean_name(name):
name = re.sub("[ß]", 'ss', name)
name = re.sub("[ñ]", 'n', name)

# Get rid of everything in parentheses. Lots of people put various credentials, etc. there.
name = re.sub(r'\([^()]*\)', '', name)

# The lines below basically trash anything weird left over.
# A lot of users have funny things in their names, like () or ''
# People like to feel special, I guess.
allowed_chars = re.compile('[^a-zA-Z -]')
name = allowed_chars.sub('', name)

# Next, we get rid of common titles. Thanks ChatGPT for the help.
titles = ['mr', 'miss', 'mrs', 'phd', 'prof', 'professor', 'md', 'dr', 'mba']
pattern = "\\b(" + "|".join(titles) + ")\\b"
name = re.sub(pattern, '', name)

# The line below tries to consolidate white space between words
# and get rid of leading/trailing spaces.
name = re.sub(r'\s+', ' ', name).strip()
Expand All @@ -112,7 +120,7 @@ def split_name(name):
if len(parsed) > 2:
split_name = {'first': parsed[0], 'second': parsed[-2], 'last': parsed[-1]}
else:
split_name = {'first': parsed[0], 'last': parsed[-1]}
split_name = {'first': parsed[0], 'second': '', 'last': parsed[-1]}

return split_name

Expand All @@ -121,7 +129,7 @@ def f_last(self):
names = set()
names.add(self.name['first'][0] + self.name['last'])

if len(self.name) == 3:
if self.name['second']:
names.add(self.name['first'][0] + self.name['second'])

return names
Expand All @@ -131,7 +139,7 @@ def f_dot_last(self):
names = set()
names.add(self.name['first'][0] + '.' + self.name['last'])

if len(self.name) == 3:
if self.name['second']:
names.add(self.name['first'][0] + '.' + self.name['second'])

return names
Expand All @@ -141,7 +149,7 @@ def last_f(self):
names = set()
names.add(self.name['last'] + self.name['first'][0])

if len(self.name) == 3:
if self.name['second']:
names.add(self.name['second'] + self.name['first'][0])

return names
Expand All @@ -151,7 +159,7 @@ def first_dot_last(self):
names = set()
names.add(self.name['first'] + '.' + self.name['last'])

if len(self.name) == 3:
if self.name['second']:
names.add(self.name['first'] + '.' + self.name['second'])

return names
Expand All @@ -161,7 +169,7 @@ def first_l(self):
names = set()
names.add(self.name['first'] + self.name['last'][0])

if len(self.name) == 3:
if self.name['second']:
names.add(self.name['first'] + self.name['second'][0])

return names
Expand Down Expand Up @@ -608,7 +616,6 @@ def do_loops(session, company_id, outer_loops, args):
new_names += len(found_employees)
employee_list.extend(found_employees)


sys.stdout.write(f" [*] Added {str(new_names)} new names. "
f"Running total: {str(len(employee_list))}"
" \r")
Expand Down
13 changes: 11 additions & 2 deletions tests/test_linkedin2username.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
1: "John Smith",
2: "John Davidson-Smith",
3: "John-Paul Smith-Robinson",
4: "José Gonzáles"
4: "José Gonzáles",
}


Expand Down Expand Up @@ -123,6 +123,15 @@ def test_clean_name():
mutator = NameMutator("xxx")
assert mutator.clean_name(" 🙂Ànèôõö ßï🙂 ") == "aneooo ssi"

name = "Dr. Hannibal Lecter, PhD."
assert mutator.clean_name(name) == "hannibal lecter"

name = "Mr. Fancy Pants MD, PhD, MBA"
assert mutator.clean_name(name) == "fancy pants"

name = "Mr. Cert Dude (OSCP, OSCE)"
assert mutator.clean_name(name) == "cert dude"


def test_split_name():
mutator = NameMutator("xxx")
Expand All @@ -131,7 +140,7 @@ def test_split_name():
assert mutator.split_name(name) == {"first": "madonna", "second": "wayne", "last": "gacey"}

name = "twiggy ramirez"
assert mutator.split_name(name) == {"first": "twiggy", "last": "ramirez"}
assert mutator.split_name(name) == {"first": "twiggy", "second": "", "last": "ramirez"}

name = "brian warner is marilyn manson"
assert mutator.split_name(name) == {"first": "brian", "second": "marilyn", "last": "manson"}
Expand Down

0 comments on commit ac6b16a

Please sign in to comment.