diff --git a/streetaddress/streetaddress.py b/streetaddress/streetaddress.py index 1906712..c3d94f4 100644 --- a/streetaddress/streetaddress.py +++ b/streetaddress/streetaddress.py @@ -34,6 +34,7 @@ def __init__(self): def parse(self, addr_str, skip_house=False): addr_str = addr_str.strip() res = { + 'attn': None, 'house' : None, 'street_name' : None, 'street_type' : None, @@ -41,33 +42,42 @@ def parse(self, addr_str, skip_house=False): 'suite_num' : None, 'suite_type' : None, 'other' : None, + 'po_box': None + } tokens = addr_str.split() - start_idx = 0 - if len(tokens) == 0: return res - - if skip_house: - start_idx = 0 - else: - if tokens[0].lower() in self.text2num_dict: - res['house'] = six.text_type(self.text2num_dict[tokens[0].lower()]) - start_idx = 1 - elif self.rec_st_nd_rd_th.search(tokens[0]): - #first token is actually a street number (not house) - start_idx = 0 - elif self.rec_house_number.search(tokens[0]): - res['house'] = tokens[0] - start_idx = 1 - else: - #no house number - start_idx = 0 - - if res['house'] and len(tokens) >= 2 and tokens[1] == '1/2': - res['house'] += ' ' + tokens[1] - start_idx = 2 + + #go through tokens of address and change spelled out addresses to numeric + for x,tkn in enumerate(tokens): + if tkn.lower() in self.text2num_dict: + tokens[x] = six.text_type(self.text2num_dict[tkn.lower()]) + + #find first numeric token, + start_idx = findNumeric(tokens) + + #if tokens contain PO BOX: + if next((True for i, v in enumerate(tokens) if v.lower() in ['po','pobox','apo','boxnumber', 'box']), False): + res['po_box'] = tokens[start_idx] + return res + + #anything preceding first numeric token assume to be a name (ie - John Doe) + res['attn'] = ' '.join(tokens[:start_idx]) if start_idx >0 else None + + #first numeric token is actually a street number (not house) + if self.rec_st_nd_rd_th.search(tokens[start_idx]): + pass + elif self.rec_house_number.search(tokens[start_idx]): + res['house'] = tokens[start_idx] + start_idx = start_idx + 1 + + #account for half addresses (5 1/2) + if res['house'] and len(tokens) >start_idx+1: + if tokens[start_idx + 1] == '1/2': + res['house'] += ' ' + tokens[start_idx + 1] + start_idx = start_idx + 1 street_accum = [] other_accum = [] @@ -177,6 +187,20 @@ def get_text2num_dict(): 'ninety': 90, } +#test if the first character of a string is a numeric +def is_number(currString): + try: + int(currString[0]) + return True + except ValueError: + return False + +#return the first token that has a numeric as the first character, any preceding tokens assumed to be attn: strings +def findNumeric(tkns): + return next((i for i, v in enumerate(tkns) if is_number(v)), 0) + + + ######################################################################## # StreetAddressFormatter @@ -250,4 +274,3 @@ def abbrev_street_avenue_etc(self, addr, abbrev_only_last_token=True): addr = ' ' . join(word_lst) return addr -