forked from cmharlow/geonames-reconcile
-
Notifications
You must be signed in to change notification settings - Fork 0
/
lc_parse.py
65 lines (64 loc) · 2.97 KB
/
lc_parse.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
from unicodedata import normalize as ucnorm, category
# Abbreviated U.S. state / D.C. qualifiers, each including its closing
# parenthesis, paired with the text that replaces it.  The pairs are applied
# in order, and the order matters: 'W. Va.)' must be tried before 'Va.)',
# because 'Va.)' is a suffix of it and would otherwise consume the match.
_STATE_QUALIFIERS = (
    ('Ala.)', ', Alabama'),
    ('Alaska)', ', Alaska'),
    ('Ariz.)', ', Arizona'),
    ('Ark.)', ', Arkansas'),
    ('Calif.)', ', California'),
    ('Colo.)', ', Colorado'),
    ('Conn.)', ', Connecticut'),
    ('Del.)', ', Delaware'),
    ('D.C.)', ', District of Columbia'),
    ('Fla.)', ', Florida'),
    ('Ga.)', ', Georgia'),
    ('Hawaii)', ', Hawaii'),
    ('Idaho)', ', Idaho'),
    ('Ill.)', ', Illinois'),
    ('Ind.)', ', Indiana'),
    ('Iowa)', ', Iowa'),
    ('Kan.)', ', Kansas'),
    ('Ky.)', ', Kentucky'),
    ('La.)', ', Louisiana'),
    ('Me.)', ', Maine'),
    ('Md.)', ', Maryland'),
    ('Mass.)', ', Massachusetts'),
    ('Mich.)', ', Michigan'),
    ('Minn.)', ', Minnesota'),
    ('Miss.)', ', Mississippi'),
    ('Mo.)', ', Missouri'),
    ('Mont.)', ', Montana'),
    ('Neb.)', ', Nebraska'),
    ('Nev.)', ', Nevada'),
    ('N.H.)', ', New Hampshire'),
    ('N.J.)', ', New Jersey'),
    ('N.M.)', ', New Mexico'),
    ('N.Y.)', ', New York'),
    ('N.C.)', ', North Carolina'),
    ('N.D.)', ', North Dakota'),
    ('Ohio)', ', Ohio'),
    ('Okla.)', ', Oklahoma'),
    ('Or.)', ', Oregon'),
    ('Pa.)', ', Pennsylvania'),
    ('R.I.)', ', Rhode Island'),
    ('S.C.)', ', South Carolina'),
    ('S.D.)', ', South Dakota'),
    ('Tenn.)', ', Tennessee'),
    ('Tex.)', ', Texas'),
    ('Utah)', ', Utah'),
    ('Vt.)', ', Vermont'),
    ('W. Va.)', ', West Virginia'),
    ('Va.)', ', Virginia'),
    ('Wash.)', ', Washington'),
    ('Wis.)', ', Wisconsin'),
    ('Wyo.)', ', Wyoming'),
)


def lc2geonames(text, PY3):
    """Rewrite a parenthesised place qualifier into plain, comma-separated text.

    When *text* contains ``'('``, every abbreviated state qualifier listed in
    ``_STATE_QUALIFIERS`` is expanded to ``', <full state name>'``, any
    remaining ``' ('`` and ``')'`` are turned into single spaces, and ``':'``
    becomes ``','``.  For example ``'Athens (Ga.)'`` becomes
    ``'Athens , Georgia'``.  Text without ``'('`` is returned as is (after
    the text-type coercion described below).

    Args:
        text: The place string.  A non-text value is coerced first: on
            Python 3 it is decoded as UTF-8 via ``str(text, 'utf-8')``; on
            Python 2 it is passed to ``unicode(text)``.
        PY3: True when running under Python 3; selects the coercion branch.

    Returns:
        The rewritten text string.

    Raises:
        TypeError: On Python 3, if *text* is neither ``str`` nor a
            bytes-like object.
        UnicodeDecodeError: If the value cannot be decoded.
    """
    if PY3:
        if not isinstance(text, str):
            # The decoded value must be bound back to ``text``; previously it
            # was computed and thrown away, so bytes input failed below.
            text = str(text, 'utf-8')
    else:
        # ``unicode`` only exists on Python 2, which is the only time this
        # branch is meant to run.
        if not isinstance(text, unicode):  # noqa: F821
            text = unicode(text)  # noqa: F821

    if '(' not in text:
        return text

    for qualifier, expansion in _STATE_QUALIFIERS:
        text = text.replace(qualifier, expansion)

    # Drop whatever parentheses are left (non-state qualifiers) and normalise
    # colons to commas.
    text = text.replace(' (', ' ')
    text = text.replace(')', ' ')
    text = text.replace(':', ',')
    return text