Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Integrate Python address parser using the usaddress library #56

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 13 additions & 31 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,50 +1,32 @@
# Extend the base Python image
# See https://hub.docker.com/_/python for version options
# N.b., there are many options for Python images. We used the plain
# version number in the pilot. YMMV. See this post for a discussion of
# some options and their pros and cons:
# https://pythonspeed.com/articles/base-image-python-docker-images/
FROM python:3.8

# Add the NodeSource PPA for Node.js 14
# (see: https://github.com/nodesource/distributions/blob/master/README.md)
# Only one NodeSource setup script is run; the earlier Node.js 12 setup was
# superseded by this one.
RUN curl -sL https://deb.nodesource.com/setup_14.x | bash -

# Install any additional OS-level packages in a single RUN statement so the
# package index refresh and the install share one image layer. Read more on
# Dockerfile best practices at the source:
# https://docs.docker.com/develop/develop-images/dockerfile_best-practices
# NOTE(review): the NodeSource nodejs package is understood to bundle npm;
# confirm the separate npm package is required and does not conflict.
RUN apt-get update && apt-get install -y --no-install-recommends postgresql-client nodejs npm

# Put /usr/local/bin at the front of PATH
ENV PATH /usr/local/bin:$PATH

# Inside the container, create an app directory and switch into it
RUN mkdir /app
WORKDIR /app

# Copy only the requirements file and install Python dependencies, so Docker
# can cache this build step. Otherwise, the requirements must be reinstalled
# every time you build the image after the app code changes. See this post
# for further discussion of strategies for building lean and efficient
# containers:
# https://blog.realkinetic.com/building-minimal-docker-containers-for-python-applications-37d0272c52f3
COPY ./requirements.txt /app/requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Copy package.json and install Node dependencies. The verbose flag gives
# more detailed output in the build log.
COPY ./package.json /app/package.json
RUN npm install --verbose

# Copy the contents of the current host directory (i.e., our app code) into
# the container.
COPY . /app

# Add a bogus env var for the Django secret key in order to allow us to run
# the 'collectstatic' management command
ENV DJANGO_SECRET_KEY 'foobar'

# Build static files into the container
RUN python manage.py collectstatic --noinput
75 changes: 75 additions & 0 deletions parserator_web/static/js/index.js
Original file line number Diff line number Diff line change
@@ -1,2 +1,77 @@
/* Connect the address form to the parsing API and render the outcome in
   the #address-results div. */
document.addEventListener('DOMContentLoaded', function() {
  const form = document.querySelector('form');
  const addressResults = document.getElementById('address-results');

  // Without both elements there is nothing to wire up; bail out rather than
  // throwing on a null reference.
  if (!form || !addressResults) {
    return;
  }

  /**
   * Escape a value so it can be interpolated into an innerHTML template.
   * The text is routed through textContent so the browser performs the
   * escaping; the result is only safe in element-content position.
   */
  function escapeHtml(value) {
    const scratch = document.createElement('div');
    scratch.textContent = String(value);
    return scratch.innerHTML;
  }

  /** Show the generic failure message used for any unrecognised outcome. */
  function showUnexpectedError() {
    addressResults.style.display = 'block';
    addressResults.innerHTML = `
      <h4>Parsing Error</h4>
      <p class="text-danger">An unexpected error occurred. Please try again.</p>
    `;
  }

  form.addEventListener('submit', function(e) {
    e.preventDefault();
    const address = document.getElementById('address').value;

    // Send request to API endpoint
    fetch(`/api/parse/?address=${encodeURIComponent(address)}`)
      .then(response => response.json())
      .then(data => {
        // Clear previous results
        addressResults.innerHTML = '';
        addressResults.style.display = 'block';

        // Only treat `detail` as a message when it really is a string, so
        // the string methods below cannot throw.
        const detail = typeof data.detail === 'string' ? data.detail : '';

        if (detail.startsWith('Error parsing address:')) {
          // Display parsing error; everything from 'ORIGINAL STRING:' on is
          // dropped from the message shown to the user.
          const errorMessage = detail.split('ORIGINAL STRING:')[0].trim();
          addressResults.innerHTML = `
            <h4>Parsing Error</h4>
            <p class="text-danger">${escapeHtml(errorMessage)}</p>
            <p>The address could not be parsed. This may be due to:</p>
            <ul>
              <li>Duplicate address components</li>
              <li>Invalid address format</li>
            </ul>
            <p>Please check the address and try again.</p>
          `;
        } else if (data.address_components) {
          // Display successful results. The echoed input and the address
          // type are escaped because they originate outside this script.
          addressResults.innerHTML = `
            <h4>Parsing results</h4>
            <p>Address type: <strong>${escapeHtml(data.address_type)}</strong></p>
            <p>Input: <strong>${escapeHtml(data.input_string)}</strong></p>
            <table class="table table-bordered">
              <thead>
                <tr>
                  <th>Address part</th>
                  <th>Tag</th>
                </tr>
              </thead>
              <tbody>
              </tbody>
            </table>
          `;

          const resultsTable = addressResults.querySelector('tbody');

          // Add new results; textContent keeps the cell values inert.
          for (const [part, tag] of Object.entries(data.address_components)) {
            const row = resultsTable.insertRow();
            const cellPart = row.insertCell(0);
            const cellTag = row.insertCell(1);
            cellPart.textContent = part;
            cellTag.textContent = tag;
          }
        } else if (detail) {
          // Any other message from the API (e.g. a missing address) is more
          // useful to the user than a generic failure.
          addressResults.innerHTML = `
            <h4>Parsing Error</h4>
            <p class="text-danger">${escapeHtml(detail)}</p>
          `;
        } else {
          // Unexpected response format
          showUnexpectedError();
        }
      })
      .catch(error => {
        console.error('Error:', error);
        showUnexpectedError();
      });
  });
});
3 changes: 1 addition & 2 deletions parserator_web/templates/parserator_web/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ <h3 id="usaddress-parser"><i class="fa fa-fw fa-map-marker-alt"></i> U.S. addres
<button id="submit" type="submit" class="btn btn-success mt-3">Parse!</button>
</form>
</div>
<!-- TODO: Display parsed address components here. -->
<div id="address-results" style="display:none">
<h4>Parsing results</h4>
<p>Address type: <strong><span id="parse-type"></span></strong></p>
Expand All @@ -38,5 +37,5 @@ <h4>Parsing results</h4>
{% endblock %}

{% block extra_js %}
<script src="{% static 'js/index.js' %}"></script>
<script src="{% static 'js/index.js' %}"></script>
{% endblock %}
36 changes: 30 additions & 6 deletions parserator_web/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,35 @@ class AddressParse(APIView):
renderer_classes = [JSONRenderer]

def get(self, request):
    """
    Parse the ``address`` query parameter and return its tagged components.

    Args:
        request: The incoming request; the address string is read from
            its ``query_params`` under the ``address`` key.

    Returns:
        Response: A payload with three keys: ``input_string`` (the address
        as received), ``address_components`` and ``address_type`` (both as
        returned by ``self.parse``).

    Raises:
        ParseError: If the ``address`` parameter is missing or empty, or if
            parsing raises ``usaddress.RepeatedLabelError``.
    """
    address = request.query_params.get('address')
    if not address:
        raise ParseError("Address parameter is required")

    # Keep the try body to the single call that can raise the error we
    # translate, so unrelated failures are not caught by accident.
    try:
        address_components, address_type = self.parse(address)
    except usaddress.RepeatedLabelError as e:
        # The "Error parsing address:" prefix is matched by the frontend and
        # the tests, so it must stay stable.
        raise ParseError(f"Error parsing address: {e}") from e

    return Response({
        'input_string': address,
        'address_components': address_components,
        'address_type': address_type,
    })

def parse(self, address):
    """
    Parse the given address using usaddress.

    Args:
        address (str): The address string to parse.

    Returns:
        tuple: A tuple containing two elements:
            1. address_components (dict): The parsed address components.
            2. address_type (str): The type of address provided.

    Raises:
        usaddress.RepeatedLabelError: Propagated unchanged from
            ``usaddress.tag``; the caller in ``get`` translates it.
    """
    tagged_components, address_type = usaddress.tag(address)

    # Convert OrderedDict to regular dict for better JSON serialization
    return dict(tagged_components), address_type
32 changes: 30 additions & 2 deletions tests/test_views.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,43 @@
import pytest
from django.urls import reverse
from rest_framework import status


def test_api_parse_succeeds(client):
    """A parseable address returns 200 with the input, its parts and its type."""
    address_string = '123 main st chicago il'
    url = reverse('address-parse')
    response = client.get(url, {'address': address_string})

    assert response.status_code == status.HTTP_200_OK

    # The payload must expose all three documented keys before their values
    # are inspected, so a missing key fails with a clear message.
    data = response.json()
    assert 'input_string' in data
    assert 'address_components' in data
    assert 'address_type' in data

    assert data['input_string'] == address_string
    assert 'AddressNumber' in data['address_components']
    assert data['address_components']['AddressNumber'] == '123'
    assert data['address_type'] == 'Street Address'


def test_api_parse_raises_error(client):
    """An address with repeated labels is rejected with a 400 and a parse error."""
    # This address_string repeats its components, which raises a
    # RepeatedLabelError, so AddressParse.parse() cannot parse it.
    address_string = '123 main st chicago il 123 main st'
    url = reverse('address-parse')
    response = client.get(url, {'address': address_string})

    assert response.status_code == status.HTTP_400_BAD_REQUEST
    assert 'Error parsing address' in response.json()['detail']


# Additional test for missing address parameter
def test_api_parse_missing_address(client):
    """Omitting the ``address`` query parameter yields a 400 with an explanation."""
    response = client.get(reverse('address-parse'))

    assert response.status_code == status.HTTP_400_BAD_REQUEST

    detail = response.json()['detail']
    assert 'Address parameter is required' in detail