From 6cff2660a62d25d40c0ca8e57cd3148da606897d Mon Sep 17 00:00:00 2001 From: Jerry bony Date: Wed, 17 Jul 2024 00:02:29 -0300 Subject: [PATCH 1/2] Integrate Python address parser using usaddress library Add usaddress library to project dependencies - Implement AddressParse API view for address parsing - Create parse() method to handle address parsing logic - Update frontend to send requests to new API endpoint - Display parsed address components and handle errors in UI - Add error handling for unparseable addresses - Update tests to cover new address parsing functionality --- Dockerfile | 44 +++------- .../templates/parserator_web/index.html | 82 ++++++++++++++++++- parserator_web/views.py | 36 ++++++-- tests/test_views.py | 32 +++++++- 4 files changed, 151 insertions(+), 43 deletions(-) diff --git a/Dockerfile b/Dockerfile index 4c464fdf..50ab527d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,50 +1,32 @@ # Extend the base Python image -# See https://hub.docker.com/_/python for version options -# N.b., there are many options for Python images. We used the plain -# version number in the pilot. YMMV. See this post for a discussion of -# some options and their pros and cons: -# https://pythonspeed.com/articles/base-image-python-docker-images/ FROM python:3.8 # Add the NodeSource PPA -# (see: https://github.com/nodesource/distributions/blob/master/README.md) -RUN curl -sL https://deb.nodesource.com/setup_12.x | bash - - -# Install any additional OS-level packages you need via apt-get. RUN statements -# add additional layers to your image, increasing its final size.
Keep your -# image small by combining related commands into one RUN statement, e.g., -# -# RUN apt-get update && \ -# apt-get install -y python-pip -# -# Read more on Dockerfile best practices at the source: -# https://docs.docker.com/develop/develop-images/dockerfile_best-practices -RUN apt-get update && apt-get install -y --no-install-recommends postgresql-client nodejs +RUN curl -sL https://deb.nodesource.com/setup_14.x | bash - # Updated to Node.js 14 + +# Install any additional OS-level packages +RUN apt-get update && apt-get install -y --no-install-recommends postgresql-client nodejs npm + +# Verify PATH includes npm +ENV PATH /usr/local/bin:$PATH # Inside the container, create an app directory and switch into it RUN mkdir /app WORKDIR /app -# Copy the requirements file into the app directory, and install them. Copy -# only the requirements file, so Docker can cache this build step. Otherwise, -# the requirements must be reinstalled every time you build the image after -# the app code changes. See this post for further discussion of strategies -# for building lean and efficient containers: -# https://blog.realkinetic.com/building-minimal-docker-containers-for-python-applications-37d0272c52f3 +# Copy the requirements file and install Python dependencies COPY ./requirements.txt /app/requirements.txt RUN pip install --no-cache-dir -r requirements.txt -# Install Node requirements +# Copy package.json and install Node dependencies COPY ./package.json /app/package.json -RUN npm install +RUN npm install --verbose # Added verbose flag for more detailed output -# Copy the contents of the current host directory (i.e., our app code) into -# the container. +# Copy the rest of the application COPY . 
/app -# Add a bogus env var for the Django secret key in order to allow us to run -# the 'collectstatic' management command +# Add a bogus env var for the Django secret key ENV DJANGO_SECRET_KEY 'foobar' # Build static files into the container -RUN python manage.py collectstatic --noinput +RUN python manage.py collectstatic --noinput \ No newline at end of file diff --git a/parserator_web/templates/parserator_web/index.html b/parserator_web/templates/parserator_web/index.html index a72d9c80..5e0c8b61 100644 --- a/parserator_web/templates/parserator_web/index.html +++ b/parserator_web/templates/parserator_web/index.html @@ -17,7 +17,6 @@

U.S. addres - {% endblock %} - {% block extra_js %} - -{% endblock %} + +{% endblock %} \ No newline at end of file diff --git a/parserator_web/views.py b/parserator_web/views.py index 0be3f4a9..f251f3c0 100644 --- a/parserator_web/views.py +++ b/parserator_web/views.py @@ -14,11 +14,35 @@ class AddressParse(APIView): renderer_classes = [JSONRenderer] def get(self, request): - # TODO: Flesh out this method to parse an address string using the - # parse() method and return the parsed components to the frontend. - return Response({}) + address = request.query_params.get('address') + if not address: + raise ParseError("Address parameter is required") + + try: + address_components, address_type = self.parse(address) + return Response({ + 'input_string': address, + 'address_components': address_components, + 'address_type': address_type + }) + except usaddress.RepeatedLabelError as e: + raise ParseError(f"Error parsing address: {str(e)}") def parse(self, address): - # TODO: Implement this method to return the parsed components of a - # given address using usaddress: https://github.com/datamade/usaddress - return address_components, address_type + """ + Parse the given address using usaddress. + + Args: + address (str): The address string to parse. + + Returns: + tuple: A tuple containing two elements: + 1. address_components (dict): The parsed address components. + 2. address_type (str): The type of address provided. + """ + address_components, address_type = usaddress.tag(address) + + # Convert OrderedDict to regular dict for better JSON serialization + address_components = dict(address_components) + + return address_components, address_type \ No newline at end of file diff --git a/tests/test_views.py b/tests/test_views.py index bfd5d0b7..b4b9faa4 100644 --- a/tests/test_views.py +++ b/tests/test_views.py @@ -1,15 +1,43 @@ import pytest +from django.urls import reverse +from rest_framework import status def test_api_parse_succeeds(client): # TODO: Finish this test. 
Send a request to the API and confirm that the # data comes back in the appropriate format. address_string = '123 main st chicago il' - pytest.fail() + url = reverse('address-parse') + response = client.get(url, {'address': address_string}) + + assert response.status_code == status.HTTP_200_OK + + data = response.json() + assert 'input_string' in data + assert 'address_components' in data + assert 'address_type' in data + + assert data['input_string'] == address_string + assert 'AddressNumber' in data['address_components'] + assert data['address_components']['AddressNumber'] == '123' + assert data['address_type'] == 'Street Address' def test_api_parse_raises_error(client): # TODO: Finish this test. The address_string below will raise a # RepeatedLabelError, so ParseAddress.parse() will not be able to parse it. address_string = '123 main st chicago il 123 main st' - pytest.fail() + url = reverse('address-parse') + response = client.get(url, {'address': address_string}) + + assert response.status_code == status.HTTP_400_BAD_REQUEST + assert 'Error parsing address' in response.json()['detail'] + + +# Additional test for missing address parameter +def test_api_parse_missing_address(client): + url = reverse('address-parse') + response = client.get(url) + + assert response.status_code == status.HTTP_400_BAD_REQUEST + assert 'Address parameter is required' in response.json()['detail'] \ No newline at end of file From 78e2b7fb0efd6626bf91ef48af8668b8ef936e05 Mon Sep 17 00:00:00 2001 From: Jerry bony Date: Tue, 6 Aug 2024 07:10:25 -0500 Subject: [PATCH 2/2] move Javascript to index.js file --- parserator_web/static/js/index.js | 75 +++++++++++++++++ .../templates/parserator_web/index.html | 81 +------------------ 2 files changed, 78 insertions(+), 78 deletions(-) diff --git a/parserator_web/static/js/index.js b/parserator_web/static/js/index.js index 492674cc..3c299a75 100644 --- a/parserator_web/static/js/index.js +++ b/parserator_web/static/js/index.js @@ -1,2 +1,77 @@ /* 
TODO: Flesh this out to connect the form to the API and render results in the #address-results div. */ + document.addEventListener('DOMContentLoaded', function() { + const form = document.querySelector('form'); + const addressResults = document.getElementById('address-results'); + + form.addEventListener('submit', function(e) { + e.preventDefault(); + const address = document.getElementById('address').value; + + // Send request to API endpoint + fetch(`/api/parse/?address=${encodeURIComponent(address)}`) + .then(response => response.json()) + .then(data => { + // Clear previous results + addressResults.innerHTML = ''; + addressResults.style.display = 'block'; + + if (data.detail && data.detail.startsWith('Error parsing address:')) { + // Display parsing error + const errorMessage = data.detail.split('ORIGINAL STRING:')[0].trim(); + addressResults.innerHTML = ` +

Parsing Error

+

${errorMessage}

+

The address could not be parsed. This may be due to:

+
    +
  • Duplicate address components
  • +
  • Invalid address format
  • +
+

Please check the address and try again.

+ `; + } else if (data.address_components) { + // Display successful results + addressResults.innerHTML = ` +

Parsing results

+

Address type: ${data.address_type}

+

Input: ${data.input_string}

+ + + + + + + + + +
Address partTag
+ `; + + const resultsTable = addressResults.querySelector('tbody'); + + // Add new results + for (const [part, tag] of Object.entries(data.address_components)) { + const row = resultsTable.insertRow(); + const cellPart = row.insertCell(0); + const cellTag = row.insertCell(1); + cellPart.textContent = part; + cellTag.textContent = tag; + } + } else { + // Unexpected response format + addressResults.innerHTML = ` +

Parsing Error

+

An unexpected error occurred. Please try again.

+ `; + } + }) + .catch(error => { + console.error('Error:', error); + addressResults.style.display = 'block'; + addressResults.innerHTML = ` +

Parsing Error

+

An unexpected error occurred. Please try again.

+ `; + }); + }); + }); diff --git a/parserator_web/templates/parserator_web/index.html b/parserator_web/templates/parserator_web/index.html index 5e0c8b61..bd219cad 100644 --- a/parserator_web/templates/parserator_web/index.html +++ b/parserator_web/templates/parserator_web/index.html @@ -35,82 +35,7 @@

Parsing results

{% endblock %} -{% block extra_js %} - -{% endblock %} \ No newline at end of file +{% block extra_js %} + +{% endblock %}