Skip to content
This repository has been archived by the owner on Apr 4, 2023. It is now read-only.

Commit

Permalink
Rework dependencies in the versions command.
Browse files Browse the repository at this point in the history
We can just look for the presence of a CFRVersion.
  • Loading branch information
cmc333333 committed Mar 8, 2017
1 parent 4a85425 commit 9471aab
Show file tree
Hide file tree
Showing 2 changed files with 81 additions and 108 deletions.
72 changes: 32 additions & 40 deletions regparser/commands/versions.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,19 @@

import click

from regparser.commands.preprocess_notice import preprocess_notice
from regparser.federalregister import fetch_notice_json
from regparser.index import dependency, entry
from regparser.index import entry
from regparser.notice.xml import NoticeXML
from regparser.web.index.models import CFRVersion, SourceCollection, SourceFile

logger = logging.getLogger(__name__)


def fetch_version_ids(cfr_title, cfr_part, notice_dir):
def fetch_version_ids(cfr_title, cfr_part):
"""Returns a list of version ids after looking them up between the federal
register and the local filesystem"""
present_ids = [v.path[-1] for v in notice_dir.sub_entries()]
present_ids = [v.path[-1] for v in entry.Notice().sub_entries()]
final_rules = fetch_notice_json(cfr_title, cfr_part, only_final=True)

version_ids = []
Expand Down Expand Up @@ -51,75 +52,66 @@ def delays(source_files):
return delay_map


def generate_dependencies(version_dir, version_ids, delays_by_version):
    """Build the dependency graph for a set of versions.

    Each version id depends on the notice entry of the same id; each delayed
    version additionally depends on the notice entry named by its delay's
    ``by`` attribute. Returns the populated graph."""
    notices = entry.Notice()
    graph = dependency.Graph()

    def link(output_id, input_id):
        # Record "version output_id depends on notice input_id".
        graph.add(version_dir / output_id, notices / input_id)

    for vid in version_ids:
        link(vid, vid)
    for delayed_id, delay in delays_by_version.items():
        link(delayed_id, delay.by)
    return graph


# NOTE(review): this span is a rendered diff, so removed and added lines sit
# side by side (e.g. the two `def` lines and the two `identifier=` lines).
def write_to_disk(cfr_title, cfr_part, sources, version_id, delay=None):
def create_version(cfr_title, cfr_part, sources, version_id, delay=None):
"""Serialize a Version instance to disk.

Parses the notice XML of ``sources[version_id]`` and picks the effective
date: the notice's own date, or ``delay.until`` when a delay is supplied.
With an effective date, an empty Version entry is written and a CFRVersion
row is created; without one, a warning is logged and nothing is stored."""
notice_xml = NoticeXML(sources[version_id].xml())
# A delay, when given, overrides the notice's own effective date.
effective = notice_xml.effective if delay is None else delay.until
# Only a delayed version records which source delayed it.
delaying_source = None if delay is None else sources[delay.by]
if effective:
entry.Version(cfr_title, cfr_part, notice_xml.version_id).write(b'')
CFRVersion.objects.filter(
identifier=notice_xml.version_id, cfr_title=cfr_title,
cfr_part=cfr_part).delete()
entry.Version(cfr_title, cfr_part, version_id).write(b'')
CFRVersion.objects.create(
identifier=notice_xml.version_id, source=sources[version_id],
identifier=version_id, source=sources[version_id],
delaying_source=delaying_source, effective=effective,
fr_volume=notice_xml.fr_citation.volume,
fr_page=notice_xml.fr_citation.page, cfr_title=cfr_title,
cfr_part=cfr_part
)
else:
# No effective date: skip the version rather than store an undated row.
logger.warning("No effective date for this rule: %s. Skipping",
notice_xml.version_id)
version_id)


# NOTE(review): rendered diff; the dependency-graph lines (generate_dependencies,
# validate_for, is_stale, write_to_disk) and the CFRVersion existence check
# are two alternative bodies shown interleaved.
def write_if_needed(cfr_title, cfr_part, source_files, delays_by_version):
def create_if_needed(cfr_title, cfr_part, source_files, delays_by_version):
"""All versions which are stale (either because they were never created or
because their dependency has been updated) are written to disk. If any
dependency is missing, an exception is raised"""
# NOTE(review): the docstring above describes the dependency-based variant;
# the existence-check variant only creates versions that have no matching
# CFRVersion row and contains no explicit missing-dependency check.
# Index the source files by file name, which doubles as the version id.
source_by_id = {sf.file_name: sf for sf in source_files}
version_dir = entry.Version(cfr_title, cfr_part)
deps = generate_dependencies(version_dir, source_by_id.keys(),
delays_by_version)
for version_id in source_by_id.keys():
version_entry = version_dir / version_id
deps.validate_for(version_entry)
if deps.is_stale(version_entry):
write_to_disk(cfr_title, cfr_part, source_by_id, version_id,
delays_by_version.get(version_id))
exists = CFRVersion.objects.filter(
identifier=version_id, cfr_title=cfr_title, cfr_part=cfr_part
).exists()
if not exists:
# .get() yields None for versions that were not delayed.
create_version(cfr_title, cfr_part, source_by_id, version_id,
delays_by_version.get(version_id))


def generate_source(version_id, ctx):
    """Return the notice SourceFile for this version, creating it if absent.

    When no matching SourceFile exists, preprocess_notice is invoked through
    the click context first; the record is then fetched and returned."""
    lookup = {
        'collection': SourceCollection.notice.name,
        'file_name': version_id,
    }
    already_present = SourceFile.objects.filter(**lookup).exists()
    if not already_present:
        ctx.invoke(preprocess_notice, document_number=version_id)
    return SourceFile.objects.get(**lookup)


# NOTE(review): rendered diff; one `def versions` line takes no context and
# the other receives the click context via @click.pass_context.
@click.command()
@click.argument('cfr_title', type=int)
@click.argument('cfr_part', type=int)
def versions(cfr_title, cfr_part):
@click.pass_context
def versions(ctx, cfr_title, cfr_part):
"""Find all Versions for a regulation. Accounts for locally modified
notice XML and rules modifying the effective date of versions of a
regulation"""
# click parses both arguments as ints; the rest of the command uses strings.
cfr_title, cfr_part = str(cfr_title), str(cfr_part)
notice_dir = entry.Notice()

logger.info("Finding versions")
version_ids = fetch_version_ids(cfr_title, cfr_part, notice_dir)
version_ids = fetch_version_ids(cfr_title, cfr_part)
logger.debug("Versions found: %r", version_ids)

# One variant requires every SourceFile to exist already (objects.get);
# the other goes through generate_source, which creates missing ones.
source_files = [
SourceFile.objects.get(
collection=SourceCollection.notice.name, file_name=version_id)
for version_id in version_ids
]
source_files = [generate_source(v, ctx) for v in version_ids]
# notices keyed by version_id
delays_by_version = delays(source_files)
write_if_needed(cfr_title, cfr_part, source_files, delays_by_version)
create_if_needed(cfr_title, cfr_part, source_files, delays_by_version)
117 changes: 49 additions & 68 deletions tests/commands/versions_tests.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,12 @@
from datetime import date, timedelta
from datetime import date

import pytest
from django.utils import timezone
from mock import Mock
from mock import Mock, call

from regparser.commands import versions
from regparser.index import dependency, entry
from regparser.index import entry
from regparser.notice.xml import NoticeXML
from regparser.web.index.models import Entry as DBEntry
from regparser.web.index.models import CFRVersion, SourceFile
from regparser.web.index.models import CFRVersion, SourceCollection, SourceFile


@pytest.mark.django_db
Expand All @@ -19,8 +17,7 @@ def test_fetch_version_ids_no_local(monkeypatch):
{'document_number': '1', 'full_text_xml_url': 'somewhere'},
{'document_number': '22', 'full_text_xml_url': 'somewhere'}
]))
path = entry.Entry("path")
assert ['1', '22'] == versions.fetch_version_ids('title', 'part', path)
assert ['1', '22'] == versions.fetch_version_ids('title', 'part')


@pytest.mark.django_db
Expand All @@ -31,12 +28,12 @@ def test_fetch_version_ids_local(monkeypatch):
{'document_number': '1', 'full_text_xml_url': 'somewhere'},
{'document_number': '22', 'full_text_xml_url': 'somewhere'}
]))
path = entry.Entry("path")
path = entry.Entry("notice_xml")
(path / '1_20010101').write(b'v1')
(path / '1_20020202').write(b'v2')
(path / '22').write(b'second')
(path / '22-3344').write(b'unrelated file')
assert versions.fetch_version_ids('title', 'part', path) == [
assert versions.fetch_version_ids('title', 'part') == [
'1_20010101', '1_20020202', '22']


Expand All @@ -48,10 +45,10 @@ def test_fetch_version_ids_skip_no_xml(monkeypatch):
{'document_number': '2', 'full_text_xml_url': None},
{'document_number': '3', 'full_text_xml_url': 'somewhere'}
]))
path = entry.Entry("path")
assert ['1', '3'] == versions.fetch_version_ids('title', 'part', path)
assert ['1', '3'] == versions.fetch_version_ids('title', 'part')


@pytest.mark.django_db
def test_delays():
"""For NoticeXMLs which cause delays to other NoticeXMLs, we'd like to get
a dictionary of delayed -> Delay(delayer, delayed_until)"""
Expand Down Expand Up @@ -133,100 +130,84 @@ def test_delays_order():


# NOTE(review): rendered diff; each '111'/'222' line is paired with an
# 'aaa'/'bbb' line that replaces it.
@pytest.mark.django_db
def test_write_to_disk():
def test_create_version():
"""If a version has been delayed, its effective date should be part of the
serialized json"""
notice_xml1 = NoticeXML(b'<ROOT/>')
notice_xml1.version_id = '111'
notice_xml1.version_id = 'aaa'
notice_xml1.effective = date(2002, 2, 2)
notice_xml1.fr_volume = 1
notice_xml1.start_page = 1
# presumably save() persists a SourceFile named after version_id, since one
# is fetched by file_name below; confirm in NoticeXML
notice_xml1.save()

notice_xml2 = NoticeXML(b'<ROOT/>')
notice_xml2.version_id = '222'
notice_xml2.version_id = 'bbb'
notice_xml2.save()

sources = {'111': SourceFile.objects.get(file_name='111'),
'222': SourceFile.objects.get(file_name='222')}
sources = {'aaa': SourceFile.objects.get(file_name='aaa'),
'bbb': SourceFile.objects.get(file_name='bbb')}

# Without a delay: the notice's own effective date, no delaying source.
versions.write_to_disk('12', '1000', sources, '111')
versions.create_version('12', '1000', sources, 'aaa')
saved = CFRVersion.objects.get()
assert saved.effective == date(2002, 2, 2)
assert saved.source == sources['111']
assert saved.source == sources['aaa']
assert saved.delaying_source is None
# Clear the table so the bare .get() below matches exactly one row again.
CFRVersion.objects.all().delete()

# With a delay: the delay's `until` date and the delaying notice's source.
versions.write_to_disk('12', '1000', sources, '111',
versions.Delay(by='222', until=date(2004, 4, 4)))
versions.create_version('12', '1000', sources, 'aaa',
versions.Delay(by='bbb', until=date(2004, 4, 4)))
saved = CFRVersion.objects.get()
assert saved.effective == date(2004, 4, 4)
assert saved.source == sources['111']
assert saved.delaying_source == sources['222']
assert saved.source == sources['aaa']
assert saved.delaying_source == sources['bbb']


# NOTE(review): rendered diff; the lines of test_generate_source_calls_preprocessor
# are interleaved with two write_if_needed tests.
@pytest.mark.django_db
def test_write_if_needed_raises_exception(monkeypatch):
"""If an input file is missing, this raises an exception"""
with pytest.raises(dependency.Missing):
versions.write_if_needed(
'title', 'part', [SourceFile(file_name='111')], {})
def test_generate_source_calls_preprocessor(monkeypatch):
"""If a SourceFile is missing, we should call the preprocess function"""
ctx = Mock()

# Stand-in for the invoked command: creates the SourceFile that
# generate_source fetches after calling ctx.invoke.
def create_source(*args, **kwargs):
SourceFile.objects.create(
collection=SourceCollection.notice.name, file_name='aaa')

@pytest.mark.django_db
def test_write_if_needed_output_missing(monkeypatch):
"""If the output file is missing, we'll always write"""
monkeypatch.setattr(versions, 'write_to_disk', Mock())
entry.Entry('notice_xml', '111').write(b'content')
versions.write_if_needed(
'title', 'part', [SourceFile(file_name='111')], {})
assert versions.write_to_disk.called
ctx.invoke.side_effect = create_source
versions.generate_source('aaa', ctx)
# The command must be invoked with the version id as its document number.
assert ctx.invoke.call_args == call(versions.preprocess_notice,
document_number='aaa')


# NOTE(review): rendered diff; test_write_if_needed_no_need_to_recompute and
# test_create_if_needed_output_missing are shown back to back under a single
# decorator line.
@pytest.mark.django_db
def test_write_if_needed_no_need_to_recompute(monkeypatch):
"""If all dependencies are up to date and the output is present, there's
no need to write anything"""
monkeypatch.setattr(versions, 'write_to_disk', Mock())
entry.Entry('notice_xml', '111').write(b'content')
entry.Entry('version', 'title', 'part', '111').write(b'out')
versions.write_if_needed(
'title', 'part', [SourceFile(file_name='111')], {})
assert not versions.write_to_disk.called
def test_create_if_needed_output_missing(monkeypatch):
"""If the output file is missing, we'll always write"""
# Replace the writer with a mock so only the decision to call it is tested.
monkeypatch.setattr(versions, 'create_version', Mock())
entry.Entry('notice_xml', 'aaa').write(b'content')
# No CFRVersion is created in this test, so creation is expected.
versions.create_if_needed(111, 22, [SourceFile(file_name='aaa')], {})
assert versions.create_version.called


# NOTE(review): rendered diff; test_write_if_needed_delays and
# test_generate_source_no_need_to_recompute are shown back to back under a
# single decorator line.
@pytest.mark.django_db
def test_write_if_needed_delays(monkeypatch):
"""Delays introduce dependencies."""
monkeypatch.setattr(versions, 'write_to_disk', Mock())
entry.Entry('notice_xml', '111').write(b'content')
entry.Entry('notice_xml', '222').write(b'content')
entry.Entry('version', 'title', 'part', '111').write(b'out')
entry.Entry('version', 'title', 'part', '222').write(b'out')
sources = [SourceFile(file_name='111'), SourceFile(file_name='222')]
delays = {'111': versions.Delay('222', 'until-date')}
versions.write_if_needed('title', 'part', sources, delays)
assert not versions.write_to_disk.called

# Simulate a change to an input file
label_id = str(entry.Notice('222'))
new_time = timezone.now() + timedelta(hours=1)
DBEntry.objects.filter(label_id=label_id).update(modified=new_time)
versions.write_if_needed('title', 'part', sources, delays)
assert versions.write_to_disk.called
def test_generate_source_no_need_to_recompute(monkeypatch):
"""If the SourceFile is present, there's no need to invoke
preprocess_notice"""
sf = SourceFile.objects.create(
collection=SourceCollection.notice.name, file_name='aaa')
ctx = Mock()
# The existing record is returned as-is and the context is never used.
assert sf == versions.generate_source('aaa', ctx)
assert not ctx.invoke.called


# NOTE(review): rendered diff; each '111' line is paired with the 'aaa' line
# that replaces it.
@pytest.mark.django_db
def test_write_to_disk_no_effective(monkeypatch):
def test_create_version_no_effective(monkeypatch):
"""If a version is somehow associated with a proposed rule (or a final
rule has been misparsed), we should get a warning"""
notice_xml = NoticeXML(b'<ROOT><DATES/></ROOT>')
notice_xml.version_id = '111'
notice_xml.version_id = 'aaa'
notice_xml.save()

sources = {'111': SourceFile.objects.get(file_name='111')}
sources = {'aaa': SourceFile.objects.get(file_name='aaa')}
# Swap the module logger for a mock so the warning call can be inspected.
monkeypatch.setattr(versions, 'logger', Mock())

versions.write_to_disk('not', 'used', sources, '111')
versions.create_version('not', 'used', sources, 'aaa')

assert versions.logger.warning.called
# The version id must be among the positional arguments of the warning.
assert '111' in versions.logger.warning.call_args[0]
assert 'aaa' in versions.logger.warning.call_args[0]

0 comments on commit 9471aab

Please sign in to comment.