Skip to content
This repository has been archived by the owner on Apr 4, 2023. It is now read-only.

Commit

Permalink
Replace Version with CFRVersion (backed by the db).
Browse files Browse the repository at this point in the history
This is step two of moving dependency management into the database -- we now
have SourceFiles in use and CFRVersions being saved/retrieved. We still need to
move all of the data from the index.entry classes into web.index.models; then
we can rework the dependency logic to rely solely on foreign keys.
  • Loading branch information
CM Lubinski authored and cmc333333 committed Feb 27, 2017
1 parent 56fb2e0 commit 86c21e5
Show file tree
Hide file tree
Showing 29 changed files with 455 additions and 342 deletions.
11 changes: 5 additions & 6 deletions regparser/commands/annual_editions.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from regparser.history import annual
from regparser.index import dependency, entry
from regparser.tree import gpo_cfr
from regparser.web.index.models import SourceCollection, SourceFile
from regparser.web.index.models import CFRVersion, SourceCollection, SourceFile

LastVersionInYear = namedtuple('LastVersionInYear', ['version_id', 'year'])
logger = logging.getLogger(__name__)
Expand All @@ -16,11 +16,11 @@ def last_versions(cfr_title, cfr_part):
"""Run through all known versions of this regulation and pull out versions
which are the last to be included before an annual edition"""
have_annual_edition = {}
path = entry.FinalVersion(cfr_title, cfr_part)
if not any(path.sub_entries()):
query = CFRVersion.objects.filter(cfr_title=cfr_title, cfr_part=cfr_part,
effective__isnull=False)
if not query.exists():
raise click.UsageError("No versions found. Run `versions`?")
for subpath in path.sub_entries():
version = subpath.read()
for version in sorted(query):
pub_date = annual.date_of_annual_after(cfr_title, version.effective)
have_annual_edition[pub_date.year] = version.identifier
for year in sorted(have_annual_edition):
Expand Down Expand Up @@ -49,7 +49,6 @@ def process_if_needed(cfr_title, cfr_part, last_version_list):
tree_entry = tree_path / last_version.version_id
deps.validate_for(tree_entry)
if deps.is_stale(tree_entry):
annual = SourceCollection.annual
annual_xml = SourceFile.objects.filter(
collection=SourceCollection.annual.name,
file_name=SourceCollection.annual.format(
Expand Down
38 changes: 21 additions & 17 deletions regparser/commands/annual_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,10 @@
import click

from regparser.history.annual import find_volume
from regparser.history.versions import Version
from regparser.index import dependency, entry
from regparser.notice.citation import Citation
from regparser.notice.fake import build as build_fake_notice
from regparser.tree.gpo_cfr import builder
from regparser.web.index.models import SourceCollection, SourceFile
from regparser.web.index.models import CFRVersion, SourceCollection, SourceFile

_version_id = '{0}-annual-{1}'.format
logger = logging.getLogger(__name__)
Expand All @@ -26,29 +24,35 @@ def process_if_needed(volume, cfr_part):
deps = dependency.Graph()
deps.add(tree_entry, annual_entry)
deps.validate_for(tree_entry)
source = SourceFile.objects.filter(
collection=SourceCollection.annual.name,
file_name=SourceCollection.annual.format(volume.title, cfr_part,
volume.year)
).get()
if deps.is_stale(tree_entry):
xml = SourceFile.objects.filter(
collection=SourceCollection.annual.name,
file_name=SourceCollection.annual.format(
volume.title, cfr_part, volume.year)
).get().xml()
tree = builder.build_tree(xml)
tree = builder.build_tree(source.xml())
tree_entry.write(tree)
notice_entry.write(b'')
build_fake_notice(version_id, volume.publication_date, volume.title,
cfr_part).save()
return source


def create_version_entry_if_needed(volume, cfr_part):
def create_version_entry_if_needed(volume, cfr_part, source):
"""Only write the version entry if it doesn't already exist. If we
overwrote one, we'd be invalidating all related trees, etc."""
version_id = _version_id(volume.year, cfr_part)
version_dir = entry.FinalVersion(volume.title, cfr_part)
query = CFRVersion.objects.filter(
cfr_title=volume.title, cfr_part=cfr_part, effective__isnull=False)
known_versions = [v.identifier for v in query]

if version_id not in [c.path[-1] for c in version_dir.sub_entries()]:
(version_dir / version_id).write(
Version(version_id, effective=volume.publication_date,
fr_citation=Citation(volume.vol_num, 1)))
if version_id not in known_versions:
entry.Version(volume.title, cfr_part, version_id).write(b'')
CFRVersion.objects.create(
identifier=version_id, source=source,
effective=volume.publication_date, fr_volume=1, fr_page=1,
cfr_title=volume.title, cfr_part=cfr_part
)


@click.command()
Expand Down Expand Up @@ -76,5 +80,5 @@ def annual_version(cfr_title, cfr_part, year):
logger.info("Getting annual version - %s CFR %s, Year: %s",
cfr_title, cfr_part, cfr_year)

create_version_entry_if_needed(vol, cfr_part)
process_if_needed(vol, cfr_part)
source = process_if_needed(vol, cfr_part)
create_version_entry_if_needed(vol, cfr_part, source)
3 changes: 2 additions & 1 deletion regparser/commands/clear.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from django.conf import settings

from regparser.index.http_cache import http_client
from regparser.web.index.models import DependencyNode, SourceFile
from regparser.web.index.models import DependencyNode, CFRVersion, SourceFile


@click.command()
Expand All @@ -28,5 +28,6 @@ def clear(path):
else:
DependencyNode.objects.all().delete()
SourceFile.objects.all().delete()
CFRVersion.objects.all().delete()

http_client().cache.clear()
8 changes: 5 additions & 3 deletions regparser/commands/fill_with_rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@

import click

from regparser.history.versions import Version
from regparser.index import dependency, entry
from regparser.notice.compiler import compile_regulation
from regparser.notice.xml import NoticeXML
from regparser.web.index.models import CFRVersion

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -76,9 +76,11 @@ def fill_with_rules(cfr_title, cfr_part):
logger.info("Fill with rules - %s CFR %s", cfr_title, cfr_part)
tree_dir = entry.Tree(cfr_title, cfr_part)
version_dir = entry.Version(cfr_title, cfr_part)
query = CFRVersion.objects.filter(cfr_title=cfr_title, cfr_part=cfr_part)

versions = [c.read() for c in version_dir.sub_entries()]
versions_with_parents = list(zip(versions, Version.parents_of(versions)))
versions = [v for v in sorted(query)]
versions_with_parents = list(zip(versions,
CFRVersion.parents_of(versions)))
deps = dependencies(tree_dir, version_dir, versions_with_parents)

derived = [(version.identifier, parent.identifier)
Expand Down
17 changes: 12 additions & 5 deletions regparser/commands/full_issuance.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@

import click

from regparser.history.versions import Version
from regparser.index import dependency, entry
from regparser.notice.xml import NoticeXML
from regparser.tree.gpo_cfr.builder import build_tree
from regparser.web.index.models import CFRVersion, SourceCollection, SourceFile

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -34,10 +34,17 @@ def process_version_if_needed(cfr_title, cfr_part, version_id):
deps.validate_for(version_entry)

if deps.is_stale(version_entry):
notice_xml = NoticeXML.from_db(version_id)
version = Version(version_id, notice_xml.effective,
notice_xml.fr_citation)
version_entry.write(version)
source = SourceFile.objects.get(
collection=SourceCollection.notice.name, file_name=version_id
)
notice_xml = NoticeXML(source.xml())
version_entry.write(b'')
CFRVersion.objects.create(
identifier=version_id, source=source, cfr_title=cfr_title,
cfr_part=cfr_part, effective=notice_xml.effective,
fr_volume=notice_xml.fr_citation.volume,
fr_page=notice_xml.fr_citation.page
)


def process_tree_if_needed(cfr_title, cfr_part, version_id):
Expand Down
13 changes: 7 additions & 6 deletions regparser/commands/layers.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

from regparser.commands import utils
from regparser.index import dependency, entry
from regparser.web.index.models import CFRVersion

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -53,12 +54,13 @@ def stale_layers(doc_entry, doc_type):
return stale


def process_cfr_layers(stale_names, cfr_title, version_entry):
def process_cfr_layers(stale_names, cfr_title, cfr_part, version_id):
"""Build all of the stale layers for this version, writing them into the
index. Assumes all dependencies have already been checked"""
tree = entry.Tree(*version_entry.path).read()
version = version_entry.read()
layer_dir = entry.Layer.cfr(*version_entry.path)
tree = entry.Tree(cfr_title, cfr_part, version_id).read()
version = CFRVersion.objects.get(
identifier=version_id, cfr_title=cfr_title, cfr_part=cfr_part)
layer_dir = entry.Layer.cfr(cfr_title, cfr_part, version_id)
for layer_name in stale_names:
layer_json = LAYER_CLASSES['cfr'][layer_name](
tree, cfr_title=int(cfr_title), version=version).build()
Expand All @@ -85,10 +87,9 @@ def layers(cfr_title, cfr_part):

for tree_entry in utils.relevant_paths(entry.Tree(), cfr_title, cfr_part):
tree_title, tree_part, version_id = tree_entry.path
version_entry = entry.Version(tree_title, tree_part, version_id)
stale = stale_layers(tree_entry, 'cfr')
if stale:
process_cfr_layers(stale, tree_title, version_entry)
process_cfr_layers(stale, tree_title, tree_part, version_id)

if cfr_title is None and cfr_part is None:
for preamble_entry in entry.Preamble().sub_entries():
Expand Down
20 changes: 13 additions & 7 deletions regparser/commands/proposal_versions.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import click

from regparser.history.versions import Version
from regparser.index import dependency, entry
from regparser.notice.xml import NoticeXML
from regparser.web.index.models import CFRVersion, SourceCollection, SourceFile


@click.command()
Expand All @@ -13,11 +13,17 @@ def proposal_versions(doc_number):
if not notice.exists():
raise dependency.Missing(str(notice), str(notice))

notice = NoticeXML.from_db(doc_number)
version = Version(doc_number, effective=None,
fr_citation=notice.fr_citation)
source = SourceFile.objects.get(
collection=SourceCollection.notice.name, file_name=doc_number)
notice = NoticeXML(source.xml())

for cfr_title, cfr_part in notice.cfr_ref_pairs:
version_entry = entry.Version(cfr_title, cfr_part, doc_number)
if not version_entry.exists() or version_entry.read() != version:
version_entry.write(version)
entry.Version(cfr_title, cfr_part, doc_number).write(b'')
CFRVersion.objects.filter(
identifier=doc_number, cfr_title=cfr_title, cfr_part=cfr_part
).delete()
CFRVersion.objects.create(
identifier=doc_number, source=source, cfr_title=cfr_title,
cfr_part=cfr_part, fr_volume=notice.fr_citation.volume,
fr_page=notice.fr_citation.page
)
6 changes: 4 additions & 2 deletions regparser/commands/sxs_layers.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,17 @@
from regparser.layer.section_by_section import SectionBySection
from regparser.notice.build import build_notice
from regparser.notice.xml import NoticeXML
from regparser.web.index.models import CFRVersion

logger = logging.getLogger(__name__)


def previous_sxs(cfr_title, cfr_part, stop_version):
"""The SxS layer relies on all notices that came before a particular
version"""
sub_entries = entry.FinalVersion(cfr_title, cfr_part).sub_entries()
version_ids = [e.path[-1] for e in sub_entries]
query = CFRVersion.objects.filter(
cfr_title=cfr_title, cfr_part=cfr_part, effective__isnull=False)
version_ids = [v.identifier for v in sorted(query)]
for previous_version in version_ids:
yield entry.Notice(previous_version)
if previous_version == stop_version:
Expand Down
54 changes: 36 additions & 18 deletions regparser/commands/versions.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@
import click

from regparser.federalregister import fetch_notice_json
from regparser.history.versions import Version
from regparser.index import dependency, entry
from regparser.notice.xml import NoticeXML
from regparser.web.index.models import CFRVersion, SourceCollection, SourceFile

logger = logging.getLogger(__name__)

Expand All @@ -32,14 +33,15 @@ def fetch_version_ids(cfr_title, cfr_part, notice_dir):
Delay = namedtuple('Delay', ['by', 'until'])


def delays(xmls):
def delays(source_files):
"""Find all changes to effective dates. Return the latest change to each
version of the regulation"""
notice_xmls = [NoticeXML(sf.xml()) for sf in source_files]
delay_map = {}
# Sort so that later modifications override earlier ones
for delayer in sorted(xmls, key=attrgetter('published')):
for delayer in sorted(notice_xmls, key=attrgetter('fr_citation')):
for delay in delayer.delays():
for delayed in filter(delay.modifies_notice_xml, xmls):
for delayed in filter(delay.modifies_notice_xml, notice_xmls):
delay_map[delayed.version_id] = Delay(delayer.version_id,
delay.delayed_until)
return delay_map
Expand All @@ -57,28 +59,41 @@ def generate_dependencies(version_dir, version_ids, delays_by_version):
return deps


def write_to_disk(xml, version_entry, delay=None):
def write_to_disk(cfr_title, cfr_part, sources, version_id, delay=None):
"""Serialize a Version instance to disk"""
effective = xml.effective if delay is None else delay.until
notice_xml = NoticeXML(sources[version_id].xml())
effective = notice_xml.effective if delay is None else delay.until
delaying_source = None if delay is None else sources[delay.by]
if effective:
version = Version(xml.version_id, effective, xml.fr_citation)
version_entry.write(version)
entry.Version(cfr_title, cfr_part, notice_xml.version_id).write(b'')
CFRVersion.objects.filter(
identifier=notice_xml.version_id, cfr_title=cfr_title,
cfr_part=cfr_part).delete()
CFRVersion.objects.create(
identifier=notice_xml.version_id, source=sources[version_id],
delaying_source=delaying_source, effective=effective,
fr_volume=notice_xml.fr_citation.volume,
fr_page=notice_xml.fr_citation.page, cfr_title=cfr_title,
cfr_part=cfr_part
)
else:
logger.warning("No effective date for this rule: %s. Skipping",
xml.version_id)
notice_xml.version_id)


def write_if_needed(cfr_title, cfr_part, version_ids, xmls, delays_by_version):
def write_if_needed(cfr_title, cfr_part, source_files, delays_by_version):
"""All versions which are stale (either because they were never create or
because their dependency has been updated) are written to disk. If any
dependency is missing, an exception is raised"""
version_dir = entry.FinalVersion(cfr_title, cfr_part)
deps = generate_dependencies(version_dir, version_ids, delays_by_version)
for version_id in version_ids:
source_by_id = {sf.file_name: sf for sf in source_files}
version_dir = entry.Version(cfr_title, cfr_part)
deps = generate_dependencies(version_dir, source_by_id.keys(),
delays_by_version)
for version_id in source_by_id.keys():
version_entry = version_dir / version_id
deps.validate_for(version_entry)
if deps.is_stale(version_entry):
write_to_disk(xmls[version_id], version_entry,
write_to_disk(cfr_title, cfr_part, source_by_id, version_id,
delays_by_version.get(version_id))


Expand All @@ -96,8 +111,11 @@ def versions(cfr_title, cfr_part):
version_ids = fetch_version_ids(cfr_title, cfr_part, notice_dir)
logger.debug("Versions found: %r", version_ids)

version_entries = [notice_dir / version_id for version_id in version_ids]
source_files = [
SourceFile.objects.get(
collection=SourceCollection.notice.name, file_name=version_id)
for version_id in version_ids
]
# notices keyed by version_id
xmls = {e.path[-1]: e.read() for e in version_entries if e.exists()}
delays_by_version = delays(xmls.values())
write_if_needed(cfr_title, cfr_part, version_ids, xmls, delays_by_version)
delays_by_version = delays(source_files)
write_if_needed(cfr_title, cfr_part, source_files, delays_by_version)
9 changes: 5 additions & 4 deletions regparser/commands/write_to.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@

from regparser.api_writer import Client
from regparser.commands import utils
from regparser.history.versions import Version
from regparser.index import entry
from regparser.notice.build import add_footnotes, process_sxs
from regparser.notice.xml import NoticeXML
from regparser.web.index.models import CFRVersion

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -46,9 +46,10 @@ def transform_notice(notice_xml):
as_dict = notice_xml.as_dict()
as_dict['versions'] = {}
for cfr_title, cfr_part in notice_xml.cfr_ref_pairs:
version_dir = entry.Version(cfr_title, cfr_part)
versions = [v.read() for v in version_dir.sub_entries()]
with_parents = zip(versions, Version.parents_of(versions))
query = CFRVersion.objects.filter(cfr_title=cfr_title,
cfr_part=cfr_part)
versions = list(sorted(query))
with_parents = zip(versions, CFRVersion.parents_of(versions))
for version, parent in with_parents:
if version.identifier == notice_xml.version_id and parent:
as_dict['versions'][cfr_part] = {"left": parent.identifier,
Expand Down
Loading

0 comments on commit 86c21e5

Please sign in to comment.