Skip to content

Commit

Permalink
Implement new version of FastRun
Browse files Browse the repository at this point in the history
  • Loading branch information
LeMyst committed Dec 19, 2024
1 parent bc817a2 commit cf48aba
Show file tree
Hide file tree
Showing 32 changed files with 934 additions and 892 deletions.
10 changes: 9 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -856,7 +856,15 @@ for entrez_id, ensembl in raw_data.items():

Note: Fastrun mode checks for equality of property/value pairs, qualifiers (not including qualifier attributes), labels,
aliases and descriptions, but it ignores references by default!
References can be checked in fast run mode by setting `use_refs` to `True`.
References can be checked in fast run mode by setting `use_references` to `True`.

# Statistics #

| Dataset | partial fastrun | fastrun without qualifiers/references | fastrun with qualifiers | fastrun with qualifiers/references |
|:----------------------------|----------------:|--------------------------------------:|------------------------:|-----------------------------------:|
| Communes (34990 elements) | ? | 7min | 30s | 60s |
| Cantons (2042 elements) | ? | ? | ? | ? |
| Départements (100 elements) | 70min | 1s | 30s | 60s |

# Debugging #

Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -116,3 +116,4 @@ disable = [

[tool.pytest.ini_options]
log_cli = true
log_cli_level = 'DEBUG'
56 changes: 6 additions & 50 deletions test/test_all.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
import copy
import logging
import unittest

from wikibaseintegrator import WikibaseIntegrator, datatypes, wbi_fastrun
from wikibaseintegrator.datatypes import BaseDataType, Item
from wikibaseintegrator.entities import ItemEntity
from wikibaseintegrator.wbi_config import config as wbi_config
from wikibaseintegrator.wbi_enums import ActionIfExists, WikibaseDatatype
from wikibaseintegrator.wbi_fastrun import get_fastrun_container
from wikibaseintegrator.wbi_enums import WikibaseDatatype

wbi_config['USER_AGENT'] = 'WikibaseIntegrator-pytest/1.0 (test_all.py)'

Expand Down Expand Up @@ -53,66 +53,22 @@ class TestFastRun(unittest.TestCase):
"""
some basic tests for fastrun mode
"""
logging.basicConfig(level=logging.DEBUG)

def test_fastrun(self):
statements = [
datatypes.ExternalID(value='P40095', prop_nr='P352'),
datatypes.ExternalID(value='A0A023PZB3', prop_nr='P352'),
datatypes.ExternalID(value='YER158C', prop_nr='P705')
]

frc = wbi_fastrun.FastRunContainer(base_filter=[BaseDataType(prop_nr='P352'), datatypes.Item(prop_nr='P703', value='Q27510868')], base_data_type=datatypes.BaseDataType)

fastrun_result = frc.write_required(data=statements)

if fastrun_result:
message = 'fastrun failed'
else:
message = 'successful fastrun'
fastrun_result = frc.write_required(claims=statements)

# here, fastrun should succeed, if not, test failed
if fastrun_result:
if not fastrun_result:
raise ValueError

def test_fastrun_label(self):
# tests fastrun label, description and aliases, and label in another language
frc = get_fastrun_container(base_filter=[datatypes.ExternalID(value='/m/02j71', prop_nr='P646')])
item = WikibaseIntegrator().item.get('Q2')

assert item.labels.get(language='en') == "Earth"
descr = item.descriptions.get(language='en')
assert len(descr) > 3
assert "Planet Earth" in item.aliases.get()

assert list(frc.get_language_data("Q2", 'en', 'label'))[0] == item.labels.get(language='en')
assert frc.check_language_data("Q2", ['not the Earth'], 'en', 'label')
assert "Planet Earth" in item.aliases.get()
assert "planet" in item.descriptions.get()

assert item.labels.get('es') == "Tierra"

item.descriptions.set(value=descr)
item.descriptions.set(value="fghjkl")
assert item.get_json()['descriptions']['en'] == {'language': 'en', 'value': 'fghjkl'}
item.labels.set(value="Earth")
item.labels.set(value="xfgfdsg")
assert item.get_json()['labels']['en'] == {'language': 'en', 'value': 'xfgfdsg'}
item.aliases.set(values=["fake alias"], action_if_exists=ActionIfExists.APPEND_OR_REPLACE)
assert {'language': 'en', 'value': 'fake alias'} in item.get_json()['aliases']['en']

# something that's empty (for now.., can change, so this just makes sure no exception is thrown)
frc.check_language_data("Q2", ['Ewiase'], 'ak', 'label')
frc.check_language_data("Q2", ['not Ewiase'], 'ak', 'label')
frc.check_language_data("Q2", [''], 'ak', 'description')
frc.check_language_data("Q2", [], 'ak', 'aliases')
frc.check_language_data("Q2", ['sdf', 'sdd'], 'ak', 'aliases')

item.labels.get(language="ak")
item.descriptions.get(language='ak')
item.aliases.get(language="ak")
item.labels.set(value="label", language="ak")
item.descriptions.set(value="d", language="ak")
item.aliases.set(values=["a"], language="ak", action_if_exists=ActionIfExists.APPEND_OR_REPLACE)


def test_sitelinks():
item = wbi.item.get('Q622901')
Expand Down
10 changes: 5 additions & 5 deletions test/test_entity_item.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,18 +51,18 @@ def test_write(self):
def test_write_not_required(self):
assert not wbi.item.get('Q582').write_required(base_filter=[BaseDataType(prop_nr='P1791')])

def test_write_not_required_ref(self):
assert not wbi.item.get('Q582').write_required(base_filter=[BaseDataType(prop_nr='P1464')], use_references=True)

def test_write_required(self):
item = wbi.item.get('Q582')
item.claims.add(Item(prop_nr='P1791', value='Q42'))
assert item.write_required([BaseDataType(prop_nr='P1791')])

def test_write_not_required_ref(self):
assert not wbi.item.get('Q582').write_required(base_filter=[BaseDataType(prop_nr='P2581')], use_refs=True)

def test_write_required_ref(self):
item = wbi.item.get('Q582')
item.claims.get('P2581')[0].references.references.pop()
assert item.write_required(base_filter=[BaseDataType(prop_nr='P2581')], use_refs=True)
item.claims.get('P1464')[0].references.references.pop()
assert item.write_required(base_filter=[BaseDataType(prop_nr='P1464')], use_references=True)

def test_long_item_id(self):
assert wbi.item.get('Item:Q582').id == 'Q582'
Expand Down
2 changes: 1 addition & 1 deletion test/test_wbi_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ def test_new_item_creation(self):
MonolingualText(text='xxx', language='fr', prop_nr='P7'),
Quantity(amount=-5.04, prop_nr='P8'),
Quantity(amount=5.06, upper_bound=9.99, lower_bound=-2.22, unit='Q11573', prop_nr='P8'),
CommonsMedia(value='xxx', prop_nr='P9'),
CommonsMedia(value="Place lazare goujon.jpg", prop_nr='P9'),
GlobeCoordinate(latitude=1.2345, longitude=-1.2345, precision=12, prop_nr='P10'),
GeoShape(value='Data:xxx.map', prop_nr='P11'),
Property(value='P123', prop_nr='P12'),
Expand Down
211 changes: 0 additions & 211 deletions test/test_wbi_fastrun.py

This file was deleted.

15 changes: 13 additions & 2 deletions wikibaseintegrator/datatypes/basedatatype.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ class BaseDataType(Claim):
The base class for all Wikibase data types, they inherit from it
"""
DTYPE = 'base-data-type'
PTYPE = 'property-data-type'
subclasses: list[type[BaseDataType]] = []
sparql_query: str = '''
SELECT * WHERE {{
Expand All @@ -28,7 +29,14 @@ def __init__(self, prop_nr: int | str | None = None, **kwargs: Any):

super().__init__(**kwargs)

self.mainsnak.property_number = prop_nr or None
if isinstance(prop_nr, str):
pattern = re.compile(r'^([a-z][a-z\d+.-]*):([^][<>\"\x00-\x20\x7F])+$')
matches = pattern.match(str(prop_nr))

if matches:
prop_nr = prop_nr.rsplit('/', 1)[-1]

self.mainsnak.property_number = prop_nr
# self.subclasses.append(self)

# Allow registration of subclasses of BaseDataType into BaseDataType.subclasses
Expand All @@ -39,7 +47,7 @@ def __init_subclass__(cls, **kwargs):
def set_value(self, value: Any | None = None):
pass

def get_sparql_value(self) -> str:
def get_sparql_value(self, **kwargs: Any) -> str | None:
return '"' + self.mainsnak.datavalue['value'] + '"'

def parse_sparql_value(self, value, type='literal', unit='1') -> bool:
Expand All @@ -61,3 +69,6 @@ def parse_sparql_value(self, value, type='literal', unit='1') -> bool:
raise ValueError

return True

def from_sparql_value(self, sparql_value: dict) -> BaseDataType: # type: ignore
pass
Loading

0 comments on commit cf48aba

Please sign in to comment.