Skip to content

Commit

Permalink
Merge branch 'valevo:main' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
ddeboer authored Apr 2, 2024
2 parents fc251d8 + 7d4d8c7 commit 2b6075b
Show file tree
Hide file tree
Showing 14 changed files with 67 additions and 18 deletions.
Binary file removed docker/cache/ContentLengthEnginev0_NMvW_v0.csv.gz
Binary file not shown.
Binary file modified docker/cache/ContentLengthEnginev0_OpenBeelden_v0.csv.gz
Binary file not shown.
Binary file removed docker/cache/PMIEnginev0_NMvW_v0.csv.gz
Binary file not shown.
Binary file removed docker/cache/RandomEnginev0_NMvW_v0.csv.gz
Binary file not shown.
Binary file modified docker/cache/RandomEnginev0_OpenBeelden_v0.csv.gz
Binary file not shown.
Binary file removed docker/cache/TypicalityEnginev0_NMvW_v0.csv.gz
Binary file not shown.
Binary file removed docker/cache/VocabularyEnginev0_NMvW_v0.csv.gz
Binary file not shown.
27 changes: 27 additions & 0 deletions docker/data/NMVW.v1_0.META.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
{
"id": "NMVW_v1",
"name": "Nationaal Museum van Wereldculturen",
"dataset_url": "https://collectie.wereldculturen.nl/",
"text_columns": ["Provenance", "Notes", "CuratorialRemarks"],
"dataset_params":
{
"ObjectName": {
"label": "Object Name", "description": "Name or type of object, e.g. 'foto' or 'munt'", "control": "autocomplete", "options": []
},
"Culture": {
"label": "Culture", "description": "Broad cultural classification", "control": "autocomplete", "options": []
},
"Function and Context": {
"label": "Function and Context", "description": "Broad category of function and/or context in the source culture", "control": "autocomplete", "options": []
},
"Material": {
"label": "Material", "description": "Broad category of main physical material used", "control": "autocomplete", "options": []
},
"Geography": {
"label": "Geography", "description": "Geographical origin in terms of continent", "control": "autocomplete", "options": []
},
"Geography: Subcontinents": {
"label": "Geography: Subcontinents", "description": "More fine-grained Geographical origin in terms of subcontinents", "control": "autocomplete", "options": []
}
}
}
Binary file added docker/data/NMVW.v1_0.csv.gz
Binary file not shown.
Binary file added docker/data/NMVW.v1_0.image_URLs.csv.gz
Binary file not shown.
7 changes: 3 additions & 4 deletions docker/src/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
Compress(app)

# !!! comment out for production !!!
# app.config["DEBUG"] = True
app.config["DEBUG"] = True

from datasets import NMvW, OpenBeelden
print("datasets loaded", flush=True)
Expand Down Expand Up @@ -292,9 +292,8 @@ def get_examples():

return jsonify({"examples": examples})


# if __name__ == "__main__":
# app.run(debug=True)
if __name__ == "__main__":
app.run(debug=True)



Expand Down
38 changes: 30 additions & 8 deletions docker/src/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,26 +275,48 @@ def get_thumb(self, object_ids):

def get(self, object_ids, column):
r = self.df.loc[object_ids]
return r["img_URL"]
return r[column]


# NMvW
# # NMvW

# ## Load DataFrame
# df = pd.read_csv("../data/NMvW.v0_4.csv.gz",
# dtype=dict(Provenance="string", RelatedWorks="string"))
# ## TODO: save & load DF s.t. these lines are not necessary here
# # df["ID"] = df.ID.astype("int")
# df = df.set_index("ID")
# df["name"] = df["name"].fillna("")
# df["start_date"] = df.start_date.apply(lambda s: dt.strptime(s, Dataset.parse_date).date())
# df["end_date"] = df.end_date.apply(lambda s: dt.strptime(s, Dataset.parse_date).date())
# ## get Image Source
# images_NMvW = ImageSource("../data/NMvW.image_URLs.csv.gz")
# ## Instantiate Dataset object
# NMvW = Dataset.with_dataset_meta(
# df, "../data/NMvW.META.json", images_NMvW, available_engines=[])



# NMvW - V1

## Load DataFrame
df = pd.read_csv("../data/NMvW.v0_4.csv.gz",
dtype=dict(Provenance="string", RelatedWorks="string"))
df = pd.read_csv("../data/NMVW.v1_0.csv.gz",
dtype=dict(Provenance="string",
Notes="string",
CuratorialRemarks="string"))
## TODO: save & load DF s.t. these lines are not necessary here
# df["ID"] = df.ID.astype("int")
df = df.set_index("ID")
df["name"] = df["name"].fillna("")
# df["name"] = df["name"].fillna("")
df = df.fillna("")
df["start_date"] = df.start_date.apply(lambda s: dt.strptime(s, Dataset.parse_date).date())
df["end_date"] = df.end_date.apply(lambda s: dt.strptime(s, Dataset.parse_date).date())
## get Image Source
images_NMvW = ImageSource("../data/NMvW.image_URLs.csv.gz")
images_NMvW = ImageSource("../data/NMVW.v1_0.image_URLs.csv.gz")
## Instantiate Dataset object
NMvW = Dataset.with_dataset_meta(
df, "../data/NMvW.META.json", images_NMvW, available_engines=[])

df, "../data/NMVW.v1_0.META.json",
images_NMvW, available_engines=[])

# OpenBeelden

Expand Down
2 changes: 1 addition & 1 deletion docker/src/engines/TypicalityEnginev0.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ def process_objects(self, objs, round_to=2):

details = tuples.apply(lambda t: dict(t[0]))
print("in TypicalityEgine, constructing big d", flush=True)
d = {k: v for smalld in tqdm(details, desc="constructing big d", flush=True) for k, v in smalld.items()}
d = {k: v for smalld in tqdm(details, desc="constructing big d") for k, v in smalld.items()}

values = np.asarray([d[k] for k in sorted(d.keys())])
values = self.inv_normed_abs(values, q=100).round(round_to)
Expand Down
11 changes: 6 additions & 5 deletions docker/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,20 @@
description='Automates checking whether the app loads correctly and all defined routes run without errors.',
epilog='Gunicorn is set to run at 0.0.0.0, default port is 8080 (but can be passed as argument to the ./start.sh script).')

parser.add_argument('-o', '--host', default="0.0.0.0", type=str)
parser.add_argument('-p', '--port', default="8080", type=str)
parser.add_argument('-a', '--app_running', action='store_true')
parser.add_argument('-v', '--verbose',action='store_true')

args = parser.parse_args()

prefix="http://"
host = "0.0.0.0"
port = f"{args.port}/api/v1"
host = args.host
port = f"{args.port}"

if not args.app_running:
import subprocess
subprocess.call(['sh', './start.sh'])
subprocess.call(['sh', './start.sh', '&'])


# testing all routes defined in ./src/app.py
Expand Down Expand Up @@ -65,8 +66,8 @@


print()
res = requests.get(f"{prefix}{host}:{port}/datasets/NMvW_v0/autocomplete",
params=dict(param="Classification", keyword=""))
res = requests.get(f"{prefix}{host}:{port}/datasets/NMvW_v0/autocomplete",
params=dict(param="Geography", keyword=""))

print(f"/datasets/NMvW_v0/autocomplete: {res.status_code}")
if args.verbose:
Expand Down

0 comments on commit 2b6075b

Please sign in to comment.