Merge pull request #60 from BackofenLab/workJannis
Merge meilisearch to master
Showing 7 changed files with 887 additions and 0 deletions.
@@ -0,0 +1,131 @@
# This file shows the default configuration of Meilisearch.
# All variables are defined here: https://www.meilisearch.com/docs/learn/configuration/instance_options#environment-variables

# Designates the location where database files will be created and retrieved.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#database-path
db_path = "./data.ms"

# Configures the instance's environment. Value must be either `production` or `development`.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#environment
env = "development"

# The address on which the HTTP server will listen.
http_addr = "localhost:7700"

# Sets the instance's master key, automatically protecting all routes except GET /health.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#master-key
master_key = "aSampleMasterKey2"

# Deactivates Meilisearch's built-in telemetry when provided.
# Meilisearch automatically collects data from all instances that do not opt out using this flag.
# All gathered data is used solely for the purpose of improving Meilisearch, and can be deleted at any time.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#disable-analytics
# no_analytics = true

# Sets the maximum size of accepted payloads.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#payload-limit-size
http_payload_size_limit = "550 MB"

# Defines how much detail should be present in Meilisearch's logs.
# Meilisearch currently supports six log levels, listed in order of increasing verbosity: `OFF`, `ERROR`, `WARN`, `INFO`, `DEBUG`, `TRACE`
# https://www.meilisearch.com/docs/learn/configuration/instance_options#log-level
log_level = "INFO"

# Sets the maximum amount of RAM Meilisearch can use when indexing.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#max-indexing-memory
# max_indexing_memory = "12 GiB"

# Sets the maximum number of threads Meilisearch can use during indexing.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#max-indexing-threads
# max_indexing_threads = 10

#############
### DUMPS ###
#############

# Sets the directory where Meilisearch will create dump files.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#dump-directory
dump_dir = "dumps/"

# Imports the dump file located at the specified path. Path must point to a .dump file.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#import-dump
# import_dump = "./path/to/my/file.dump"

# Prevents Meilisearch from throwing an error when `import_dump` does not point to a valid dump file.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ignore-missing-dump
ignore_missing_dump = false

# Prevents a Meilisearch instance with an existing database from throwing an error when using `import_dump`.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ignore-dump-if-db-exists
ignore_dump_if_db_exists = false


#################
### SNAPSHOTS ###
#################

# Enables scheduled snapshots when true, disables them when false (the default).
# If the value is given as an integer, then enables the scheduled snapshot with the passed value as the interval
# between each snapshot, in seconds.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#schedule-snapshot-creation
schedule_snapshot = false

# Sets the directory where Meilisearch will store snapshots.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#snapshot-destination
snapshot_dir = "snapshots/"

# Launches Meilisearch after importing a previously-generated snapshot at the given filepath.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#import-snapshot
# import_snapshot = "./path/to/my/snapshot"

# Prevents a Meilisearch instance from throwing an error when `import_snapshot` does not point to a valid snapshot file.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ignore-missing-snapshot
ignore_missing_snapshot = false

# Prevents a Meilisearch instance with an existing database from throwing an error when using `import_snapshot`.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ignore-snapshot-if-db-exists
ignore_snapshot_if_db_exists = false


###########
### SSL ###
###########

# Enables client authentication in the specified path.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ssl-authentication-path
# ssl_auth_path = "./path/to/root"

# Sets the server's SSL certificates.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ssl-certificates-path
# ssl_cert_path = "./path/to/certfile"

# Sets the server's SSL key files.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ssl-key-path
# ssl_key_path = "./path/to/private-key"

# Sets the server's OCSP file.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ssl-ocsp-path
# ssl_ocsp_path = "./path/to/ocsp-file"

# Makes SSL authentication mandatory.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ssl-require-auth
ssl_require_auth = false

# Activates SSL session resumption.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ssl-resumption
ssl_resumption = false

# Activates SSL tickets.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ssl-tickets
ssl_tickets = false

#############################
### Experimental features ###
#############################

# Experimental metrics feature. For more information, see: <https://github.com/meilisearch/meilisearch/discussions/3518>
# Enables the Prometheus metrics on the `GET /metrics` endpoint.
experimental_enable_metrics = false

# Experimental RAM reduction during indexing, do not use in production, see: <https://github.com/meilisearch/product/discussions/652>
experimental_reduce_indexing_memory_usage = false
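
This config only takes effect when Meilisearch is started with it (for example via the --config-file-path option). A minimal connectivity check from Python, assuming the meilisearch client package is installed and the instance is running with the http_addr and master_key values shown above:

# Sanity check against the instance configured above (assumption: the server
# was launched with this config, so it listens on localhost:7700 and is
# protected by "aSampleMasterKey2").
import meilisearch

client = meilisearch.Client("http://localhost:7700", "aSampleMasterKey2")
print(client.health())       # expected: {'status': 'available'}
print(client.get_version())  # server version info
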
@@ -0,0 +1,68 @@
import json
from ast import literal_eval
import pandas as pd


def csv_to_json():
    """
    Transforms one CSV file into (multiple) JSON files.

    Make sure the CSV file comes in the right format and that the allocation of
    attributes matches the code and your file. For more information on the
    expected format, check the Meilisearch documentation.
    Make sure the output files are not bigger than Meilisearch's max payload limit!
    """
    input_file = str(input("Please give the input file: "))
    filesize = int(input("How many abstracts should be saved in one file? (recommended ~ 100k): "))
    print(f"Loading {input_file} ...")
    df = pd.read_csv(input_file, on_bad_lines="warn")

    listofdata = []
    y = 0  # used to create different filenames

    print(f"Processing a total of {len(df)} abstracts...")

    for x in range(len(df)):  # go through every line in the file
        if (x % filesize == 0) and (x != 0):  # split into several output files
            filename = f"Pubmed{y}.json"
            with open(filename, "w", encoding="utf-8") as outfile:
                json_object = json.dumps(listofdata, indent=4)
                outfile.write(json_object)

            y += 1
            print(f"{y} / {len(df) // filesize + 1}...")  # print status
            listofdata = []

        # retrieve the attributes from the file; make sure they are JSON compatible
        pubmed_id = str(df.iloc[x, 0])
        title = df.iloc[x, 1]
        abstract = df.iloc[x, 2]
        cited_by = df.iloc[x, 3]
        cited_number = str(len(literal_eval(cited_by)))
        published_year = str(df.iloc[x, 4])

        dictionary = {
            "PubMed ID": pubmed_id,
            "Title": title,
            "Abstract": abstract,
            "Cited by": cited_by,
            "Cited number": cited_number,
            "Published": published_year,
        }

        listofdata.append(dictionary)

    # save the remaining data into the last file
    filename = f"Pubmed{y}.json"
    with open(filename, "w", encoding="utf-8") as outfile:
        json_object = json.dumps(listofdata, indent=4)
        outfile.write(json_object)

    print("Done")


if __name__ == "__main__":
    csv_to_json()
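
The script assumes a fixed column order in the input CSV (the df.iloc[x, 0..4] accesses above); the exact schema is not part of this commit. For illustration, a hypothetical row with made-up values and the document the loop would build from it:

# Illustration only: hypothetical CSV row in the assumed column order
# (pubmed_id, title, abstract, cited_by, published_year) and the document
# that csv_to_json() would append to the output JSON.
import json
from ast import literal_eval

row = ["12345678", "A sample title", "A sample abstract.", "[111, 222, 333]", "2021"]
document = {
    "PubMed ID": str(row[0]),
    "Title": row[1],
    "Abstract": row[2],
    "Cited by": row[3],
    "Cited number": str(len(literal_eval(row[3]))),  # 3 citing articles
    "Published": str(row[4]),
}
print(json.dumps(document, indent=4))
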
@@ -0,0 +1,94 @@
import json
import time
import Api_key
import meilisearch


def add_data(client):
    """
    Uploads the given JSON files to Meilisearch.

    Takes any number of JSON files and uploads them to Meilisearch.
    Please make sure you follow the naming convention: to upload multiple files,
    give them the same name with an ascending number after it, starting at 0.
    Example: upload_to_db_0.json, upload_to_db_1.json, upload_to_db_2.json, ...
    The upload speed depends on the size of your files; the program crashes without
    an error if one of your documents is bigger than the max payload limit set in
    Meilisearch. Recommended size < 350 MB.
    """
    number_files = int(input("How many files do you want to upload?: "))

    if number_files > 1:
        print(
            "\nPlease make sure your files are named correctly - "
            "same name, ascending numbers at the end, starting with 0:"
            "\ntest0.json, test1.json, test2.json ...\n"
        )
        input_file = str(
            input(
                "Please only give the name without the .json ending and number"
                "\nlike this: 'test' for test0.json: "
            )
        )
        multiple_files = True
    elif number_files == 1:
        input_file = str(input("Please give the input file you would like to upload: "))
        multiple_files = False
    else:
        return

    index = str(input("What index do you want the data to be added to? "))
    print("Processing, this may take a while...")

    for x in range(number_files):
        if multiple_files:
            filename = input_file + str(x) + ".json"
        else:
            filename = input_file

        # open the file with our data - it has to be JSON!
        with open(filename, "r", encoding="utf-8") as json_file:
            data = json.load(json_file)

        print("Loading: ", filename)

        task = client.index(index).add_documents(data)
        task_id = task.task_uid

        while task.status == "enqueued" or task.status == "processing":
            time.sleep(10)  # sleep to avoid spamming the server with status requests
            task = client.get_task(task_id)

        if task.status == "failed":
            print("Failed to upload to Meilisearch, check the Meilisearch console")
            return

        if x + 1 < number_files:
            print(f"{x + 1} / {number_files} - {filename} uploaded successfully")
        else:
            print(f"Done - {filename} uploaded successfully")


def delete_index(client):
    """Deletes the given index."""
    index = str(input("What index do you want to delete? "))
    client.delete_index(index)


def main():
    """Connects to the local Meilisearch instance and uploads the given files."""
    # set up the client connection to the database, using the admin API key to identify us
    client = meilisearch.Client("http://localhost:7700", Api_key.ADMIN_API_KEY)

    add_data(client)

    # Uncomment this to delete an index
    # delete_index(client)


if __name__ == "__main__":
    main()
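
The script imports a local Api_key module that is not part of this diff; any module exposing an ADMIN_API_KEY string for the instance will do. Once add_data() has finished, a quick search can confirm the documents are indexed - a sketch with an assumed index name and query:

# Illustration only: query documents uploaded by add_data(). The index name
# ("pubmed") and the search term are assumptions, and Api_key.py must define
# ADMIN_API_KEY as the upload script expects.
import meilisearch
import Api_key

client = meilisearch.Client("http://localhost:7700", Api_key.ADMIN_API_KEY)
results = client.index("pubmed").search("crispr", {"limit": 5})
for hit in results["hits"]:
    print(hit["PubMed ID"], "-", hit["Title"])
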