Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Kalmyk parser implementation -- https://github.com/ispras/lingvodoc-react/issues/1119 #1503

Merged
merged 28 commits into from
Apr 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions alembic/versions/0fc45203d6ab_kalmyk_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
"""Kalmyk parser added

Revision ID: 0fc45203d6ab
Revises: 6e02e6fdf0f9
Create Date: 2024-01-11 12:20:38.119574

"""

# Revision identifiers, used by Alembic.
# `revision` is this migration's own id and `down_revision` is the migration
# it revises; both mirror the "Revision ID" / "Revises" lines of the module
# docstring.
revision = '0fc45203d6ab'
down_revision = '6e02e6fdf0f9'
branch_labels = None
depends_on = None

from alembic import op

def upgrade():
    """Register the Kalmyk HFST parser by inserting its row into ``public.parser``."""
    # The row is identified by the composite id (client_id=1, object_id=12)
    # and dispatches to the parse method named 'hfst_kalmyk'.
    insert_kalmyk_parser = '''
INSERT INTO public.parser(additional_metadata, created_at, object_id, client_id, name, parameters, method)
VALUES(null, '2024-01-11 12:20:38', 12, 1, 'Парсер калмыцкого языка (hfst)', '[]',
'hfst_kalmyk');
'''
    op.execute(insert_kalmyk_parser)

def downgrade():
    """Remove the Kalmyk HFST parser row(s) from ``public.parser``.

    Every row whose ``method`` is 'hfst_kalmyk' is deleted. The table is
    schema-qualified so the statement targets the same ``public.parser``
    table that ``upgrade`` inserts into, regardless of the session's
    ``search_path``.
    """
    op.execute('''
DELETE FROM public.parser WHERE method = 'hfst_kalmyk';
''')
5 changes: 0 additions & 5 deletions aux_scripts/apertium_install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,6 @@ PARSER_LIST=("apertium-kaz" "apertium-tat" "apertium-rus" "apertium-kaz-rus" "ap

fi;

# Bashkir should update from https://github.com/AigizK/apertium-bak.

echo "unimplemented: bashkir from https://github.com/AigizK/apertium-bak"
exit 1

for PARSER_NAME in "${PARSER_LIST[@]}"; do

if ! [ -d "$1/$PARSER_NAME" ]; then
Expand Down
47 changes: 47 additions & 0 deletions aux_scripts/compile_xfst.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#!/usr/bin/python3
import cgi
import cgitb
import os
import shutil

from hfst_dev import compile_xfst_file

# CGI endpoint: accepts a LEXC file and an XFST rules file, stores them and
# compiles the rules with hfst_dev; without a usable pair of uploads it
# serves the upload form instead.
#
# NOTE: the `cgi` and `cgitb` modules are deprecated since Python 3.11 and
# removed in Python 3.13 (PEP 594); this script needs an interpreter that
# still ships them.

# File name used by the `save stack` command appended to the rules file.
STACK_FILENAME = 'rules.xfst.hfst'
# Directory the uploaded sources are copied to and compiled from.
DIR = '/var/www/cgi-bin/xal/'
# Staging directory the raw uploads are written to first.
TMP = '/var/www/tmp/'

# Form served when the request does not carry both uploads.
UPLOAD_FORM = '''
<form method="post" enctype="multipart/form-data">
<input type="file" name="LEXC"><br/>
<input type="file" name="RULES"><br/>
<input type="submit" value="COMPILE!">
</form>
'''


def _upload_name(form, field):
    """Return the bare file name of upload *field*, or '' if it is unusable.

    The name is supplied by the client, so directory components (with either
    slash style) are stripped to keep the file inside TMP / DIR.
    """
    try:
        raw_name = form[field].filename
    except (KeyError, AttributeError):
        # Field missing, or submitted several times (a list has no .filename).
        return ''
    if not raw_name:
        # Plain (non-file) field or empty file input.
        return ''
    name = os.path.basename(raw_name.replace('\\', '/'))
    return '' if name in ('.', '..') else name


def _store_upload(form, field, path, trailer=b''):
    """Write the content of upload *field* to *path*, followed by *trailer*."""
    with open(path, 'wb') as target:
        target.write(form[field].file.read())
        target.write(trailer)


cgitb.enable(format='text')
POST = cgi.FieldStorage()

print('Content-Type: text/html; charset=utf-8')
print('')

lexc_name = _upload_name(POST, 'LEXC')
rules_name = _upload_name(POST, 'RULES')

if lexc_name and rules_name:
    lexc_tmp = os.path.join(TMP, lexc_name)
    rules_tmp = os.path.join(TMP, rules_name)

    _store_upload(POST, 'LEXC', lexc_tmp)
    # Append a `save stack` command so that compiling the rules file also
    # writes the resulting stack to STACK_FILENAME.
    _store_upload(
        POST, 'RULES', rules_tmp,
        trailer=('\nsave stack ' + STACK_FILENAME).encode('utf-8'))

    shutil.copyfile(lexc_tmp, os.path.join(DIR, lexc_name))
    shutil.copyfile(rules_tmp, os.path.join(DIR, rules_name))
    compile_xfst_file(DIR + rules_name)
    print('XFST compiled!')
else:
    print(UPLOAD_FORM)
2 changes: 2 additions & 0 deletions docker/docker-compose-proxy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ services:
volumes:
- ./frontend/dist:/dist
- /opt/apertium:/opt/apertium
- /opt/hfst:/opt/hfst
- ./sock:/sock
- /api/build/
- ../:/api
Expand All @@ -102,6 +103,7 @@ services:
volumes:
- ./frontend-proxy/dist:/dist
- /opt/apertium:/opt/apertium
- /opt/hfst:/opt/hfst
- ./sock-proxy:/sock
- /api/build/
- ../:/api
Expand Down
7 changes: 5 additions & 2 deletions docker/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
version: '3.2'
services:
pg:
image: postgres:13.2
image: ${PG_IMG:-docker_pg:latest}
build: ./postgres
container_name: postgres
environment:
POSTGRES_PASSWORD: password
Expand All @@ -12,7 +13,7 @@ services:
ports:
- "15432:5432"
nginx:
image: nginx:latest
image: 'nginx:latest'
container_name: ngx
ports:
- "80:80"
Expand All @@ -34,6 +35,7 @@ services:
ports:
- '16379:6379'
api:
image: 'docker_api:latest'
build: ..
depends_on:
- pg
Expand All @@ -46,6 +48,7 @@ services:
volumes:
- ./frontend/dist:/dist
- /opt/apertium:/opt/apertium
- /opt/hfst:/opt/hfst
- ./sock:/sock
- /api/build/
- ../:/api
Expand Down
130 changes: 130 additions & 0 deletions docker/docker.ini.template
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
###
# app configuration
# http://docs.pylonsproject.org/projects/pyramid/en/1.5-branch/narr/environment.html
###

[app:main]
use = egg:lingvodoc

secret = 'secret-1'

pyramid.reload_templates = true
pyramid.debug_authorization = false
pyramid.debug_notfound = false
pyramid.debug_routematch = false
pyramid.default_locale_name = ru
pyramid.includes =
pyramid_tm
# pyramid_debugtoolbar

sqlalchemy.url = postgresql+psycopg2://postgres:password@pg:5432/lingvodoc

# These parameters should be specified manually
dedoc_url = http://dedoc-demo.at.ispras.ru/upload
apertium_path = /opt/apertium

# By default, the toolbar only appears for clients from IP addresses
# '127.0.0.1' and '::1'.
# debugtoolbar.hosts = 127.0.0.1 ::1

###
# wsgi server configuration
###

[server:main]
use = egg:gunicorn#main
bind = unix:/sock/lingvodoc.sock
workers = workers_number
timeout = 360000
pidfile = /tmp/lingvodoc.pid
accesslog = /var/log/access.log
errorlog = /var/log/error.log
loglevel = warning
#preload_app = True
#daemon = True
proc_name = lingvodoc

###
# logging configuration
# http://docs.pylonsproject.org/projects/pyramid/en/1.5-branch/narr/logging.html
###

[loggers]
keys = root, lingvodoc, sqlalchemy

[handlers]
keys = console, filelog

[formatters]
keys = generic

[logger_root]
level = WARN
handlers = console, filelog

[logger_lingvodoc]
level = WARN
handlers =
qualname = lingvodoc

[logger_sqlalchemy]
level = ERROR
handlers =
qualname = sqlalchemy.engine
# "level = INFO" logs SQL queries.
# "level = DEBUG" logs SQL queries and results.
# "level = WARN" logs neither. (Recommended for production systems.)

[handler_console]
class = StreamHandler
args = (sys.stderr,)
level = NOTSET
formatter = generic

[handler_filelog]
class = concurrent_log_handler.ConcurrentRotatingFileHandler
args = ('/tmp/ld.log', 'a', 16777216, 4)
level = NOTSET
formatter = generic

[formatter_generic]
format = [%(process)d] %(asctime)s %(levelname)-5.5s [%(name)s][%(threadName)s] %(message)s

[backend:storage]
# disk or openstack
type = disk
path = /tmp/
prefix = backend_prefix
static_route = objects/

[backend:storage.temporary]
host = minio.at.ispras.ru
access_key = ld_development_access_key
secret_key = ld_development_secret_key
bucket = lingvodoc-temp-files
prefix = temp_storage_prefix

;[cache:dogpile]
;expiration_time = 3600000
;backend = dogpile.cache.redis
;backend = dogpile.cache.memcached

[cache:redis:args]
;redis_expiration_time = 60*60*2
host = redis
port = 6379
db = 0

# Info of the SMTP server account used to send emails, e.g. emails of user signup approval.
[smtp]
host = mailserver.example.com
username = noreply
password = password
port = 25
tls = True
debug = 0

# Controls whether user signups are moderated or not.
[signup]
approve = False
address = [email protected]
4 changes: 4 additions & 0 deletions docker/postgres/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# PostgreSQL image built for the `pg` service (see `build: ./postgres` in
# docker-compose.yml).
FROM postgres:13.2

# Keep the database cluster in a dedicated directory instead of the base
# image's default data directory.
# NOTE(review): presumably chosen so the data does not land on the volume
# declared by the base image -- confirm.
RUN mkdir -p /var/lib/postgresql-static/data
ENV PGDATA=/var/lib/postgresql-static/data
6 changes: 3 additions & 3 deletions lingvodoc/schema/gql_parserresult.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,10 +299,10 @@ def mutate(root, info, **args):

def get_parser_result_for_text(text, parse_method, apertium_path):
    """Run the parser named ``parse_method`` on ``text`` and return its result.

    ``parse_method`` is looked up as an attribute of ``ParseMethods``, so an
    unknown name raises ``AttributeError``. Methods whose name contains
    "apertium" additionally receive ``apertium_path``; every other method is
    called with the text only.
    """
    method = getattr(ParseMethods, parse_method)
    if "apertium" in parse_method:
        return method(text, apertium_path)
    return method(text)


Expand Down
Loading