diff --git a/setup.py b/setup.py
index f34dfb4..69f4f7e 100644
--- a/setup.py
+++ b/setup.py
@@ -14,10 +14,10 @@
         "apispec<4.0.0",
         "apispec-webframeworks",
         "click<=7.2.0",
-        "flask==1.1.2",
+        "flask==1.1.4",
         "Flask-Babel",
         "Flask-Cache==0.13.1",
-        "flask-cors==2.0.0",
+        "flask-cors==3.0.10",
         "flask-marshmallow",
         "gunicorn",
         "jieba==0.42.1",
@@ -32,7 +32,8 @@
         "pymorphy2-dicts-uk",
         "pymorphy2",
         "pythainlp",
-        "pyvi"
+        "pyvi",
+        "Jinja2==2.11.3"
     ],
     tests_require=[
     ],
diff --git a/tokenizer/lib/spacy/processor.py b/tokenizer/lib/spacy/processor.py
index ff59151..6e87b93 100644
--- a/tokenizer/lib/spacy/processor.py
+++ b/tokenizer/lib/spacy/processor.py
@@ -169,8 +169,9 @@ def _segmentize(
         for token in doc:
             if ( not token.is_space and not token._.is_meta ):
                 if (
-                    (segmentOn == 'singleline' and token._.line_break_before)
-                    or (segmentOn == 'doubleline' and token._.segment_break_before)
+                    (segmentOn != 'onesegment') and
+                    ((segmentOn == 'singleline' and token._.line_break_before)
+                    or (segmentOn == 'doubleline' and token._.segment_break_before))
                 ):
                     segmentIndex = segmentIndex + 1
                     segmeta, null = self.metaParser.parseLine(
diff --git a/tokenizer/schemas.py b/tokenizer/schemas.py
index 0e0c175..80b8384 100644
--- a/tokenizer/schemas.py
+++ b/tokenizer/schemas.py
@@ -36,9 +36,9 @@ class TokenizeTeiRequestSchema(Schema):
 class TokenizeTextRequestSchema(Schema):
     segments = fields.Str(
         required=False,
-        missing="doubleline",
+        missing="onesegment",
         description=gettext("Identify how segments are separated in the text."),
-        validate=validate.OneOf(["singleline","doubleline"])
+        validate=validate.OneOf(["singleline","doubleline", "onesegment"])
     )
     lang = fields.Str(
         required=True,
diff --git a/tokenizer/tokenizer.py b/tokenizer/tokenizer.py
index 06f3d08..5125cca 100644
--- a/tokenizer/tokenizer.py
+++ b/tokenizer/tokenizer.py
@@ -13,6 +13,7 @@
 import json
 
 app = Flask("tokenizer")
+# CORS(app)
 ma = Marshmallow(app)
 babel = Babel(app)
 
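
Note for reviewers: the new guard `(segmentOn != 'onesegment')` in `_segmentize` short-circuits both break checks, so with `segments="onesegment"` the entire input is tokenized as a single segment, and `schemas.py` now makes that the default. Below is a minimal sketch (not part of the commit) of how the updated `TokenizeTextRequestSchema` behaves; it assumes marshmallow 3.x `load()` semantics, that `lang` is the only other required field beyond what this hunk shows, and `"lat"` is just a placeholder language code.

```python
# Minimal sketch (not part of the diff): exercising the updated
# TokenizeTextRequestSchema. Assumes marshmallow 3.x semantics and that
# "lang" is the only other required field; "lat" is a placeholder code.
from marshmallow import ValidationError
from tokenizer.schemas import TokenizeTextRequestSchema

schema = TokenizeTextRequestSchema()

# "segments" omitted: the new default "onesegment" is filled in, so
# _segmentize will return the whole text as a single segment.
print(schema.load({"lang": "lat"}))
# e.g. {'segments': 'onesegment', 'lang': 'lat'}

# Values outside the expanded OneOf list are still rejected.
try:
    schema.load({"lang": "lat", "segments": "paragraph"})
except ValidationError as err:
    print(err.messages)  # e.g. {'segments': ['Must be one of: ...']}
```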