Skip to content

Commit

Permalink
Merge pull request #43 from alpheios-project/i42-one-segment
Browse files · Browse the repository at this point in the history
Add onesegment property
  • Loading branch information
balmas authored Sep 29, 2021
2 parents: 1610415 + 17019e5 — commit 54302e1
Show file tree
Hide file tree
Showing 4 changed files with 10 additions and 7 deletions.
7 changes: 4 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@
"apispec<4.0.0",
"apispec-webframeworks",
"click<=7.2.0",
"flask==1.1.2",
"flask==1.1.4",
"Flask-Babel",
"Flask-Cache==0.13.1",
"flask-cors==2.0.0",
"flask-cors==3.0.10",
"flask-marshmallow",
"gunicorn",
"jieba==0.42.1",
Expand All @@ -32,7 +32,8 @@
"pymorphy2-dicts-uk",
"pymorphy2",
"pythainlp",
"pyvi"
"pyvi",
"Jinja2==2.11.3"
],
tests_require=[
],
Expand Down
5 changes: 3 additions & 2 deletions tokenizer/lib/spacy/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,8 +169,9 @@ def _segmentize(
for token in doc:
if ( not token.is_space and not token._.is_meta ):
if (
(segmentOn == 'singleline' and token._.line_break_before)
or (segmentOn == 'doubleline' and token._.segment_break_before)
(segmentOn != 'onesegment') and
((segmentOn == 'singleline' and token._.line_break_before)
or (segmentOn == 'doubleline' and token._.segment_break_before))
):
segmentIndex = segmentIndex + 1
segmeta, null = self.metaParser.parseLine(
Expand Down
4 changes: 2 additions & 2 deletions tokenizer/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,9 @@ class TokenizeTeiRequestSchema(Schema):
class TokenizeTextRequestSchema(Schema):
segments = fields.Str(
required=False,
missing="doubleline",
missing="onesegment",
description=gettext("Identify how segments are separated in the text."),
validate=validate.OneOf(["singleline","doubleline"])
validate=validate.OneOf(["singleline","doubleline", "onesegment"])
)
lang = fields.Str(
required=True,
Expand Down
1 change: 1 addition & 0 deletions tokenizer/tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import json

app = Flask("tokenizer")
# CORS(app)
ma = Marshmallow(app)
babel = Babel(app)

Expand Down

0 comments on commit 54302e1

Please sign in to comment.