Skip to content

Commit

Permalink
Merge pull request #945 from spencermountain/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
spencermountain authored Aug 10, 2022
2 parents ab15882 + 60aef4c commit 290a65d
Show file tree
Hide file tree
Showing 18 changed files with 138 additions and 23 deletions.
2 changes: 1 addition & 1 deletion builds/compromise.js

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion builds/one/compromise-one.cjs

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion builds/one/compromise-one.mjs

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion builds/three/compromise-three.cjs

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion builds/three/compromise-three.mjs

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion builds/two/compromise-two.cjs

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion builds/two/compromise-two.mjs

Large diffs are not rendered by default.

5 changes: 5 additions & 0 deletions changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@ While all _Major_ releases should be reviewed, our only _large_ releases are **v

<!-- #### 14.5.0 [Unreleased]
-->
#### 14.4.5 [August 2022]
- **[fix]** - fix logic for greedy-negative matches - #936
- **[fix]** - fix tagging for 3-digit year iso dates - #868
- **[update]** - dependencies

#### 14.4.4 [August 2022]
- **[fix]** - support {root} matches without compromise/two

Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"author": "Spencer Kelly <[email protected]> (http://spencermounta.in)",
"name": "compromise",
"description": "modest natural language processing",
"version": "14.4.4",
"version": "14.4.5",
"main": "./src/three.js",
"unpkg": "./builds/compromise.js",
"type": "module",
Expand Down
2 changes: 1 addition & 1 deletion plugins/dates/src/model/regex.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ export default [
// @4pm
[/^@[1-9]+(am|pm)$/, 'Time', '@5pm'],
// 03/02
[/^[0-9]{2}\/[0-9]{2}/, 'Date', '03/02'],
[/^[0-9]{2}\/[0-9]{2}$/, 'Date', '03/02'],
// iso-time
// [/^[0-9]{4}[:-][0-9]{2}[:-][0-9]{2}T[0-9]/i, 'Time', 'iso-time-tag']

Expand Down
5 changes: 2 additions & 3 deletions scratch.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import nlp from './src/three.js'
import plg from './plugins/dates/src/plugin.js'
nlp.plugin(plg)

// nlp.verbose('tagger')
nlp.verbose('tagger')

let txt = ''
let doc
Expand All @@ -13,8 +13,7 @@ let m
// doc.nouns(0).toSingular()
// console.log(doc.text())

let str = nlp('I will never do that').verbs().toInfinitive().all().out()
console.log(str)
doc = nlp('30/01/194').debug()

// console.log(doc.world)
// console.log(nlp.world())
2 changes: 1 addition & 1 deletion src/1-one/change/api/insert.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import uuid from '../compute/uuid.js'
// are we inserting inside a contraction?
// expand it first
const expand = function (m) {
if (m.has('@hasContraction')) {//&& m.after('^.').has('@hasContraction')
if (m.has('@hasContraction') && typeof m.contractions === 'function') {//&& m.after('^.').has('@hasContraction')
let more = m.grow('@hasContraction')
more.contractions().expand()
}
Expand Down
37 changes: 37 additions & 0 deletions src/1-one/match/methods/match/steps/logic/negative-greedy.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import doesMatch from '../../term/doesMatch.js'

/**
 * Consume a run of terms for a greedy negative reg, like '!foo+' or '!foo{1,3}'.
 *
 * Walks forward from state.t, counting every term that does NOT match `reg`.
 * The run ends at the first term that matches `reg`, at the first term that
 * matches `nextReg` (when one is given), once `reg.max` terms have been
 * counted, or when the terms run out.
 *
 * @param {object} state - match state; reads `terms`, `t`, `start_i` and
 *   `phrase_length`, and advances `state.t` past the run on success
 * @param {object} reg - the un-negated form of the reg; a term matching it ends the run.
 *   `reg.min` / `reg.max` bound the run length when present
 * @param {object} [nextReg] - the reg that follows this one, if any ('greedy-to')
 * @returns {boolean} false when no term was consumed, or fewer than `reg.min`
 *   were (state is left untouched); true otherwise
 */
const negGreedy = function (state, reg, nextReg) {
  const { terms, start_i, phrase_length } = state
  let skip = 0
  for (let t = state.t; t < terms.length; t += 1) {
    // position of *this* term - not of the first term in the run.
    // presumably doesMatch uses it for position-dependent regs (start/end anchors)
    const index = start_i + t
    // we don't want a match, here
    if (doesMatch(terms[t], reg, index, phrase_length)) {
      break //stop going
    }
    // are we doing 'greedy-to'?
    // - "!foo+ after" should stop at 'after'
    if (nextReg && doesMatch(terms[t], nextReg, index, phrase_length)) {
      break
    }
    skip += 1
    // is it max-length now?
    if (reg.max !== undefined && skip === reg.max) {
      break
    }
  }
  // a greedy negative must consume at least one term
  if (skip === 0) {
    return false //dead
  }
  // did we satisfy min for !foo{min,max}
  if (reg.min && reg.min > skip) {
    return false //dead
  }
  state.t += skip
  return true
}

export default negGreedy
16 changes: 9 additions & 7 deletions src/1-one/match/methods/match/steps/negative.js
Original file line number Diff line number Diff line change
@@ -1,25 +1,24 @@
import matchTerm from '../term/doesMatch.js'
import negGreedy from './logic/negative-greedy.js'

// '!foo' should match anything that isn't 'foo'
// if it matches, return false
const doNegative = function (state) {
const { regs } = state
let reg = regs[state.r]



// match *anything* but this term
let tmpReg = Object.assign({}, reg)
tmpReg.negative = false // try removing it

// found it? if so, we die here
let found = matchTerm(state.terms[state.t], tmpReg, state.start_i + state.t, state.phrase_length)
if (found) {
return false//die
return false//bye
}

// should we skip the term too?
// "before after"
// match("before !foo? after")
if (reg.optional) {
// "before after" - "before !foo? after"
// does the next reg match the this term?
let nextReg = regs[state.r + 1]
if (nextReg) {
Expand All @@ -37,7 +36,10 @@ const doNegative = function (state) {
}
}
}

// negative greedy - !foo+ - super hard!
if (reg.greedy) {
return negGreedy(state, tmpReg, regs[state.r + 1])
}
state.t += 1
return true
}
Expand Down
2 changes: 1 addition & 1 deletion src/2-two/preTagger/model/regex/regex-numbers.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ export default [
// iso-dates
[/^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}/i, 'Date', 'iso-date'],
[/^[0-9]{1,4}-[0-9]{1,2}-[0-9]{1,4}$/, 'Date', 'iso-dash'],
[/^[0-9]{1,4}\/[0-9]{1,2}\/[0-9]{1,4}$/, 'Date', 'iso-slash'],
[/^[0-9]{1,4}\/[0-9]{1,2}\/([0-9]{4}|[0-9]{2})$/, 'Date', 'iso-slash'],
[/^[0-9]{1,4}\.[0-9]{1,2}\.[0-9]{1,4}$/, 'Date', 'iso-dot'],
[/^[0-9]{1,4}-[a-z]{2,9}-[0-9]{1,4}$/i, 'Date', '12-dec-2019'],

Expand Down
2 changes: 1 addition & 1 deletion src/_version.js
Original file line number Diff line number Diff line change
@@ -1 +1 @@
export default '14.4.4'
export default '14.4.5'
73 changes: 72 additions & 1 deletion tests/one/match/negative.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@ test('! negative match syntax :', function (t) {
// doc = nlp("I will send her.")
// t.equal(doc.has(m), true, here + 'multi-optional')


t.end()
})

Expand All @@ -77,3 +76,75 @@ test('negative optional logic', function (t) {

t.end()
})

test('negative greedy-max', function (t) {
  // a bounded negative run - {0,3} - sitting between two fixed words
  const pattern = 'before !(not|no|maybe|perhaps){0,3} after'
  const hasCases = [
    ['before after', true, 'no middle'],
    ['before one after', true, 'one middle'],
    ['before one two after', true, 'two middle'],
    ['before one two three after', true, 'three middle'],
    ['before one two three four after', false, 'four middle'],
  ]
  hasCases.forEach(([str, want, msg]) => {
    t.equal(nlp(str).has(pattern), want, msg)
  })

  // {max} and {min,max} ranges on a negative term, all against the same input
  const input = 'before one two three maybe'
  const textCases = [
    ['before !maybe{1}', 'before one', 'greedy-max-one'],
    ['before !maybe{1,2}', 'before one two', 'greedy-max-two'],
    ['before !maybe{4,5}', '', 'greedy-unmet-min'],
  ]
  textCases.forEach(([reg, want, msg]) => {
    t.equal(nlp(input).match(reg).text(), want, msg)
  })

  t.end()
})

test('negative greedy-end', function (t) {
  // simpler version - a closing word follows the greedy negative
  const closed = [
    ['before one after', 'greedy-not-one'],
    ['before one two after', 'greedy-not-two'],
  ]
  for (const [str, msg] of closed) {
    t.equal(nlp(str).has('before !maybe+ after'), true, msg)
  }

  // greedyEnd - the greedy negative is the last piece of the match
  const open = [
    ['before one two', 'before one two', 'greedy-end'],
    ['before maybe', '', 'greedy-zero-fail'],
    ['before one maybe', 'before one', 'greedy-one-stop'],
    ['before one two maybe', 'before one two', 'greedy-two-stop'],
  ]
  for (const [str, want, msg] of open) {
    t.equal(nlp(str).match('before !maybe+').text(), want, msg)
  }
  t.end()
})


test('negative greedy-to', function (t) {
  const str = 'before one after end'

  // the word after the greedy negative is present in the input
  const found = nlp(str).match('before !maybe+ after')
  t.equal(found.text(), 'before one after', 'greedy-to-pos')

  // the word after the greedy negative never appears in the input
  const missing = nlp(str).match('before !maybe+ none')
  t.equal(missing.text(), '', 'greedy-no-after')
  t.end()
})
1 change: 1 addition & 0 deletions tests/two/match.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -659,6 +659,7 @@ let arr = [
["I was an expert", 'i was an #Noun'],
["definitely worth a rental.", '#Adverb #Verb a #Noun'],
["keeping the matter a secret", '#Gerund the #Noun a #Noun'],
["30/01/194", '!#Date'],
]
test('match:', function (t) {
let res = []
Expand Down

0 comments on commit 290a65d

Please sign in to comment.