From fcf492e9dcb5650e8c7c9a6dacb5c6bae0fc3a8b Mon Sep 17 00:00:00 2001 From: Jean-Philippe Barrette-LaPierre Date: Sat, 15 Sep 2007 00:34:14 -0400 Subject: [PATCH] added error detection --- finenight/lisp/finenight.lisp | 2 +- finenight/lisp/iadfa-test.lisp | 181 ++++++++++++++++++++++++++++++++- finenight/lisp/iadfa.lisp | 140 +++++++++++++++---------- finenight/lisp/package.lisp | 1 + finenight/lisp/utils.lisp | 23 +++-- finenight/python/fsc.py | 6 +- finenight/python/recognize | 2 + 7 files changed, 286 insertions(+), 69 deletions(-) diff --git a/finenight/lisp/finenight.lisp b/finenight/lisp/finenight.lisp index 8fbfa34..077f77e 100644 --- a/finenight/lisp/finenight.lisp +++ b/finenight/lisp/finenight.lisp @@ -1,4 +1,4 @@ -(load "package.lisp") +(load "package") (load "utils") (load "fsa") (load "fsa-builder") diff --git a/finenight/lisp/iadfa-test.lisp b/finenight/lisp/iadfa-test.lisp index dc3631a..6834013 100644 --- a/finenight/lisp/iadfa-test.lisp +++ b/finenight/lisp/iadfa-test.lisp @@ -25,8 +25,52 @@ (setf ,output (extract-words (iadfa-fsa ,iadfa))) (format t "input:~%~S~%output:~%~S~%" ,w ,output) (equal ,w ,output)))) - +(defun detect-problems (words) + (let ((iadfa (build-iadfa)) + (words-to-be-checked nil)) + (dolist (word words) + (setf words-to-be-checked (nconc words-to-be-checked (list word))) + (handle-word iadfa (concatenate 'list word)) + (when (not (equal words-to-be-checked + (extract-words (iadfa-fsa iadfa)))) + (return))) + ;; We got the first entry that trigger the problem. + ;; we need now to see which entry is needed to start + ;; the problem + words-to-be-checked)) + + +(defun detect-first-starting-problematic-word (words-to-be-checked) + (let ((wtbc (cdr words-to-be-checked)) + (last-word (car words-to-be-checked))) + (do ((iadfa (gen-iadfa wtbc) (gen-iadfa wtbc))) + ((null wtbc)) + (if (equal wtbc + (extract-words (iadfa-fsa iadfa))) + (return (cons last-word wtbc))) + (setf last-word (car wtbc)) + (setf wtbc (cdr wtbc))))) + + +(defun detect-problems-from-file (filename) + (let ((words-to-be-checked nil)) + (let ((iadfa (build-iadfa))) + (for-each-line-in-file (word filename) + (setf words-to-be-checked (nconc words-to-be-checked (list word))) + (handle-word iadfa (concatenate 'list word)) + (when (not (equal words-to-be-checked + (extract-words (iadfa-fsa iadfa)))) + (return)) + nil)) + ;; We got the first entry that trigger the problem. + ;; we need now to see which entry is needed to start + ;; the problem + (detect-first-starting-problematic-word words-to-be-checked))) + + +(detect-problems-from-file "../../data/com.zone.sorted.small") + (defun iadfa-non-branch-suffix () "This tests that the output of the iadfa isn't screwed up by the prefix 0--0 of 0--0--0 because the delete branch @@ -49,6 +93,7 @@ don't delete any node" "0-APR-CREDITCARD" "0-APR-CREDITCARDS" "0-APR-CREDITS-CARD"))) + (defun iadfa-test2 () "This situation would cause to have an empty ancestror fsa." (test-equivalence '("0000" @@ -74,12 +119,124 @@ We had a cycle on the W (1 -W> 2 -O> 3 -O> 1)." (defun iadfa-test5 () "When we add 0-SUNGKOREA the stem starts within the common suffixes. -So we add the stem within the suffixes which create new words" +So we add the stem within the suffixes which create new words." (test-equivalence '("0-FORUM" + "0-SSUM" + "0-SSUNGKOREA"))) + + +(defun iadfa-test6 () + "When we add 0-SUNGKOREA the stem starts within the common suffixes. +So we add the stem within the suffixes which create new words. +So we need to make sure that the common-suffix won't go further than +the 0-ASUNGKOREA" + (test-equivalence '("0-ASUNGKOREA" + "0-FORUM" + "0-S" "0-SUM" "0-SUNGKOREA"))) -(iadfa-test5) +(defun iadfa-test8 () +"This is an example of a test where we had a bad +update of parents-arities" + (test-equivalence '("0-1" + "0-1-0" + "0-1-1"))) + +(defun iadfa-test9 () + (test-equivalence '("0-1-2" + "0-1-2-0" + "0-1-2-3"))) + +(defun iadfa-test10 () + + (test-equivalence '("0-1" + "0-1-0" + "0-1-1" + "0-1-100" + "0-1-2" + "0-1-2-0" + "0-1-2-3"))) + +(defun iadfa-test11 () + "This situation was causing problems. The 0-2GENION was +disapearing after the 0-2GO addition. 0-2GO was subsuming +the stem of 0-2GENION, so we had GE to add back for the +subsumed, but the calculated stem to add was only G, +since we had the entry 0-0OO. + +The cause was that we were completely consuming the profile, +but we shouldn't eat profile when there's one to consume" + (test-equivalence '("0-0OO" + "0-2-GENION" + "0-2GENION" + "0-2GO"))) + +(org.ancar.CLUnit::deftest "IADFA Test 11" + :category "Subsumed previous stems." + :test-fn #'iadfa-test11) + +(defun iadfa-test12 () + "The 0-7-0 was lost. When we were adding the last entry, +common-prefix was returning a suffix of 7-2 and a previous +stem of 7-0. However, because of 0-462, we were getting a +stem of 7 from common-suffix and a previous stem of 7-0. + +In fact we shouldn't try to get a common prefix that would +produce a stem shorter than the previous stem. +" + (test-equivalence '("0-462" + "0-5-0" + "0-7-0" + "0-7-2"))) + +(org.ancar.CLUnit::deftest "IADFA Test 12" + :category "Subsumed previs stems." + :test-fn #'iadfa-test12) + +(defun iadfa-test13 () + "The 0-5000 was created." + (test-equivalence '("0-1000000" + "0-10000000" + "0-3" + "0-3-0" + "0-3-6" + "0-30" + "0-300" + "0-3000" + "0-300MPH" + "0-50" + "0-500" + "0-500MPH"))) + +(defun iadfa-test13 () + (test-equivalence '("0-1000000" "0-10000000" "0-10000HIT" "0-1000ADULTTOYSSEXTOYS" "0-1000HIT" + "0-100ADOLESCENT18" "0-100C" "0-100EDU" "0-100KM" "0-100SPORTS" "0-101" + "0-101---0-1-2-3-4-5-6-7-8-9-DECLARATION-OF-WAR--MERCURYDOLPHIN" + "0-10EISAI" "0-11" "0-110" "0-111" "0-117" "0-12" "0-123" "0-123-456-789" + "0-123456789" "0-12CLUB" "0-12KIDS" "0-12LINEMEN" "0-13" "0-1320FEET" + "0-14" "0-15" "0-16" "0-160" "0-168" "0-16EDU" "0-173" "0-18" "0-180" + "0-18SOS" "0-18SUI" "0-18TEENS4U" "0-19BOYS" "0-1AND1-0" "0-1AVSEX" "0-1DU" + "0-1KISS" "0-1MEDIA" "0-1NET" "0-1SEX" "0-1SHOP" "0-1TECH" "0-1WEB" + "0-1XXX" "0-2" "0-2-0" "0-2-1" "0-2-60" "0-2-7" + "0-2-AMATEUR-XXX-GAY-LESBIAN-ADULT-VIDEOS" "0-2-GENION" "0-2-ONLINE" "0-20" + "0-200" "0-2000" "0-200MPH" "0-21" "0-212" "0-216" "0-21SMARTKIDS" "0-22" + "0-23" "0-232" "0-24" "0-24-SEX" "0-24AUTO" "0-24BUSINESS" "0-24FLORIST" + "0-24H" "0-24H-ZARSZERVIZ" "0-24JEWELRY" "0-24SEX" "0-24SHOP" + "0-24SHOPPING" "0-24UHR" "0-25" "0-255" "0-261" "0-27" "0-273" "0-28" + "0-29" "0-2FLO-WERS" "0-2GENION" "0-2GO" "0-2K" "0-2TALENT" "0-2TALENTS" + "0-2U" "0-2VISA" "0-3" "0-3-0" "0-3-6" "0-3-6AIBB" "0-30" "0-300" "0-3000" + "0-300MPH" "0-312" "0-34" "0-36" "0-360" "0-360C" "0-360CARDS" + "0-360FINANCIAL" "0-360HOLDINGS" "0-360HOMES" "0-360HOMETOURS" "0-360MLS" + "0-360PHOTO" "0-360PHOTOS" "0-360PHOTOTOURS" "0-360PODCASTNEWS" + "0-360REALTY" "0-360RESORTS" "0-360TOUR" "0-360TOURS" "0-360VIEWS" + "0-360VR" "0-365" "0-371" "0-3BABY" "0-3D" "0-3FORUM" "0-3YEARS" "0-4" + "0-4-0" "0-40" "0-400KM" "0-400M" "0-400MPH" "0-41" "0-411" "0-45" "0-462" + "0-48" "0-49" "0-4D" "0-4FACTORY" "0-4VADUZ" "0-5" "0-5-0" "0-5-30" "0-50" + "0-500" "0-500MPH"))) + +(iadfa-test13) + (org.ancar.CLUnit::deftest "IADFA Test 1" :category "Destinations" @@ -101,6 +258,24 @@ So we add the stem within the suffixes which create new words" :category "Destinations" :test-fn #'iadfa-test5) +(org.ancar.CLUnit::deftest "IADFA Test 6" + :category "Destinations" + :test-fn #'iadfa-test6) + +(org.ancar.CLUnit::deftest "IADFA Test 8" + :category "Destinations" + :test-fn #'iadfa-test8) + +(org.ancar.CLUnit::deftest "IADFA Test 9" + :category "Destinations" + :test-fn #'iadfa-test9) + +(org.ancar.CLUnit::deftest "IADFA Test 10" + :category "Destinations" + :test-fn #'iadfa-test10) + + + (org.ancar.CLUnit::run-all-tests) diff --git a/finenight/lisp/iadfa.lisp b/finenight/lisp/iadfa.lisp index 74a9c9b..71fa5ae 100644 --- a/finenight/lisp/iadfa.lisp +++ b/finenight/lisp/iadfa.lisp @@ -4,7 +4,8 @@ (in-package :com.rrette.finenight.iadfa) (defstruct iadfa - (ancestrors (make-array 1000000 :initial-element nil :fill-pointer 0) :type vector) + (ancestrors (make-array 1000000 :initial-element nil :fill-pointer 0)) + (parent-arities (make-array 1000000 :fill-pointer 0)) (index 0) ;; this is used for automatic node name generation (unused-nodes nil) (fsa (make-fsa :start-node (make-empty-node 0))) @@ -28,22 +29,23 @@ (progn (setf ancestrors (make-hash-table)) (setf (aref (iadfa-ancestrors iadfa) (node-label dst-node)) ancestrors))) + (incf (aref (iadfa-parent-arities iadfa) (node-label dst-node))) (hash-table-update! input ancestrors nodes (cons src-node nodes)))) (defun node-remove-ancestror! (iadfa dst-node input src-node) (let ((ancestrors (aref (iadfa-ancestrors iadfa) (node-label dst-node)))) (if ancestrors - (progn - (hash-table-update! input ancestrors nodes - (remove src-node nodes)))))) + (hash-table-update! input ancestrors nodes + (remove src-node nodes))))) (defun get-fresh-node (iadfa) (if (null (iadfa-unused-nodes iadfa)) (progn (let ((new-label (generate-state iadfa))) - (if (>= new-label (length (iadfa-ancestrors iadfa))) - (vector-push (make-hash-table) (iadfa-ancestrors iadfa))) + (when (>= new-label (length (iadfa-ancestrors iadfa))) + (vector-push 0 (iadfa-parent-arities iadfa)) + (vector-push (make-hash-table) (iadfa-ancestrors iadfa))) (make-empty-node new-label))) (let* ((unused-nodes (iadfa-unused-nodes iadfa)) (new-node (car unused-nodes))) @@ -60,6 +62,7 @@ ;(node-reset node-end) (do ((i (node-label node) (+ i 1))) ((>= i (iadfa-index iadfa))) + (setf (aref (iadfa-parent-arities iadfa) i) 0) (clrhash (aref (iadfa-ancestrors iadfa) i)))) ;; (do () @@ -78,6 +81,7 @@ (setf (iadfa-index iadfa) (node-label node))) (defun delete-branch (iadfa stem-start-node stem-start-input stem-end-node) + (decf (aref (iadfa-parent-arities iadfa) (node-label (car (node-destinations stem-end-node))))) (remove-ancestror-to-childs iadfa stem-end-node) (when (not (eq stem-start-node stem-end-node)) (let ((old-node (car (node-transition stem-start-node stem-start-input)))) @@ -104,10 +108,7 @@ '()))) (defun node-ancestrors (iadfa node) - (let ((ancestrors (aref (iadfa-ancestrors iadfa) (node-label node)))) - (if ancestrors - (apply #'append (hash-values ancestrors)) - '()))) + (aref (iadfa-parent-arities iadfa) (node-label node))) (defun node-ancestrors-for-input (iadfa dst-node input) (iadfa-state-ancestrors-for-input iadfa (node-label dst-node) input)) @@ -126,6 +127,7 @@ (defun common-prefix (iadfa word node) (let ((stem '()) + (prefix-stem '()) (stem-start-node node) (stem-start-input (car word)) (stem-end-node nil) @@ -138,6 +140,7 @@ (setf stem-start-node node) (setf stem-start-input (car word)) (setf stem '()) + (setf prefix-stem '()) (setf profile '())))) (if (eq (iadfa-final iadfa) node) (progn @@ -145,14 +148,26 @@ (values stem-start-node (append stem word) (append profile (make-list (- (length word) 1) :initial-element nil)))) (let ((next-node (node-transition node (car word)))) (if (null next-node) - (values node word (make-list (length word) :initial-element nil)) + (progn + (if found-stem + (let ((prefix-symbol (car (node-symbols node))) + (prefix-node (car (node-destinations node)))) + ;; we are in a suffix of a subsumed stem + ;; the node should have only one destination. + (delete-branch iadfa stem-start-node stem-start-input stem-end-node) + (values stem-start-node + (append stem word) + (append profile (make-list (- (length word) 1) :initial-element nil)) + (append stem (list prefix-symbol)) + prefix-node)) + (values node word (make-list (length word) :initial-element nil)))) (progn (setf next-node (car next-node)) (setf stem (append stem (list (car word)))) (setf profile (append profile (list (node-final next-node)))) (when (not found-stem) + (setf prefix-stem (append prefix-stem (list (car word)))) (setf stem-end-node node) - (when (< 1 (length (node-ancestrors iadfa next-node))) - (break) + (when (< 1 (node-ancestrors iadfa next-node)) (setf found-stem t))) (c-prefix (cdr word) next-node @@ -163,35 +178,46 @@ -(defun c-suffix (iadfa current-suffix node prefix-node profile) - (if (eq 1 (length current-suffix)) +(defun c-suffix (iadfa current-suffix node prefix-node profile sub-stem) + (if (or (= 1 (length current-suffix)) + (= 1 (length profile)) + (= 1 (length sub-stem)) + (= (length sub-stem) (length current-suffix))) (values node (reverse current-suffix) (reverse profile)) (let ((next-node (ancestror-transition iadfa node (car current-suffix) (car profile)))) +;; (if (equal '(#\0 #\- #\7) current-suffix) +;; (break)) (if (or (not next-node) (eq next-node prefix-node) (eq next-node (fsa-start-node (iadfa-fsa iadfa)))) (values node (reverse current-suffix) (reverse profile)) (c-suffix iadfa (cdr current-suffix) next-node prefix-node - (cdr profile)))))) + (cdr profile) + (cdr sub-stem)))))) -(defun common-suffix (iadfa current-suffix node prefix-node profile) +(defun common-suffix (iadfa current-suffix node prefix-node profile sub-stem) ;; this function takes a suffix to be consumed ;; and a node to start from and the current stem - (c-suffix iadfa (reverse current-suffix) node prefix-node (reverse profile))) + (c-suffix iadfa (reverse current-suffix) node prefix-node (reverse profile) sub-stem)) (defun iadfa-add-edge! (iadfa src-node input dst-node) (node-add-edge! src-node input dst-node) (node-add-ancestror! iadfa dst-node input src-node)) -(defun add-stem (iadfa prefix-node suffix-node current-stem profile) +(defun add-stem (iadfa prefix-node suffix-node current-stem profile sub-prefix sub-node) (let ((last-node prefix-node) (last-input (car (last current-stem))) - (processing-stem (butlast current-stem))) + (processing-stem (butlast current-stem)) + (sub-prefix sub-prefix)) (reduce #'(lambda (iadfa input) (let ((new-node (get-fresh-node iadfa))) (setf (node-final new-node) (car profile)) + (when sub-prefix + (when (= 1 (length sub-prefix)) + (iadfa-add-edge! iadfa last-node (car sub-prefix) sub-node)) + (setf sub-prefix (cdr sub-prefix))) (setf profile (cdr profile)) (iadfa-add-edge! iadfa last-node input new-node) (setf last-node new-node) @@ -199,15 +225,17 @@ processing-stem :initial-value iadfa) (iadfa-add-edge! iadfa last-node last-input suffix-node) + (when (= 1 (length sub-prefix)) + (iadfa-add-edge! iadfa last-node (car sub-prefix) sub-node)) iadfa)) (defun handle-word (iadfa word) (let* ((fsa (iadfa-fsa iadfa))) - (multiple-value-bind (prefix-node current-suffix profile) (common-prefix iadfa word (fsa-start-node fsa)) + (multiple-value-bind (prefix-node current-suffix profile sub-prefix sub-node) (common-prefix iadfa word (fsa-start-node fsa)) (multiple-value-bind (suffix-node current-stem current-profile) - (common-suffix iadfa current-suffix (iadfa-final iadfa) prefix-node profile) - (add-stem iadfa prefix-node suffix-node current-stem current-profile) + (common-suffix iadfa current-suffix (iadfa-final iadfa) prefix-node profile sub-prefix) + (add-stem iadfa prefix-node suffix-node current-stem current-profile sub-prefix sub-node) (if (> (node-arity prefix-node) 1) (remove-ancestror-to-childs iadfa prefix-node))) iadfa))) @@ -222,8 +250,12 @@ (let ((index 0)) (reduce #'(lambda (iadfa word) (handle-word iadfa (concatenate 'list word)) - (graphviz-export-to-file (make-fsa-builder-from-fsa (iadfa-fsa iadfa)) (concatenate 'string "output/iadfa" (format nil "~A" index) ".dot")) - (graphviz-export-to-file (build-fsa-from-ancestrors iadfa) (concatenate 'string "output/iadfa-ances" (format nil "~A" index) ".dot")) + (graphviz-export-to-file (make-fsa-builder-from-fsa (iadfa-fsa iadfa)) + (concatenate 'string "output/iadfa" + (format nil "~A" index) ".dot")) + (graphviz-export-to-file (build-fsa-from-ancestrors iadfa) + (concatenate 'string "output/iadfa-ances" + (format nil "~A" index) ".dot")) (setf index (+ index 1)) iadfa) words @@ -235,21 +267,21 @@ (last-time (get-internal-real-time)) (nb-per-hours 0) (nb-hours-for-all 0)) - (for-each-line-in-file - file - #'(lambda (line) - (format t "~,2F w/h ~,2F Hours ~A ~A ~%" nb-per-hours nb-hours-for-all index line) - (handle-word iadfa (concatenate 'list line)) - (when (member index dump) - (graphviz-export-to-file (make-fsa-builder-from-fsa (iadfa-fsa iadfa)) (concatenate 'string "output/iadfa" (format nil "~A" index) ".dot")) - (graphviz-export-to-file (build-fsa-from-ancestrors iadfa) (concatenate 'string "output/iadfa-ances" (format nil "~A" index) ".dot"))) - (incf index) - (if (zerop (mod index 1000)) - (let ((current-time (get-internal-real-time))) - (setf nb-per-hours (float (* 1000 (/ 1 (/ (- current-time last-time) internal-time-units-per-second)) 60 60))) - (setf nb-hours-for-all (float (/ (* 65000 (/ (- current-time last-time) internal-time-units-per-second)) 60 60))) - (setf last-time current-time))) - iadfa)) + (for-each-line-in-file (line file) + (format t "~,2F w/h ~,2F Hours ~A ~A ~%" nb-per-hours nb-hours-for-all index line) + (handle-word iadfa (concatenate 'list line)) + (when (member index dump) + (graphviz-export-to-file (make-fsa-builder-from-fsa (iadfa-fsa iadfa)) (concatenate 'string "output/iadfa" (format nil "~A" index) ".dot")) + (graphviz-export-to-file (build-fsa-from-ancestrors iadfa) (concatenate 'string "output/iadfa-ances" (format nil "~A" index) ".dot"))) + (incf index) + (if (zerop (mod index 1000)) + (let ((current-time (get-internal-real-time))) + (setf nb-per-hours (float (* 1000 (/ 1 (/ (- current-time last-time) + internal-time-units-per-second)) 60 60))) + (setf nb-hours-for-all (float (/ (* 65000 (/ (- current-time last-time) + internal-time-units-per-second)) 60 60))) + (setf last-time current-time))) + iadfa) iadfa)) @@ -259,21 +291,19 @@ (last-time (get-internal-real-time)) (nb-per-hours 0) (nb-hours-for-all 0)) - (for-each-line-in-file - file - #'(lambda (line) - (format t "~,2F w/h ~,2F Hours ~A ~A ~%" nb-per-hours nb-hours-for-all index line) - (handle-word iadfa (concatenate 'list line)) - (graphviz-export-to-file (make-fsa-builder-from-fsa (iadfa-fsa iadfa)) (concatenate 'string "output/iadfa" (format nil "~A" index) ".dot")) - (graphviz-export-to-file (build-fsa-from-ancestrors iadfa) (concatenate 'string "output/iadfa-ances" (format nil "~A" index) ".dot")) - (incf index) - (if (zerop (mod index 1000)) - (let ((current-time (get-internal-real-time))) - (setf nb-per-hours (float (* 1000 (/ 1 (/ (- current-time last-time) internal-time-units-per-second)) 60 60))) - (setf nb-hours-for-all (float (/ (* 65000 (/ (- current-time last-time) internal-time-units-per-second)) 60 60))) - (setf last-time current-time))) - iadfa)) - (iadfa-fsa iadfa))) + (for-each-line-in-file (line file) + (format t "~,2F w/h ~,2F Hours ~A ~A ~%" nb-per-hours nb-hours-for-all index line) + (handle-word iadfa (concatenate 'list line)) + (graphviz-export-to-file (make-fsa-builder-from-fsa (iadfa-fsa iadfa)) (concatenate 'string "output/iadfa" (format nil "~A-~A" index line) ".dot")) + (graphviz-export-to-file (build-fsa-from-ancestrors iadfa) (concatenate 'string "output/iadfa-ances" (format nil "~A" index) ".dot")) + (incf index) + (if (zerop (mod index 1000)) + (let ((current-time (get-internal-real-time))) + (setf nb-per-hours (float (* 1000 (/ 1 (/ (- current-time last-time) internal-time-units-per-second)) 60 60))) + (setf nb-hours-for-all (float (/ (* 65000 (/ (- current-time last-time) internal-time-units-per-second)) 60 60))) + (setf last-time current-time))) + iadfa) + iadfa)) ;; (defun dump-words (iadfa) ;; (let ((fsa (iadfa-fsa)) diff --git a/finenight/lisp/package.lisp b/finenight/lisp/package.lisp index ab6cf27..0360cfd 100644 --- a/finenight/lisp/package.lisp +++ b/finenight/lisp/package.lisp @@ -30,6 +30,7 @@ :node-label :node-remove-edge! :node-remove-dsts-for-input! + :node-symbols :node-transition :node-walk :make-empty-fsa diff --git a/finenight/lisp/utils.lisp b/finenight/lisp/utils.lisp index f2ab12f..7071c06 100644 --- a/finenight/lisp/utils.lisp +++ b/finenight/lisp/utils.lisp @@ -97,13 +97,22 @@ (defun generate-name (index) (format nil "q~A" index)) -(defun for-each-line-in-file (file func) - (declare (function func)) - (with-open-file (p file :direction :input) - (do ((line (read-line p nil 'eof) - (read-line p nil 'eof))) - ((eql line 'eof)) - (funcall func line)))) +;; (defun for-each-line-in-file (file func) +;; (declare (function func)) +;; (with-open-file (p ,file :direction :input) +;; (do ((line (read-line p nil 'eof) +;; (read-line p nil 'eof))) +;; ((eql line 'eof)) +;; (funcall func line)))) + + +(defmacro for-each-line-in-file ((var file) &body body) + (with-syms (stream) + `(with-open-file (,stream ,file :direction :input) + (do ((,var (read-line ,stream nil 'eof) (read-line ,stream nil 'eof))) + ((eql ,var 'eof)) + ,@body)))) + (defmacro vector-walk ((index value vector) &rest body) (with-syms (vec) diff --git a/finenight/python/fsc.py b/finenight/python/fsc.py index 7d99313..4fd6480 100644 --- a/finenight/python/fsc.py +++ b/finenight/python/fsc.py @@ -208,8 +208,8 @@ def isLikeStates(state, lowerStates): return isLike -def delta( (stateType, index), character, input, states ): - cv = characterizedVector( character, input )[:3] +def delta( n, (stateType, index), character, input, states ): + cv = characterizedVector( character, input )[:(2 * n + 1)] l = len(cv) w = states[l] cv = str(cv) @@ -251,7 +251,7 @@ def recognize( self, word, fsa): while len(states): (V, q, M) = states.pop() for (x, q1) in fsa.states[q].transitions.items(): - mPrime = delta( M, x, word[M[1]:], self.transitionsStates ) + mPrime = delta( self.n, M, x, word[M[1]:], self.transitionsStates ) if mPrime[0] != []: V1 = V + x states.append((V1, q1, mPrime)) diff --git a/finenight/python/recognize b/finenight/python/recognize index 508ae81..4166f75 100755 --- a/finenight/python/recognize +++ b/finenight/python/recognize @@ -3,6 +3,7 @@ import getopt import os import pickle +import pprint import sys @@ -174,6 +175,7 @@ if __name__ == "__main__": debug("Using Schulz's algoritm") from fsc import ErrorTolerantRecognizer transitionStates = getTransitionStates(transitionsFile, distance) + pprint.pprint(transitionStates) else: debug("Using Flazer's algorithm") from et import ErrorTolerantRecognizer