From 2ae250e646a10d272a230bef2ad959d39b526007 Mon Sep 17 00:00:00 2001 From: Linas Vepstas Date: Thu, 3 Nov 2022 18:23:24 -0500 Subject: [PATCH 1/8] Simplify; remove the left-right distinction --- data/demo-atomese/storage.dict | 6 ++-- link-grammar/dict-atomese/local-as.h | 9 ++--- link-grammar/dict-atomese/lookup-atomese.cc | 22 ++++-------- link-grammar/dict-atomese/sections.cc | 38 ++++++++------------- 4 files changed, 25 insertions(+), 50 deletions(-) diff --git a/data/demo-atomese/storage.dict b/data/demo-atomese/storage.dict index 8fe16e8f7a..d4400f7338 100644 --- a/data/demo-atomese/storage.dict +++ b/data/demo-atomese/storage.dict @@ -134,8 +134,7 @@ % to 2 or more, then that many extra optional connectors will be added. % % If sections are not enabled, then these parameters have no effect. -#define left-pairs 1; -#define right-pairs 1; +#define extra-pairs 1; % Same as above, but the supplementary connectors will all be of type `ANY`, % and thus can connect to anything. Since these can connect to anything, @@ -146,8 +145,7 @@ % % Setting to zero disables this. % If sections are disabled, the these parameters have no effect. -#define left-any 2; -#define right-any 2; +#define extra-any 2; % Create expressions that consist entirely of word-pair relationships. 
% The disjuncts will have up to the specified number of connectors; 4 is diff --git a/link-grammar/dict-atomese/local-as.h b/link-grammar/dict-atomese/local-as.h index 6c1360b477..671ea1365b 100644 --- a/link-grammar/dict-atomese/local-as.h +++ b/link-grammar/dict-atomese/local-as.h @@ -44,13 +44,8 @@ class Local // Basic Sections bool enable_sections; - - // Supplements - int left_pairs; - int right_pairs; - - int left_any; - int right_any; + int extra_pairs; + int extra_any; // Disjuncts made from pairs int pair_disjuncts; diff --git a/link-grammar/dict-atomese/lookup-atomese.cc b/link-grammar/dict-atomese/lookup-atomese.cc index 2dc7a977c8..d578238fde 100644 --- a/link-grammar/dict-atomese/lookup-atomese.cc +++ b/link-grammar/dict-atomese/lookup-atomese.cc @@ -50,17 +50,13 @@ using namespace opencog; #define ANY_DEFAULT_STRING "any-default" #define ENABLE_SECTIONS_STRING "enable-sections" +#define EXTRA_PAIRS_STRING "extra-pairs" +#define EXTRA_ANY_STRING "extra-any" #define PAIR_DISJUNCTS_STRING "pair-disjuncts" #define PAIR_WITH_ANY_STRING "pair-with-any" -#define LEFT_PAIRS_STRING "left-pairs" -#define RIGHT_PAIRS_STRING "right-pairs" - #define ANY_DISJUNCTS_STRING "any-disjuncts" -#define LEFT_ANY_STRING "left-any" -#define RIGHT_ANY_STRING "right-any" - /// Shared global static AtomSpacePtr external_atomspace; @@ -162,12 +158,8 @@ bool as_open(Dictionary dict) local->any_default = atof(LDEF(ANY_DEFAULT_STRING, "3.0")); local->enable_sections = atoi(LDEF(ENABLE_SECTIONS_STRING, "1")); - - local->left_pairs = atoi(LDEF(LEFT_PAIRS_STRING, "1")); - local->right_pairs = atoi(LDEF(RIGHT_PAIRS_STRING, "1")); - - local->left_any = atoi(LDEF(LEFT_ANY_STRING, "2")); - local->right_any = atoi(LDEF(RIGHT_ANY_STRING, "2")); + local->extra_pairs = atoi(LDEF(EXTRA_PAIRS_STRING, "1")); + local->extra_any = atoi(LDEF(EXTRA_ANY_STRING, "2")); local->pair_disjuncts = atoi(LDEF(PAIR_DISJUNCTS_STRING, "4")); local->pair_with_any = atoi(LDEF(PAIR_WITH_ANY_STRING, "2")); @@ 
-274,8 +266,7 @@ bool as_boolean_lookup(Dictionary dict, const char *s) if (local->enable_sections) found = section_boolean_lookup(dict, s); - if (0 < local->pair_disjuncts or - 0 < local->left_pairs or 0 < local->right_pairs) + if (0 < local->pair_disjuncts or 0 < local->extra_pairs) { bool have_pairs = pair_boolean_lookup(dict, s); found = found or have_pairs; @@ -368,8 +359,7 @@ Exp* make_exprs(Dictionary dict, const Handle& germ) } // Create disjuncts consisting entirely of word-pair links. - if (0 < local->pair_disjuncts or - 0 < local->left_pairs or 0 < local->right_pairs) + if (0 < local->pair_disjuncts or 0 < local->extra_pairs) { Exp* cpr = make_cart_pairs(dict, germ, local->pair_disjuncts); diff --git a/link-grammar/dict-atomese/sections.cc b/link-grammar/dict-atomese/sections.cc index 5486513a08..4fbf3f7189 100644 --- a/link-grammar/dict-atomese/sections.cc +++ b/link-grammar/dict-atomese/sections.cc @@ -219,14 +219,21 @@ Exp* make_sect_exprs(Dictionary dict, const Handle& germ) { Local* local = (Local*) (dict->as_server); Exp* orhead = nullptr; + Exp* extras = nullptr; // Create some optional word-pair links; these may be nullptr's. - Exp* left_pairs = make_cart_pairs(dict, germ, local->left_pairs); - Exp* right_pairs = make_cart_pairs(dict, germ, local->right_pairs); + if (0 < local->extra_pairs) + { + Exp* extra_pairs = make_cart_pairs(dict, germ, local->extra_pairs); + or_enchain(dict, extras, extra_pairs); + } // Create some optional ANY-links; these may be nullptr's. - Exp* left_any = make_any_exprs(dict, local->left_any); - Exp* right_any = make_any_exprs(dict, local->right_any); + if (0 < local->extra_any) + { + Exp* extra_any = make_any_exprs(dict, local->extra_any); + or_enchain(dict, extras, extra_any); + } // Loop over all Sections on the word. HandleSeq sects = germ->getIncomingSetByType(SECTION); @@ -286,27 +293,12 @@ Exp* make_sect_exprs(Dictionary dict, const Handle& germ) continue; } - // Tack on ANY connectors, as configured. 
- if (left_any) + // Tack on extra connectors, as configured. + if (extras) { - Exp* optex = make_optional_node(dict->Exp_pool, left_any); + Exp* optex = make_optional_node(dict->Exp_pool, extras); and_enchain_left(dict, andhead, andtail, optex); - } - if (right_any) - { - Exp* optex = make_optional_node(dict->Exp_pool, right_any); - and_enchain_right(dict, andhead, andtail, optex); - } - - // Tack on word-pair connectors, as configured. - if (left_pairs) - { - Exp* optex = make_optional_node(dict->Exp_pool, left_pairs); - and_enchain_left(dict, andhead, andtail, optex); - } - if (right_pairs) - { - Exp* optex = make_optional_node(dict->Exp_pool, right_pairs); + optex = make_optional_node(dict->Exp_pool, extras); and_enchain_right(dict, andhead, andtail, optex); } From 0410c15be675c5cc55a1f74dc3d49946302b0c8e Mon Sep 17 00:00:00 2001 From: Linas Vepstas Date: Thu, 3 Nov 2022 18:34:27 -0500 Subject: [PATCH 2/8] Simplify the any-link code --- data/demo-atomese/storage.dict | 21 ++++++++--------- link-grammar/dict-atomese/local-as.h | 8 +++---- link-grammar/dict-atomese/lookup-atomese.cc | 14 +++++------ link-grammar/dict-atomese/sections.cc | 4 ++-- link-grammar/dict-atomese/word-pairs.cc | 26 ++++----------------- 5 files changed, 28 insertions(+), 45 deletions(-) diff --git a/data/demo-atomese/storage.dict b/data/demo-atomese/storage.dict index d4400f7338..cce5ffed94 100644 --- a/data/demo-atomese/storage.dict +++ b/data/demo-atomese/storage.dict @@ -133,7 +133,7 @@ % If set to zero, these supplementary links will not be generated. If set % to 2 or more, then that many extra optional connectors will be added. % -% If sections are not enabled, then these parameters have no effect. +% If sections are not enabled, then this parameter has no effect. #define extra-pairs 1; % Same as above, but the supplementary connectors will all be of type `ANY`, @@ -143,9 +143,9 @@ % those unknown words. 
The be effective, the cost should be set high enough % so that these are more costly than existing word-pairs. % -% Setting to zero disables this. -% If sections are disabled, the these parameters have no effect. -#define extra-any 2; +% This is a bool on/off flag. Setting to zero disables this. +% If sections are disabled, then this parameter has no effect. +#define extra-any 1; % Create expressions that consist entirely of word-pair relationships. % The disjuncts will have up to the specified number of connectors; 4 is @@ -160,18 +160,17 @@ % Supplement the above with additional connectors of type "ANY". This is % useful for providing links between words that don't already exist as % pairs in the dataset. This has no effect, if `pair-disjuncts` (above) -% is set to zero. -#define pair-with-any 2; +% is set to zero. This is a bool on/off value; setting to zero disables. +#define pair-with-any 1; % Create expressions that consist entirely of "ANY" link-types. -% The disjuncts will have up to the specified number of connectors; 4 is -% the default. Setting this to zero disables the creation of such -% disjuncts. If `enable-sections` and `pair-disjuncts` (above) are turned -% off, the result will be pure random planar tree parsing. +% This is a bool on/off parameter; setting this to zero disables the +% creation of such disjuncts. If `enable-sections` and `pair-disjuncts` +% (above) are turned off, the result will be pure random planar tree parsing. % % Each connector has a cost is that is determined by the config parameters, % above. -#define any-disjuncts 4; +#define any-disjuncts 1; % ----------------------- % For this file to be read, at least one bogus entry is needed.
It is diff --git a/link-grammar/dict-atomese/local-as.h b/link-grammar/dict-atomese/local-as.h index 671ea1365b..63593a7704 100644 --- a/link-grammar/dict-atomese/local-as.h +++ b/link-grammar/dict-atomese/local-as.h @@ -45,12 +45,12 @@ class Local // Basic Sections bool enable_sections; int extra_pairs; - int extra_any; + bool extra_any; // Disjuncts made from pairs int pair_disjuncts; - int pair_with_any; - int any_disjuncts; + bool pair_with_any; + bool any_disjuncts; }; bool section_boolean_lookup(Dictionary dict, const char *s); @@ -60,7 +60,7 @@ Exp* make_exprs(Dictionary dict, const Handle& germ); Exp* make_sect_exprs(Dictionary dict, const Handle& germ); Exp* make_pair_exprs(Dictionary dict, const Handle& germ); Exp* make_cart_pairs(Dictionary dict, const Handle& germ, int arity); -Exp* make_any_exprs(Dictionary dict, int arity); +Exp* make_any_exprs(Dictionary dict); void or_enchain(Dictionary, Exp* &orhead, Exp*); void and_enchain_left(Dictionary, Exp* &orhead, Exp* &ortail, Exp*); diff --git a/link-grammar/dict-atomese/lookup-atomese.cc b/link-grammar/dict-atomese/lookup-atomese.cc index d578238fde..83526debd1 100644 --- a/link-grammar/dict-atomese/lookup-atomese.cc +++ b/link-grammar/dict-atomese/lookup-atomese.cc @@ -159,11 +159,11 @@ bool as_open(Dictionary dict) local->enable_sections = atoi(LDEF(ENABLE_SECTIONS_STRING, "1")); local->extra_pairs = atoi(LDEF(EXTRA_PAIRS_STRING, "1")); - local->extra_any = atoi(LDEF(EXTRA_ANY_STRING, "2")); + local->extra_any = atoi(LDEF(EXTRA_ANY_STRING, "1")); local->pair_disjuncts = atoi(LDEF(PAIR_DISJUNCTS_STRING, "4")); - local->pair_with_any = atoi(LDEF(PAIR_WITH_ANY_STRING, "2")); - local->any_disjuncts = atoi(LDEF(ANY_DISJUNCTS_STRING, "4")); + local->pair_with_any = atoi(LDEF(PAIR_WITH_ANY_STRING, "1")); + local->any_disjuncts = atoi(LDEF(ANY_DISJUNCTS_STRING, "1")); dict->as_server = (void*) local; @@ -352,9 +352,9 @@ Exp* make_exprs(Dictionary dict, const Handle& germ) Exp* orhead = nullptr; // Create 
disjuncts consisting entirely of "ANY" links. - if (0 < local->any_disjuncts) + if (local->any_disjuncts) { - Exp* any = make_any_exprs(dict, local->any_disjuncts); + Exp* any = make_any_exprs(dict); or_enchain(dict, orhead, any); } @@ -364,9 +364,9 @@ Exp* make_exprs(Dictionary dict, const Handle& germ) Exp* cpr = make_cart_pairs(dict, germ, local->pair_disjuncts); // Add "ANY" links, if requested. - if (0 < local->pair_with_any) + if (local->pair_with_any) { - Exp* ap = make_any_exprs(dict, local->pair_with_any); + Exp* ap = make_any_exprs(dict); Exp* dummy; and_enchain_left(dict, cpr, dummy, ap); } diff --git a/link-grammar/dict-atomese/sections.cc b/link-grammar/dict-atomese/sections.cc index 4fbf3f7189..6dd04b09d0 100644 --- a/link-grammar/dict-atomese/sections.cc +++ b/link-grammar/dict-atomese/sections.cc @@ -229,9 +229,9 @@ Exp* make_sect_exprs(Dictionary dict, const Handle& germ) } // Create some optional ANY-links; these may be nullptr's. - if (0 < local->extra_any) + if (local->extra_any) { - Exp* extra_any = make_any_exprs(dict, local->extra_any); + Exp* extra_any = make_any_exprs(dict); or_enchain(dict, extras, extra_any); } diff --git a/link-grammar/dict-atomese/word-pairs.cc b/link-grammar/dict-atomese/word-pairs.cc index e224d37c3b..3672941c89 100644 --- a/link-grammar/dict-atomese/word-pairs.cc +++ b/link-grammar/dict-atomese/word-pairs.cc @@ -203,35 +203,19 @@ Exp* make_cart_pairs(Dictionary dict, const Handle& germ, int arity) /// If these are used all by themselves, the resulting parses will /// be random planar graphs; i.e. will be equivalent to the `any` /// language parses. -Exp* make_any_exprs(Dictionary dict, int arity) +Exp* make_any_exprs(Dictionary dict) { - if (arity <= 0) return nullptr; - // Create a pair of ANY-links that can connect either left or right. 
- Exp* aneg = make_connector_node(dict, dict->Exp_pool, "ANY", '-', false); - Exp* apos = make_connector_node(dict, dict->Exp_pool, "ANY", '+', false); + Exp* aneg = make_connector_node(dict, dict->Exp_pool, "ANY", '-', true); + Exp* apos = make_connector_node(dict, dict->Exp_pool, "ANY", '+', true); Local* local = (Local*) (dict->as_server); aneg->cost = local->any_default; apos->cost = local->any_default; - Exp* any = make_or_node(dict->Exp_pool, aneg, apos); - Exp* optex = make_optional_node(dict->Exp_pool, any); - - Exp* andhead = nullptr; - Exp* andtail = nullptr; - - andhead = make_and_node(dict->Exp_pool, optex, NULL); - andtail = andhead->operand_first; + Exp* any = make_and_node(dict->Exp_pool, aneg, apos); - for (int i=1; i< arity; i++) - { - Exp* opt = make_optional_node(dict->Exp_pool, any); - andtail->operand_next = opt; - andtail = opt; - } - - return andhead; + return any; } #endif // HAVE_ATOMESE From 824eae03ebefdff7b8ceecb0f2a6a98166aeca72 Mon Sep 17 00:00:00 2001 From: Linas Vepstas Date: Thu, 3 Nov 2022 18:47:22 -0500 Subject: [PATCH 3/8] Add a design note --- link-grammar/dict-atomese/word-pairs.cc | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/link-grammar/dict-atomese/word-pairs.cc b/link-grammar/dict-atomese/word-pairs.cc index 3672941c89..494b6ac713 100644 --- a/link-grammar/dict-atomese/word-pairs.cc +++ b/link-grammar/dict-atomese/word-pairs.cc @@ -170,6 +170,18 @@ Exp* make_pair_exprs(Dictionary dict, const Handle& germ) /// and (A+ or B- or C+ or ()) and (A+ or B- or C+ or ())`. When /// this is exploded into disjuncts, any combination is possible, /// from size zero to three. That's why its a Cartesian product. +/// +/// FYI, this is a work-around for the lack of a commutative +/// multi-product. What we really want to do here is to have the +/// expression `(@A+ com @B- com @C+)` where `com` is a commutative +/// product.
The `@` sign denotes a multi-connector, so that `@A+` +/// is the same things as `(() or A+ or (A+ & A+) or ...)` and the +/// commutative product allows any of these to commute, i.e. so that +/// disjuncts such as `(A+ & C+ & A+ & C+)` are possible. But we do +/// not have such a commutative multi-product, and so we fake it with +/// a plain cartesian product. The only issue is that this eats up +/// RAM. At least RAM use is linear: it goes as `O(arity)`. More +/// precisely, as `O(npairs x arity)`. Exp* make_cart_pairs(Dictionary dict, const Handle& germ, int arity) { if (0 >= arity) return nullptr; From 966bb9bef3eea405cfb1ae3e877a62efb81e9fd9 Mon Sep 17 00:00:00 2001 From: Linas Vepstas Date: Thu, 3 Nov 2022 20:51:42 -0500 Subject: [PATCH 4/8] Fx the tacking n of ANY links --- link-grammar/dict-atomese/local-as.h | 2 +- link-grammar/dict-atomese/lookup-atomese.cc | 11 ++--------- link-grammar/dict-atomese/sections.cc | 13 ++----------- link-grammar/dict-atomese/word-pairs.cc | 13 ++++++++++++- 4 files changed, 17 insertions(+), 22 deletions(-) diff --git a/link-grammar/dict-atomese/local-as.h b/link-grammar/dict-atomese/local-as.h index 63593a7704..82767e1373 100644 --- a/link-grammar/dict-atomese/local-as.h +++ b/link-grammar/dict-atomese/local-as.h @@ -59,7 +59,7 @@ bool pair_boolean_lookup(Dictionary dict, const char *s); Exp* make_exprs(Dictionary dict, const Handle& germ); Exp* make_sect_exprs(Dictionary dict, const Handle& germ); Exp* make_pair_exprs(Dictionary dict, const Handle& germ); -Exp* make_cart_pairs(Dictionary dict, const Handle& germ, int arity); +Exp* make_cart_pairs(Dictionary dict, const Handle& germ, int arity, bool any); Exp* make_any_exprs(Dictionary dict); void or_enchain(Dictionary, Exp* &orhead, Exp*); diff --git a/link-grammar/dict-atomese/lookup-atomese.cc b/link-grammar/dict-atomese/lookup-atomese.cc index 83526debd1..1730aa40fb 100644 --- a/link-grammar/dict-atomese/lookup-atomese.cc +++ 
b/link-grammar/dict-atomese/lookup-atomese.cc @@ -361,15 +361,8 @@ Exp* make_exprs(Dictionary dict, const Handle& germ) // Create disjuncts consisting entirely of word-pair links. if (0 < local->pair_disjuncts or 0 < local->extra_pairs) { - Exp* cpr = make_cart_pairs(dict, germ, local->pair_disjuncts); - - // Add "ANY" links, if requested. - if (local->pair_with_any) - { - Exp* ap = make_any_exprs(dict); - Exp* dummy; - and_enchain_left(dict, cpr, dummy, ap); - } + Exp* cpr = make_cart_pairs(dict, germ, local->pair_disjuncts, + local->pair_with_any); or_enchain(dict, orhead, cpr); } diff --git a/link-grammar/dict-atomese/sections.cc b/link-grammar/dict-atomese/sections.cc index 6dd04b09d0..6ce405ee6e 100644 --- a/link-grammar/dict-atomese/sections.cc +++ b/link-grammar/dict-atomese/sections.cc @@ -223,17 +223,8 @@ Exp* make_sect_exprs(Dictionary dict, const Handle& germ) // Create some optional word-pair links; these may be nullptr's. if (0 < local->extra_pairs) - { - Exp* extra_pairs = make_cart_pairs(dict, germ, local->extra_pairs); - or_enchain(dict, extras, extra_pairs); - } - - // Create some optional ANY-links; these may be nullptr's. - if (local->extra_any) - { - Exp* extra_any = make_any_exprs(dict); - or_enchain(dict, extras, extra_any); - } + extras = make_cart_pairs(dict, germ, local->extra_pairs, + local->extra_any); // Loop over all Sections on the word. HandleSeq sects = germ->getIncomingSetByType(SECTION); diff --git a/link-grammar/dict-atomese/word-pairs.cc b/link-grammar/dict-atomese/word-pairs.cc index 494b6ac713..742c5195aa 100644 --- a/link-grammar/dict-atomese/word-pairs.cc +++ b/link-grammar/dict-atomese/word-pairs.cc @@ -182,7 +182,8 @@ Exp* make_pair_exprs(Dictionary dict, const Handle& germ) /// a plain cartesian product. The only issue is that this eats up /// RAM. At least RAM use is linear: it goes as `O(arity)`. More /// precisely, as `O(npairs x arity)`. 
-Exp* make_cart_pairs(Dictionary dict, const Handle& germ, int arity) +Exp* make_cart_pairs(Dictionary dict, const Handle& germ, + int arity, bool with_any) { if (0 >= arity) return nullptr; @@ -192,7 +193,17 @@ Exp* make_cart_pairs(Dictionary dict, const Handle& germ, int arity) Exp* epr = make_pair_exprs(dict, germ); if (nullptr == epr) return nullptr; + // Tack on ANY connectors, if requested. + if (with_any) + { + Exp* ap = make_any_exprs(dict); + epr = make_or_node(dict->Exp_pool, epr, ap); + } Exp* optex = make_optional_node(dict->Exp_pool, epr); + + // If its 1-dimensional, we are done. + if (1 == arity) return optex; + and_enchain_right(dict, andhead, andtail, optex); for (int i=1; i< arity; i++) From 659abe69fdfae90bb56adbec7e465ba12f8e48ac Mon Sep 17 00:00:00 2001 From: Linas Vepstas Date: Thu, 3 Nov 2022 22:04:33 -0500 Subject: [PATCH 5/8] Fix construction of any nodes I thought that and would work, but it doesn't --- link-grammar/dict-atomese/word-pairs.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/link-grammar/dict-atomese/word-pairs.cc b/link-grammar/dict-atomese/word-pairs.cc index 742c5195aa..ed186f44bc 100644 --- a/link-grammar/dict-atomese/word-pairs.cc +++ b/link-grammar/dict-atomese/word-pairs.cc @@ -236,7 +236,7 @@ Exp* make_any_exprs(Dictionary dict) aneg->cost = local->any_default; apos->cost = local->any_default; - Exp* any = make_and_node(dict->Exp_pool, aneg, apos); + Exp* any = make_or_node(dict->Exp_pool, aneg, apos); return any; } From 8ca1a2b2c00537875528d68d215c763041ca228e Mon Sep 17 00:00:00 2001 From: Linas Vepstas Date: Thu, 3 Nov 2022 22:59:50 -0500 Subject: [PATCH 6/8] iAdd automatic handling of UNKNOWN-WORD --- link-grammar/dict-atomese/lookup-atomese.cc | 64 +++++++++++++-------- 1 file changed, 40 insertions(+), 24 deletions(-) diff --git a/link-grammar/dict-atomese/lookup-atomese.cc b/link-grammar/dict-atomese/lookup-atomese.cc index 1730aa40fb..39033371a9 100644 --- 
a/link-grammar/dict-atomese/lookup-atomese.cc +++ b/link-grammar/dict-atomese/lookup-atomese.cc @@ -260,6 +260,9 @@ bool as_boolean_lookup(Dictionary dict, const char *s) bool found = dict_node_exists_lookup(dict, s); if (found) return true; + if (0 == strcmp(s, "")) + return true; + if (0 == strcmp(s, LEFT_WALL_WORD)) s = "###LEFT-WALL###"; @@ -376,19 +379,54 @@ Exp* make_exprs(Dictionary dict, const Handle& germ) return orhead; } +/// Given an expression, wrap it with a Dict_node and insert it into +/// the dictionary. +static Dict_node * make_dn(Dictionary dict, Exp* exp, const char* ssc) +{ + Dict_node* dn = (Dict_node*) malloc(sizeof(Dict_node)); + memset(dn, 0, sizeof(Dict_node)); + dn->string = ssc; + dn->exp = exp; + + // Cache the result; avoid repeated lookups. + dict->root = dict_node_insert(dict, dict->root, dn); + dict->num_entries++; + + lgdebug(+D_SPEC+5, "as_lookup_list %d for >>%s<< nexpr=%d\n", + dict->num_entries, ssc, size_of_expression(exp)); + + // Rebalance the tree every now and then. + if (0 == dict->num_entries% 30) + { + dict->root = dsw_tree_to_vine(dict->root); + dict->root = dsw_vine_to_tree(dict->root, dict->num_entries); + } + + // Perform the lookup. We cannot return the dn above, as the + // as_free_llist() below will delete it, leading to mem corruption. + dn = dict_node_lookup(dict, ssc); + return dn; +} + /// Given a word, return the collection of Dict_nodes holding the /// expressions for that word. Dict_node * as_lookup_list(Dictionary dict, const char *s) { // Do we already have this word cached? If so, pull from // the cache. 
- Dict_node * dn = dict_node_lookup(dict, s); + Dict_node* dn = dict_node_lookup(dict, s); if (dn) return dn; const char* ssc = string_set_add(s, dict->string_set); Local* local = (Local*) (dict->as_server); + if (0 == strcmp(s, "")) + { + Exp* exp = make_any_exprs(dict); + return make_dn(dict, exp, ssc); + } + if (0 == strcmp(s, LEFT_WALL_WORD)) s = "###LEFT-WALL###"; @@ -419,29 +457,7 @@ Dict_node * as_lookup_list(Dictionary dict, const char *s) if (nullptr == exp) return nullptr; - dn = (Dict_node*) malloc(sizeof(Dict_node)); - memset(dn, 0, sizeof(Dict_node)); - dn->string = ssc; - dn->exp = exp; - - // Cache the result; avoid repeated lookups. - dict->root = dict_node_insert(dict, dict->root, dn); - dict->num_entries++; - - lgdebug(+D_SPEC+5, "as_lookup_list %d for >>%s<< nexpr=%d\n", - dict->num_entries, ssc, size_of_expression(exp)); - - // Rebalance the tree every now and then. - if (0 == dict->num_entries% 30) - { - dict->root = dsw_tree_to_vine(dict->root); - dict->root = dsw_vine_to_tree(dict->root, dict->num_entries); - } - - // Perform the lookup. We cannot return the dn above, as the - // as_free_llist() below will delete it, leading to mem corruption. - dn = dict_node_lookup(dict, ssc); - return dn; + return make_dn(dict, exp, ssc); } // This is supposed to provide a wild-card lookup. 
However, From ebf1d4de902ea873e3aae7c3d5860be686419ed9 Mon Sep 17 00:00:00 2001 From: Linas Vepstas Date: Thu, 3 Nov 2022 23:16:09 -0500 Subject: [PATCH 7/8] Add bool flag to enable the unknown-word mechanism --- data/demo-atomese/storage.dict | 13 ++++++++++++- link-grammar/dict-atomese/local-as.h | 2 ++ link-grammar/dict-atomese/lookup-atomese.cc | 10 +++++++--- 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/data/demo-atomese/storage.dict b/data/demo-atomese/storage.dict index cce5ffed94..fcc9ca4db4 100644 --- a/data/demo-atomese/storage.dict +++ b/data/demo-atomese/storage.dict @@ -68,6 +68,11 @@ % obtained above can be rescaled linearly befor being used as a cost. % The rescaling is as usual: y=mx+b where m==`cost-scale` and % b==`cost-offset`. +% +% Keep in mind that the parser ranks parses from lowest to highest cost. +% If word-pair links have a negative cost, the parser is incentivized to +% add as many of these as possible, while still resulting in a planar +% graph with many loops. #define cost-scale -0.5; #define cost-offset 0.0; @@ -170,7 +175,13 @@ % % Each connector has a cost is that is determined by the config parameters, % above. -#define any-disjuncts 1; +#define any-disjuncts 0; + +% Enable the automatic generation of `<UNKNOWN-WORD>`. It will be +% automatically added to the dictionary, with multi-ANY connectors on it. +% The ANY connectors will be used, irrespective of the other any setting +% above. +#define enable-unknown-word 1; % ----------------------- % For this file to be read, at least one bogus entry is needed.
It is diff --git a/link-grammar/dict-atomese/local-as.h b/link-grammar/dict-atomese/local-as.h index 82767e1373..647e421fc3 100644 --- a/link-grammar/dict-atomese/local-as.h +++ b/link-grammar/dict-atomese/local-as.h @@ -51,6 +51,8 @@ class Local int pair_disjuncts; bool pair_with_any; bool any_disjuncts; + + bool enable_unknown_word; }; bool section_boolean_lookup(Dictionary dict, const char *s); diff --git a/link-grammar/dict-atomese/lookup-atomese.cc b/link-grammar/dict-atomese/lookup-atomese.cc index 39033371a9..33140f6129 100644 --- a/link-grammar/dict-atomese/lookup-atomese.cc +++ b/link-grammar/dict-atomese/lookup-atomese.cc @@ -58,6 +58,8 @@ using namespace opencog; #define ANY_DISJUNCTS_STRING "any-disjuncts" +#define ENABLE_UNKNOWN_WORD_STRING "enable-unknown-word" + /// Shared global static AtomSpacePtr external_atomspace; static StorageNodePtr external_storage; @@ -163,7 +165,9 @@ bool as_open(Dictionary dict) local->pair_disjuncts = atoi(LDEF(PAIR_DISJUNCTS_STRING, "4")); local->pair_with_any = atoi(LDEF(PAIR_WITH_ANY_STRING, "1")); - local->any_disjuncts = atoi(LDEF(ANY_DISJUNCTS_STRING, "1")); + local->any_disjuncts = atoi(LDEF(ANY_DISJUNCTS_STRING, "0")); + + local->enable_unknown_word = atoi(LDEF(ENABLE_UNKNOWN_WORD_STRING, "1")); dict->as_server = (void*) local; @@ -260,7 +264,7 @@ bool as_boolean_lookup(Dictionary dict, const char *s) bool found = dict_node_exists_lookup(dict, s); if (found) return true; - if (0 == strcmp(s, "")) + if (local->enable_unknown_word and 0 == strcmp(s, "")) return true; if (0 == strcmp(s, LEFT_WALL_WORD)) @@ -421,7 +425,7 @@ Dict_node * as_lookup_list(Dictionary dict, const char *s) const char* ssc = string_set_add(s, dict->string_set); Local* local = (Local*) (dict->as_server); - if (0 == strcmp(s, "")) + if (local->enable_unknown_word and 0 == strcmp(s, "")) { Exp* exp = make_any_exprs(dict); return make_dn(dict, exp, ssc); From 8d1d59156108d6880727dedab8f5e5a03f50f87f Mon Sep 17 00:00:00 2001 From: Linas Vepstas 
Date: Fri, 4 Nov 2022 00:26:04 -0500 Subject: [PATCH 8/8] Fix how unknown word uses links --- link-grammar/dict-atomese/local-as.h | 1 + link-grammar/dict-atomese/lookup-atomese.cc | 5 +++- link-grammar/dict-atomese/word-pairs.cc | 33 +++++++++++++++++++++ 3 files changed, 38 insertions(+), 1 deletion(-) diff --git a/link-grammar/dict-atomese/local-as.h b/link-grammar/dict-atomese/local-as.h index 647e421fc3..4c5350519c 100644 --- a/link-grammar/dict-atomese/local-as.h +++ b/link-grammar/dict-atomese/local-as.h @@ -63,6 +63,7 @@ Exp* make_sect_exprs(Dictionary dict, const Handle& germ); Exp* make_pair_exprs(Dictionary dict, const Handle& germ); Exp* make_cart_pairs(Dictionary dict, const Handle& germ, int arity, bool any); Exp* make_any_exprs(Dictionary dict); +Exp* make_cart_any(Dictionary dict, int arity); void or_enchain(Dictionary, Exp* &orhead, Exp*); void and_enchain_left(Dictionary, Exp* &orhead, Exp* &ortail, Exp*); diff --git a/link-grammar/dict-atomese/lookup-atomese.cc b/link-grammar/dict-atomese/lookup-atomese.cc index 33140f6129..80285686ab 100644 --- a/link-grammar/dict-atomese/lookup-atomese.cc +++ b/link-grammar/dict-atomese/lookup-atomese.cc @@ -427,7 +427,10 @@ Dict_node * as_lookup_list(Dictionary dict, const char *s) if (local->enable_unknown_word and 0 == strcmp(s, "")) { - Exp* exp = make_any_exprs(dict); + // XXX Note the hard-coded 6. I do not understand why 2 is not + // enough. See issue #1351 for a discussion. + // https://github.com/opencog/link-grammar/issues/1351 + Exp* exp = make_cart_any(dict, 6); return make_dn(dict, exp, ssc); } diff --git a/link-grammar/dict-atomese/word-pairs.cc b/link-grammar/dict-atomese/word-pairs.cc index ed186f44bc..5aec3d787c 100644 --- a/link-grammar/dict-atomese/word-pairs.cc +++ b/link-grammar/dict-atomese/word-pairs.cc @@ -241,4 +241,37 @@ Exp* make_any_exprs(Dictionary dict) return any; } +/// Much like make_cart_pairs, except that this duplicates the +/// ANY connector.
It creates the expression +/// {@ANY- or @ANY+} and {@ANY- or @ANY+} and ... and {@ANY- or @ANY+} +/// This Cartesian product allows multiple connectors to participate in loops. +/// However, the behavior is ... surprising. See +/// https://github.com/opencog/link-grammar/issues/1351 +/// for a discussion of what this is all about. +Exp* make_cart_any(Dictionary dict, int arity) +{ + if (0 >= arity) return nullptr; + + Exp* andhead = nullptr; + Exp* andtail = nullptr; + + Exp* any = make_any_exprs(dict); + + Exp* optex = make_optional_node(dict->Exp_pool, any); + + // If it's 1-dimensional, we are done. + if (1 == arity) return optex; + + and_enchain_right(dict, andhead, andtail, optex); + + for (int i=1; i< arity; i++) + { + Exp* opt = make_optional_node(dict->Exp_pool, any); + and_enchain_right(dict, andhead, andtail, opt); + } + + return andhead; +} + +// =============================================================== #endif // HAVE_ATOMESE