diff --git a/data/demo-atomese/storage.dict b/data/demo-atomese/storage.dict index 8fe16e8f7a..fcc9ca4db4 100644 --- a/data/demo-atomese/storage.dict +++ b/data/demo-atomese/storage.dict @@ -68,6 +68,11 @@ % obtained above can be rescaled linearly befor being used as a cost. % The rescaling is as usual: y=mx+b where m==`cost-scale` and % b==`cost-offset`. +% +% Keep in mind that the parser ranks parses from lowest to highest cost. +% If word-pair links have a negative cost, the parser is incentivized to +% add as many of these as possible, while still resulting in a planar +% graph with many loops. #define cost-scale -0.5; #define cost-offset 0.0; @@ -133,9 +138,8 @@ % If set to zero, these supplementary links will not be generated. If set % to 2 or more, then that many extra optional connectors will be added. % -% If sections are not enabled, then these parameters have no effect. -#define left-pairs 1; -#define right-pairs 1; +% If sections are not enabled, then this parameter has no effect. +#define extra-pairs 1; % Same as above, but the supplementary connectors will all be of type `ANY`, % and thus can connect to anything. Since these can connect to anything, @@ -144,10 +148,9 @@ % those unknown words. The be effective, the cost should be set high enough % so that these are more costly than existing word-pairs. % -% Setting to zero disables this. -% If sections are disabled, the these parameters have no effect. -#define left-any 2; -#define right-any 2; +% This is a bool on/off flag. Setting to zero disables this. +% If sections are disabled, then this parameter has no effect. +#define extra-any 1; % Create expressions that consist entirely of word-pair relationships. % The disjuncts will have up to the specified number of connectors; 4 is @@ -162,18 +165,23 @@ % Supplement the above with additional connectors of type "ANY". This is % useful for providing links between words that don't already exist as % pairs in the dataset. 
This has no effect, if `pair-disjuncts` (above) -% is set to zero. -#define pair-with-any 2; +% is set to zero. This is a bool on/off value; setting to zero disables. +#define pair-with-any 1; % Create expressions that consist entirely of "ANY" link-types. -% The disjuncts will have up to the specified number of connectors; 4 is -% the default. Setting this to zero disables the creation of such -% disjuncts. If `enable-sections` and `pair-disjuncts` (above) are turned -% off, the result will be pure random planar tree parsing. +% This is a bool on/off parameter; setting this to zero disables the +% creation of such disjuncts. If `enable-sections` and `pair-disjuncts` +% (above) are turned off, the result will be pure random planar tree parsing. % % Each connector has a cost is that is determined by the config parameters, % above. -#define any-disjuncts 4; +#define any-disjuncts 0; + +% Enable the automatic generation of <UNKNOWN-WORD>. It will be +% automatically added to the dictionary, with multi-ANY connectors on it. +% The ANY connectors will be used, irrespective of the other any setting +% above. +#define enable-unknown-word 1; % ----------------------- % For this file to be read, at least one bogus entry is needed. 
It is diff --git a/link-grammar/dict-atomese/local-as.h b/link-grammar/dict-atomese/local-as.h index 6c1360b477..4c5350519c 100644 --- a/link-grammar/dict-atomese/local-as.h +++ b/link-grammar/dict-atomese/local-as.h @@ -44,18 +44,15 @@ class Local // Basic Sections bool enable_sections; - - // Supplements - int left_pairs; - int right_pairs; - - int left_any; - int right_any; + int extra_pairs; + bool extra_any; // Disjuncts made from pairs int pair_disjuncts; - int pair_with_any; - int any_disjuncts; + bool pair_with_any; + bool any_disjuncts; + + bool enable_unknown_word; }; bool section_boolean_lookup(Dictionary dict, const char *s); @@ -64,8 +61,9 @@ bool pair_boolean_lookup(Dictionary dict, const char *s); Exp* make_exprs(Dictionary dict, const Handle& germ); Exp* make_sect_exprs(Dictionary dict, const Handle& germ); Exp* make_pair_exprs(Dictionary dict, const Handle& germ); -Exp* make_cart_pairs(Dictionary dict, const Handle& germ, int arity); -Exp* make_any_exprs(Dictionary dict, int arity); +Exp* make_cart_pairs(Dictionary dict, const Handle& germ, int arity, bool any); +Exp* make_any_exprs(Dictionary dict); +Exp* make_cart_any(Dictionary dict, int arity); void or_enchain(Dictionary, Exp* &orhead, Exp*); void and_enchain_left(Dictionary, Exp* &orhead, Exp* &ortail, Exp*); diff --git a/link-grammar/dict-atomese/lookup-atomese.cc b/link-grammar/dict-atomese/lookup-atomese.cc index 2dc7a977c8..80285686ab 100644 --- a/link-grammar/dict-atomese/lookup-atomese.cc +++ b/link-grammar/dict-atomese/lookup-atomese.cc @@ -50,17 +50,15 @@ using namespace opencog; #define ANY_DEFAULT_STRING "any-default" #define ENABLE_SECTIONS_STRING "enable-sections" +#define EXTRA_PAIRS_STRING "extra-pairs" +#define EXTRA_ANY_STRING "extra-any" #define PAIR_DISJUNCTS_STRING "pair-disjuncts" #define PAIR_WITH_ANY_STRING "pair-with-any" -#define LEFT_PAIRS_STRING "left-pairs" -#define RIGHT_PAIRS_STRING "right-pairs" - #define ANY_DISJUNCTS_STRING "any-disjuncts" -#define 
LEFT_ANY_STRING "left-any" -#define RIGHT_ANY_STRING "right-any" +#define ENABLE_UNKNOWN_WORD_STRING "enable-unknown-word" /// Shared global static AtomSpacePtr external_atomspace; @@ -162,16 +160,14 @@ bool as_open(Dictionary dict) local->any_default = atof(LDEF(ANY_DEFAULT_STRING, "3.0")); local->enable_sections = atoi(LDEF(ENABLE_SECTIONS_STRING, "1")); - - local->left_pairs = atoi(LDEF(LEFT_PAIRS_STRING, "1")); - local->right_pairs = atoi(LDEF(RIGHT_PAIRS_STRING, "1")); - - local->left_any = atoi(LDEF(LEFT_ANY_STRING, "2")); - local->right_any = atoi(LDEF(RIGHT_ANY_STRING, "2")); + local->extra_pairs = atoi(LDEF(EXTRA_PAIRS_STRING, "1")); + local->extra_any = atoi(LDEF(EXTRA_ANY_STRING, "1")); local->pair_disjuncts = atoi(LDEF(PAIR_DISJUNCTS_STRING, "4")); - local->pair_with_any = atoi(LDEF(PAIR_WITH_ANY_STRING, "2")); - local->any_disjuncts = atoi(LDEF(ANY_DISJUNCTS_STRING, "4")); + local->pair_with_any = atoi(LDEF(PAIR_WITH_ANY_STRING, "1")); + local->any_disjuncts = atoi(LDEF(ANY_DISJUNCTS_STRING, "0")); + + local->enable_unknown_word = atoi(LDEF(ENABLE_UNKNOWN_WORD_STRING, "1")); dict->as_server = (void*) local; @@ -268,14 +264,16 @@ bool as_boolean_lookup(Dictionary dict, const char *s) bool found = dict_node_exists_lookup(dict, s); if (found) return true; + if (local->enable_unknown_word and 0 == strcmp(s, "")) + return true; + if (0 == strcmp(s, LEFT_WALL_WORD)) s = "###LEFT-WALL###"; if (local->enable_sections) found = section_boolean_lookup(dict, s); - if (0 < local->pair_disjuncts or - 0 < local->left_pairs or 0 < local->right_pairs) + if (0 < local->pair_disjuncts or 0 < local->extra_pairs) { bool have_pairs = pair_boolean_lookup(dict, s); found = found or have_pairs; @@ -361,25 +359,17 @@ Exp* make_exprs(Dictionary dict, const Handle& germ) Exp* orhead = nullptr; // Create disjuncts consisting entirely of "ANY" links. 
- if (0 < local->any_disjuncts) + if (local->any_disjuncts) { - Exp* any = make_any_exprs(dict, local->any_disjuncts); + Exp* any = make_any_exprs(dict); or_enchain(dict, orhead, any); } // Create disjuncts consisting entirely of word-pair links. - if (0 < local->pair_disjuncts or - 0 < local->left_pairs or 0 < local->right_pairs) + if (0 < local->pair_disjuncts or 0 < local->extra_pairs) { - Exp* cpr = make_cart_pairs(dict, germ, local->pair_disjuncts); - - // Add "ANY" links, if requested. - if (0 < local->pair_with_any) - { - Exp* ap = make_any_exprs(dict, local->pair_with_any); - Exp* dummy; - and_enchain_left(dict, cpr, dummy, ap); - } + Exp* cpr = make_cart_pairs(dict, germ, local->pair_disjuncts, + local->pair_with_any); or_enchain(dict, orhead, cpr); } @@ -393,19 +383,57 @@ Exp* make_exprs(Dictionary dict, const Handle& germ) return orhead; } +/// Given an expression, wrap it with a Dict_node and insert it into +/// the dictionary. +static Dict_node * make_dn(Dictionary dict, Exp* exp, const char* ssc) +{ + Dict_node* dn = (Dict_node*) malloc(sizeof(Dict_node)); + memset(dn, 0, sizeof(Dict_node)); + dn->string = ssc; + dn->exp = exp; + + // Cache the result; avoid repeated lookups. + dict->root = dict_node_insert(dict, dict->root, dn); + dict->num_entries++; + + lgdebug(+D_SPEC+5, "as_lookup_list %d for >>%s<< nexpr=%d\n", + dict->num_entries, ssc, size_of_expression(exp)); + + // Rebalance the tree every now and then. + if (0 == dict->num_entries% 30) + { + dict->root = dsw_tree_to_vine(dict->root); + dict->root = dsw_vine_to_tree(dict->root, dict->num_entries); + } + + // Perform the lookup. We cannot return the dn above, as the + // as_free_llist() below will delete it, leading to mem corruption. + dn = dict_node_lookup(dict, ssc); + return dn; +} + /// Given a word, return the collection of Dict_nodes holding the /// expressions for that word. Dict_node * as_lookup_list(Dictionary dict, const char *s) { // Do we already have this word cached? 
If so, pull from // the cache. - Dict_node * dn = dict_node_lookup(dict, s); + Dict_node* dn = dict_node_lookup(dict, s); if (dn) return dn; const char* ssc = string_set_add(s, dict->string_set); Local* local = (Local*) (dict->as_server); + if (local->enable_unknown_word and 0 == strcmp(s, "")) + { + // XXX Note the hard-coded 6. I do not understand why 2 is not + // enough. See issue #1351 for a discussion. + // https://github.com/opencog/link-grammar/issues/1351 + Exp* exp = make_cart_any(dict, 6); + return make_dn(dict, exp, ssc); + } + if (0 == strcmp(s, LEFT_WALL_WORD)) s = "###LEFT-WALL###"; @@ -436,29 +464,7 @@ Dict_node * as_lookup_list(Dictionary dict, const char *s) if (nullptr == exp) return nullptr; - dn = (Dict_node*) malloc(sizeof(Dict_node)); - memset(dn, 0, sizeof(Dict_node)); - dn->string = ssc; - dn->exp = exp; - - // Cache the result; avoid repeated lookups. - dict->root = dict_node_insert(dict, dict->root, dn); - dict->num_entries++; - - lgdebug(+D_SPEC+5, "as_lookup_list %d for >>%s<< nexpr=%d\n", - dict->num_entries, ssc, size_of_expression(exp)); - - // Rebalance the tree every now and then. - if (0 == dict->num_entries% 30) - { - dict->root = dsw_tree_to_vine(dict->root); - dict->root = dsw_vine_to_tree(dict->root, dict->num_entries); - } - - // Perform the lookup. We cannot return the dn above, as the - // as_free_llist() below will delete it, leading to mem corruption. - dn = dict_node_lookup(dict, ssc); - return dn; + return make_dn(dict, exp, ssc); } // This is supposed to provide a wild-card lookup. 
However, diff --git a/link-grammar/dict-atomese/sections.cc b/link-grammar/dict-atomese/sections.cc index 5486513a08..6ce405ee6e 100644 --- a/link-grammar/dict-atomese/sections.cc +++ b/link-grammar/dict-atomese/sections.cc @@ -219,14 +219,12 @@ Exp* make_sect_exprs(Dictionary dict, const Handle& germ) { Local* local = (Local*) (dict->as_server); Exp* orhead = nullptr; + Exp* extras = nullptr; // Create some optional word-pair links; these may be nullptr's. - Exp* left_pairs = make_cart_pairs(dict, germ, local->left_pairs); - Exp* right_pairs = make_cart_pairs(dict, germ, local->right_pairs); - - // Create some optional ANY-links; these may be nullptr's. - Exp* left_any = make_any_exprs(dict, local->left_any); - Exp* right_any = make_any_exprs(dict, local->right_any); + if (0 < local->extra_pairs) + extras = make_cart_pairs(dict, germ, local->extra_pairs, + local->extra_any); // Loop over all Sections on the word. HandleSeq sects = germ->getIncomingSetByType(SECTION); @@ -286,27 +284,12 @@ Exp* make_sect_exprs(Dictionary dict, const Handle& germ) continue; } - // Tack on ANY connectors, as configured. - if (left_any) - { - Exp* optex = make_optional_node(dict->Exp_pool, left_any); - and_enchain_left(dict, andhead, andtail, optex); - } - if (right_any) - { - Exp* optex = make_optional_node(dict->Exp_pool, right_any); - and_enchain_right(dict, andhead, andtail, optex); - } - - // Tack on word-pair connectors, as configured. - if (left_pairs) + // Tack on extra connectors, as configured. 
+ if (extras) { - Exp* optex = make_optional_node(dict->Exp_pool, left_pairs); + Exp* optex = make_optional_node(dict->Exp_pool, extras); and_enchain_left(dict, andhead, andtail, optex); - } - if (right_pairs) - { - Exp* optex = make_optional_node(dict->Exp_pool, right_pairs); + optex = make_optional_node(dict->Exp_pool, extras); and_enchain_right(dict, andhead, andtail, optex); } diff --git a/link-grammar/dict-atomese/word-pairs.cc b/link-grammar/dict-atomese/word-pairs.cc index e224d37c3b..5aec3d787c 100644 --- a/link-grammar/dict-atomese/word-pairs.cc +++ b/link-grammar/dict-atomese/word-pairs.cc @@ -170,7 +170,20 @@ Exp* make_pair_exprs(Dictionary dict, const Handle& germ) /// and (A+ or B- or C+ or ()) and (A+ or B- or C+ or ())`. When /// this is exploded into disjuncts, any combination is possible, /// from size zero to three. That's why its a Cartesian product. -Exp* make_cart_pairs(Dictionary dict, const Handle& germ, int arity) +/// +/// FYI, this is a work-around for the lack of a commutative +/// multi-product. What we really want to do here is to have the +/// expression `(@A+ com @B- com @C+)` where `com` is a commutative +/// product. The `@` sign denotes a multi-connector, so that `@A+` +/// is the same thing as `(() or A+ or (A+ & A+) or ...)` and the +/// commutative product allows any of these to commute, i.e. so that +/// disjuncts such as `(A+ & C+ & A+ & C+)` are possible. But we do +/// not have such a commutative multi-product, and so we fake it with +/// a plain cartesian product. The only issue is that this eats up +/// RAM. At least RAM use is linear: it goes as `O(arity)`. More +/// precisely, as `O(npairs x arity)`. 
+Exp* make_cart_pairs(Dictionary dict, const Handle& germ, + int arity, bool with_any) { if (0 >= arity) return nullptr; @@ -180,7 +193,17 @@ Exp* make_cart_pairs(Dictionary dict, const Handle& germ, int arity) Exp* epr = make_pair_exprs(dict, germ); if (nullptr == epr) return nullptr; + // Tack on ANY connectors, if requested. + if (with_any) + { + Exp* ap = make_any_exprs(dict); + epr = make_or_node(dict->Exp_pool, epr, ap); + } Exp* optex = make_optional_node(dict->Exp_pool, epr); + + // If its 1-dimensional, we are done. + if (1 == arity) return optex; + and_enchain_right(dict, andhead, andtail, optex); for (int i=1; i< arity; i++) @@ -203,35 +226,52 @@ Exp* make_cart_pairs(Dictionary dict, const Handle& germ, int arity) /// If these are used all by themselves, the resulting parses will /// be random planar graphs; i.e. will be equivalent to the `any` /// language parses. -Exp* make_any_exprs(Dictionary dict, int arity) +Exp* make_any_exprs(Dictionary dict) { - if (arity <= 0) return nullptr; - // Create a pair of ANY-links that can connect either left or right. - Exp* aneg = make_connector_node(dict, dict->Exp_pool, "ANY", '-', false); - Exp* apos = make_connector_node(dict, dict->Exp_pool, "ANY", '+', false); + Exp* aneg = make_connector_node(dict, dict->Exp_pool, "ANY", '-', true); + Exp* apos = make_connector_node(dict, dict->Exp_pool, "ANY", '+', true); Local* local = (Local*) (dict->as_server); aneg->cost = local->any_default; apos->cost = local->any_default; Exp* any = make_or_node(dict->Exp_pool, aneg, apos); - Exp* optex = make_optional_node(dict->Exp_pool, any); + + return any; +} + +/// Much like make_cart_pairs, except that this duplicates the +/// ANY connector. It creates the expression +/// {@ANY- or @ANY+} and {@ANY- or @ANY+} and ... and {@ANY- or @ANY+} +/// This cartesian allows multiple connectors to participate in loops. +/// However, the behavior is ... surprising. 
See +/// https://github.com/opencog/link-grammar/issues/1351 +/// for a discussion of what this is all about. +Exp* make_cart_any(Dictionary dict, int arity) +{ + if (0 >= arity) return nullptr; Exp* andhead = nullptr; Exp* andtail = nullptr; - andhead = make_and_node(dict->Exp_pool, optex, NULL); - andtail = andhead->operand_first; + Exp* any = make_any_exprs(dict); + + Exp* optex = make_optional_node(dict->Exp_pool, any); + + // If its 1-dimensional, we are done. + if (1 == arity) return optex; + + and_enchain_right(dict, andhead, andtail, optex); for (int i=1; i< arity; i++) { Exp* opt = make_optional_node(dict->Exp_pool, any); - andtail->operand_next = opt; - andtail = opt; + and_enchain_right(dict, andhead, andtail, opt); } return andhead; } +// =============================================================== #endif // HAVE_ATOMESE