diff --git a/README.md b/README.md index 50d812dc..611eaf1e 100644 --- a/README.md +++ b/README.md @@ -134,3 +134,4 @@ provided by the bot. You will only need to do this once across all repos using o This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. + diff --git a/textworld/textgen/__init__.py b/textworld/textgen/__init__.py index 5c01bdec..a974c487 100644 --- a/textworld/textgen/__init__.py +++ b/textworld/textgen/__init__.py @@ -5,9 +5,10 @@ from textworld.textgen.parser import TextGrammarParser -class Alternative: + +class NewAlternative: """ - A single alternative in a production rule. + A single alternative in a production rule. """ def full_form(self, include_adj=True) -> str: @@ -15,43 +16,86 @@ def full_form(self, include_adj=True) -> str: if adj is None: return noun else: - return adj + " " + noun + return adj + "|" + noun -class LiteralAlternative(Alternative): +class LiteralChunk: """ - An alternative from a literal string. + It creates an object with a [str] value for every single literal. + literal is defined as any string which is not a symbol, i.e. it is not bounded by hashtags. """ + def __init__(self, value: str): + self._value = value + - def __init__(self, value: str): +class SymbolChunk: + """ + It creates an object with a [str] value for every single symbol. + symbol is defined as any string in between two consecutive hashtags, e.g. #it_is_a_symbol#. + """ + def __init__(self, value: str): self._value = value + +class NewLiteralAlternative(NewAlternative): + """ + An alternative from a literal string and represents it as a chunk of literal and symbol objects. + """ + def __init__(self, node: str): + self._node = node + # self._val_chunk contains the objects which make the string. + # It is equivalent to self._value in LiteralAlternative. + self._val_chunk = self._symbol_finder(self._node) + + def _symbol_finder(self, node): + self.chunks = [] + while node: + is_has_tag = [i for i, ltr in enumerate(node) if ltr == '#'] + if is_has_tag: + if node[:is_has_tag[0]]: + self.chunks.append(LiteralChunk(node[:is_has_tag[0]])) + self.chunks.append(SymbolChunk(node[is_has_tag[0]:is_has_tag[1] + 1])) + else: + self.chunks.append(SymbolChunk(node[is_has_tag[0]:is_has_tag[1] + 1])) + + node = node[is_has_tag[1] + 1:] + else: + if node: + self.chunks.append(LiteralChunk(node)) + break + return self.chunks + def split_form(self, include_adj=True) -> Tuple[Optional[str], str]: - return None, self._value + return None, self._node -class AdjectiveNounAlternative(Alternative): +class NewAdjectiveNounAlternative(NewLiteralAlternative): """ - An alternative that specifies an adjective and a noun. + An alternative that specifies an adjective and a noun as chunk of objects. """ - def __init__(self, adjective: str, noun: str): - self._adjective = adjective - self._noun = noun + def __init__(self, adj_node: str, n_node: str): + self._adj_node = adj_node + self._n_node = n_node + # self._adj_chunk contains the objects which make the adjective string. + # self._noun_chunk contains the objects which make the noun string. + # These are equivalent to self._adjective and self._noun in AdjectiveNounAlternative. + self._adj_chunk = self._symbol_finder(self._adj_node) + self._noun_chunk = self._symbol_finder(self._n_node) def split_form(self, include_adj=True) -> Tuple[Optional[str], str]: if include_adj: - return self._adjective, self._noun + return self._adj_node, self._n_node else: - return None, self._noun + return None, self._n_node -class MatchAlternative(Alternative): +class MatchAlternative(NewAlternative): """ - An alternative that specifies matching names for two objects. + An alternative that specifies matching names for two objects. """ - def __init__(self, lhs: Alternative, rhs: Alternative): + def __init__(self, lhs: NewAlternative, rhs: NewAlternative): self.lhs = lhs self.rhs = rhs @@ -64,7 +108,7 @@ class ProductionRule: A production rule in a text grammar. """ - def __init__(self, symbol: str, alternatives: Iterable[Alternative]): + def __init__(self, symbol: str, alternatives: Iterable[NewAlternative]): self.symbol = symbol self.alternatives = tuple(alternatives) @@ -79,13 +123,13 @@ def walk_str(self, node): def walk_Literal(self, node): value = self.walk(node.value) if value: - return LiteralAlternative(value) + return NewLiteralAlternative(value) else: # Skip empty literals return None def walk_AdjectiveNoun(self, node): - return AdjectiveNounAlternative(self.walk(node.adjective), self.walk(node.noun)) + return NewAdjectiveNounAlternative(self.walk(node.adjective), self.walk(node.noun)) def walk_Match(self, node): return MatchAlternative(self.walk(node.lhs), self.walk(node.rhs))