From 35ed2c3461b250dd81c368cf9388820c7d6a8451 Mon Sep 17 00:00:00 2001
From: Jan Max Meyer
Date: Tue, 17 Apr 2018 10:18:28 +0200
Subject: [PATCH] Minor docs changes & style, v0.22 is ready
---
.gitignore | 3 +
CHANGELOG.md | 7 +-
Version | 2 +-
doc/array.t2t | 4 +-
doc/list.t2t | 8 +-
doc/parse.t2t | 28 +++++
doc/phorward.css | 73 ++++++++---
doc/phorward.html | 173 ++++++++++++++++++++------
doc/phorward.t2t | 10 +-
doc/ref.t2t | 51 ++++----
examples/grammars/expr-with-assoc.syn | 13 ++
examples/grammars/expr.syn | 16 +++
12 files changed, 301 insertions(+), 87 deletions(-)
create mode 100644 examples/grammars/expr-with-assoc.syn
create mode 100644 examples/grammars/expr.syn
diff --git a/.gitignore b/.gitignore
index 51bf4e06..28d53cd7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -20,3 +20,6 @@ run/pregex
run/ppgram2c
run/pvm
+examples/parsing
+examples/lexing
+examples/regex
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5b7b7515..789815be 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,7 +4,7 @@ This file is used to document any relevant changes done to libphorward.
## v0.22
-Current development version.
+Released on: April 17, 2018
- Parsing tools
- Revised all modules, separating the grammar definition entirely from the
@@ -15,9 +15,12 @@ Current development version.
- Revised and simplified LR parser driver, now working on state machine, and
not the data-structures from lr.c anymore.
- Created better definition language called PBNF (Phorward BNF, pbnf.c)
- - Support for a BNF, EBNF and a Phorward-style BNF (PBNF) as input grammars
+ - Frontends for BNF, EBNF and a Phorward-style BNF (PBNF) as input grammars
using the functions pp_gram_from_bnf(), pp_gram_from_ebnf() and
pp_gram_from_pbnf().
+ - Implied precedence & associativity for LALR conflict resolution, which can
+ be used via ``<<`` (left-associative), ``>>`` (right-associative) and
+ ``^^`` (non-associative) in the pbnf language.
- Regular expressions
- Internal revisions and renamings.
- Cleaning data structures from temporal and ephemeral values.
diff --git a/Version b/Version
index cbeaf001..07499b96 100755
--- a/Version
+++ b/Version
@@ -1,4 +1,4 @@
#!/bin/sh
-echo "0.22.0 develop"
+echo "0.22.0"
# When changing version number, remove src/version.h to regenerate it!
diff --git a/doc/array.t2t b/doc/array.t2t
index 3a49048f..d2afbf09 100644
--- a/doc/array.t2t
+++ b/doc/array.t2t
@@ -10,14 +10,14 @@ The **parray** object is a general-purpose data structure which can be used for
The **parray** object brings the following advantages and disadvantages:
- __Advantages__
- - Chunk allocation requires lesser @pmalloc()/@prealloc() operations than with **plist**
+ - Chunk allocation requires fewer @pmalloc()/@prealloc() operations than with [plist #plist]
- Dynamic and quick implementation for huger data structures
- Elements are hold in a real array on the heap
- Low memory consumption
- Fast iteration over elements
-
- __Disadvantages__
- - Not so flexible like **plist**
+ - Less flexible than [plist #plist]
- No hashable entries
- Removing elements or rechaining requires more computing power
-
diff --git a/doc/list.t2t b/doc/list.t2t
index 548b2667..d089baef 100644
--- a/doc/list.t2t
+++ b/doc/list.t2t
@@ -3,9 +3,9 @@
=== Overview ===
-Next to the **parray** object, the **plist** object is a powerful C implementation of a double-linked list with some extra features. It is also used for handling homogenious elements of the same size in a dynamic way, and can be used for many tasks.
+Next to the [parray #parray] object, the **plist** object is a powerful C implementation of a double-linked list with some extra features. It is also used for handling homogeneous elements of the same size in a dynamic way, and can be used for many tasks.
-**plist** can be seen as a superset of the **parray** object, because it features nearly the same operations but with other underlying data management methods.
+**plist** can be seen as a superset of the [parray #parray] object, because it features nearly the same operations but with other underlying data management methods.
The **plist** object implements:
@@ -30,7 +30,7 @@ The **plist** object brings the following advantages and disadvantages:
- Allows pointer-mode and entity-mode configuration (PLIST_MOD_PTR)
- Additionally find objects using a hash-table
- Provides element recycling methods (PLIST_MOD_RECYCLE)
- - Elements are chained and can be re-arraged
+ - Elements are chained, can be re-arranged and have persistent pointers
- Automatical sorting using individual sort-functions
- Simple set-theory functions (union, diff)
-
@@ -38,7 +38,7 @@ The **plist** object brings the following advantages and disadvantages:
- High memory consumption, especially in case when used with hash-tables
- Iteration over elements is much slower
- List elements are chained as **plistel** data structures, while the data members must be called separately
- - All operations require more computing power in comparison to **parray**
+ - All operations require more computing power in comparison to [parray #parray]
-
-
diff --git a/doc/parse.t2t b/doc/parse.t2t
index ebffe330..1d95370e 100644
--- a/doc/parse.t2t
+++ b/doc/parse.t2t
@@ -142,6 +142,34 @@ expr$ : expr '+' term = add
;
```
+===== Associativity and precedence operators =====
+
+With //pbnf//, and in combination with the LALR parser, precedence and associativity can also be used to resolve conflicts and write shorter grammars. Thus, the following version of the four-function calculator results in the same parser.
+
+```
+%skip /[\s]+/ ;
+Int : /[0-9]+/ = int;
+
+<< '+' '-';
+<< '*' '/';
+
+expr$ : expr '*' expr = mul
+ | expr '/' expr = div
+ | expr '+' expr = add
+ | expr '-' expr = sub
+ | '(' expr ')'
+ | Int
+ ;
+```
+
+Here is a short table for reference.
+
+|| Operator | Meaning |
+| << | Left-associative configuration |
+| >> | Right-associative configuration |
+| ^^ | Non-associative configuration |
+
+
=== More grammar-related functions ===
There are some more functions on grammars that need to be mentioned.
diff --git a/doc/phorward.css b/doc/phorward.css
index cbf62043..65e9a7e3 100644
--- a/doc/phorward.css
+++ b/doc/phorward.css
@@ -1,24 +1,34 @@
-html, body, table
+html, body
{
- font-family: "Lucida Sans Unicode", "Lucida Grande", sans-serif;
+ font-family: Atlas, "Lucida Sans Unicode", "Lucida Grande", sans-serif;
font-size: 14pt;
+
+ background-color: #000055;
}
-html
+body
{
- background-color: #333;
+ margin: 0 5%;
+ padding: 3%;
+
+ background-color: #fff;
}
-body
+table
{
- margin: 0 10%;
- padding: 1%;
- background-color: #fff;
+ border-collapse: collapse;
+ border-color: #ccc;
+ width: 100%;
+}
+
+tr, td
+{
+ border-color: #ccc;
}
pre, code
{
- font-family: "Lucida Console", Monaco, monospace;
+ font-family: Monaco, monospace;
}
pre
@@ -114,15 +124,6 @@ div#body h3
padding: 0;
}
-
-@media print
-{
- a.back_to_top
- {
- visible: none;
- }
-}
-
table.ref
{
}
@@ -158,3 +159,39 @@ td.refreturns
{
}
+@media print
+{
+ html, body, table
+ {
+ font-size: 10.5pt;
+ background-color: initial;
+ }
+
+ body
+ {
+ margin: initial;
+ padding: initial;
+ background-color: initial;
+ }
+
+ h1
+ {
+ page-break-before: left;
+ }
+
+ .header h1
+ {
+ page-break-before: avoid;
+ margin-top: 8cm;
+ }
+
+ div.function
+ {
+ page-break-inside: avoid;
+ }
+
+ #toc1, div.toc
+ {
+ display: none;
+ }
+}
diff --git a/doc/phorward.html b/doc/phorward.html
index fc4a953d..21f1e6c0 100644
--- a/doc/phorward.html
+++ b/doc/phorward.html
@@ -44,7 +44,7 @@ Table of Contents
Dynamic general-purpose objects
- plist: Linked lists, hash-tables, queues and stacks
+ plist: Linked lists, hash-tables, queues and stacks
- Overview
@@ -80,7 +80,7 @@ Table of Contents
- pccl: Character-classes
+ pccl: Character-classes
@@ -705,9 +705,15 @@ Table of Contents
Introduction
+
+Welcome to the Phorward library on-line documentation!
+
phorward is a versatile C-library. It is split into several modules, and mostly focuses on the definition and implementation of parsers, recognizers, virtual machines and regular expressions.
+
+The library is made up of several modules. These are:
+
- any provides a dynamical, extendible data structure and interface to store, convert and handle variables of different value types ("variant" data type),
@@ -903,7 +909,7 @@
Intention behind this library
Dynamic general-purpose objects
-parray: Arrays and stacks
+parray: Arrays and stacks
Overview
@@ -920,7 +926,7 @@ Overview
- Advantages
- - Chunk allocation requires lesser pmalloc()/prealloc() operations than with plist
+
- Chunk allocation requires fewer pmalloc()/prealloc() operations than with plist
- Dynamic and quick implementation for huger data structures
@@ -934,7 +940,7 @@ Overview
- Disadvantages
- - Not so flexible like plist
+
- Less flexible than plist
- No hashable entries
@@ -1109,15 +1115,15 @@ Additional functions
-plist: Linked lists, hash-tables, queues and stacks
+plist: Linked lists, hash-tables, queues and stacks
Overview
-Next to the parray object, the plist object is a powerful C implementation of a double-linked list with some extra features. It is also used for handling homogenious elements of the same size in a dynamic way, and can be used for many tasks.
+Next to the parray object, the plist object is a powerful C implementation of a double-linked list with some extra features. It is also used for handling homogeneous elements of the same size in a dynamic way, and can be used for many tasks.
-plist can be seen as a superset of the parray object, because it features nearly the same operations but with other underlying data management methods.
+plist can be seen as a superset of the parray object, because it features nearly the same operations but with other underlying data management methods.
The plist object implements:
@@ -1164,7 +1170,7 @@
Overview
- Provides element recycling methods (PLIST_MOD_RECYCLE)
- - Elements are chained and can be re-arraged
+
- Elements are chained, can be re-arranged and have persistent pointers
- Automatical sorting using individual sort-functions
@@ -1180,7 +1186,7 @@ Overview
- List elements are chained as plistel data structures, while the data members must be called separately
- - All operations require more computing power in comparison to parray
+
- All operations require more computing power in comparison to parray
@@ -1440,7 +1446,7 @@ Additional functions
-pccl: Character-classes
+pccl: Character-classes
The pccl object is established on top of the plist object and encapsulates easy-to-handle low-level functions for character-class handling.
@@ -2321,6 +2327,51 @@
Phorward BNF
;
+Associativity and precedence operators
+
+
+With pbnf, and in combination with the LALR parser, precedence and associativity can also be used to resolve conflicts and write shorter grammars. Thus, the following version of the four-function calculator results in the same parser.
+
+
+
+%skip /[\s]+/ ;
+Int : /[0-9]+/ = int;
+
+<< '+' '-';
+<< '*' '/';
+
+expr$ : expr '*' expr = mul
+ | expr '/' expr = div
+ | expr '+' expr = add
+ | expr '-' expr = sub
+ | '(' expr ')'
+ | Int
+ ;
+
+
+
+Here is a short table for reference.
+
+
+
+
+Operator |
+Meaning |
+
+
+<< |
+Left-associative configuration |
+
+
+>> |
+Right-associative configuration |
+
+
+^^ |
+Non-associative configuration |
+
+
+
More grammar-related functions
@@ -2692,7 +2743,7 @@
pregex
-Usage: pregex OPTIONS {expression-only-if-no-other} [input-file]
+Usage: pregex OPTIONS {expression} input
-a --action ACTION Perform regular expression action:
match (default), find, split, replace
@@ -2707,6 +2758,20 @@ pregex
-V --version Show version info and exit.
+
+Example call:
+
+
+
+$ pregex -a find "\d+|[a-z]+" "123 abc456 78xy9"
+123
+abc
+456
+78
+xy
+9
+
+
plex
@@ -2726,6 +2791,20 @@
plex
-V --version Show version info and exit.
+
+Example call:
+
+
+
+$ plex -b ":" -e "\n" -i "123 abc456 78xy9" "\d+" "[a-z]+"
+1:123
+2:abc
+1:456
+1:78
+2:xy
+1:9
+
+
pparse
@@ -2748,6 +2827,23 @@
pparse
-V --version Show version info and exit.
+
+Example call:
+
+
+
+$ pparse "Int := /[0-9]+/; f : Int | '(' e ')'; t : t '*' f = mul | f ; e : e '+' t = add | t ;" "1+2*3+4*5"
+add
+ add
+ Int (1)
+ mul
+ Int (2)
+ Int (3)
+ mul
+ Int (4)
+ Int (5)
+
+
Other tools
@@ -7749,7 +7845,7 @@
pp_gram_from_ebnf
-
+
pp_gram_from_pbnf
@@ -7788,51 +7884,58 @@ pp_gram_from_pbnf
// Terminals -------------------------------------------------------------------
-[Terminal #fn_Terminal] /[A-Z][A-Za-z0-9_]*/ ;
-[Nonterminal #fn_Nonterminal] /[a-z_][A-Za-z0-9_]*/ ;
+Terminal := /[A-Z][A-Za-z0-9_]*/ ;
+Nonterminal := /[a-z_][A-Za-z0-9_]*/ ;
-[CCL #fn_CCL] /\[(\\.|[^\\\]])*/]/ ;
-[String #fn_String] /'[^']*'/ ;
-[Token #fn_Token] /"[^"]*"/ ;
-[Regex #fn_Regex] /\/(\\.|[^\\\/])*/// ;
+CCL := /\[(\\.|[^\\\]])*/]/ ;
+String := /'[^']*'/ ;
+Token := /"[^"]*"/ ;
+Regex := /\/(\\.|[^\\\/])*/// ;
-[Int #fn_Int] /[0-9]+/ ;
-[Function #fn_Function] /[A-Za-z_][A-Za-z0-9_]*/(\)/ ;
+Int := /[0-9]+/ ;
+Function := /[A-Za-z_][A-Za-z0-9_]*/(\)/ ;
-[Flag_emit #fn_Flag_emit] '@([A-Za-z0-9_]+)?' ;
-[Flag_goal #fn_Flag_goal] '$' ;
-[Flag_lexem #fn_Flag_lexem] '!' ;
-[Flag_ignore #fn_Flag_ignore] /%(ignore|skip)/ ;
+Flag_emit := '@([A-Za-z0-9_]+)?' ;
+Flag_goal := '$' ;
+Flag_lexem := '!' ;
+Flag_ignore := /%(ignore|skip)/ ;
// Nonterminals ----------------------------------------------------------------
-[inline #fn_inline] : Flag_emit '(' alternation ')'
+inline := Flag_emit '(' alternation ')'
| '(' alternation ')'
;
-[symbol #fn_symbol] : Terminal | Nonterminal | CCL | String | Token
- | Regex | Function | inline ;
+terminal : CCL | String | Token | Regex | Function ;
-modifier : ( symbol '*' )=kle
- | ( symbol '+' )=pos
- | ( symbol '?' )=opt
+symbol := Terminal | Nonterminal | terminal | inline ;
+
+modifier : symbol '*' = kle
+ | symbol '+' = pos
+ | symbol '?' = opt
| symbol
;
sequence : sequence modifier | modifier ;
-[production #fn_production] : sequence | ;
+production := sequence | ;
alternation : alternation '|' production | production ;
-[nontermdef #fn_nontermdef] : Nonterminal ':' alternation ';'
+nontermdef := Nonterminal ':' alternation ';'
+ ;
+
+termdef := Terminal ':' terminal ';'
;
-[termdef #fn_termdef] : Terminal ':' ( CCL | String | Regex | Function ) ';'
+assocdef := '<<' terminal+ ';' = assoc_left
+ | '>>' terminal+ ';' = assoc_right
+ | '^^' terminal+ ';' = assoc_none
;
definition : nontermdef
| termdef
+ | assocdef
;
grammar $ : definition+ ;
diff --git a/doc/phorward.t2t b/doc/phorward.t2t
index 255154d3..4ace5bf1 100644
--- a/doc/phorward.t2t
+++ b/doc/phorward.t2t
@@ -22,8 +22,12 @@ April 2018
= Introduction =
+Welcome to the Phorward library on-line documentation!
+
**phorward** is a versatile C-library. It is split into several modules, and mostly focuses on the definition and implementation of parsers, recognizers, virtual machines and regular expressions.
+The library is made up of several modules. These are:
+
- **any** provides a dynamical, extendible data structure and interface to store, convert and handle variables of different value types ("variant" data type),
- **base** provides tools for dynamic data structures and utility functions used throughout the library, including linked lists, hash-tables, stacks and arrays,
- **parse** defines tools to express grammars and provides a built-in LALR(1) parser generator and objects to handle abstract syntax trees, integrating perfectly with the tools from *regex* for lexical analysis,
@@ -128,15 +132,15 @@ The final destination of the Phorward library is not entirely clear yet. For now
= Dynamic general-purpose objects =
-== parray: Arrays and stacks ==
+== parray: Arrays and stacks ==[parray]
%!include: array.t2t
-== plist: Linked lists, hash-tables, queues and stacks ==
+== plist: Linked lists, hash-tables, queues and stacks ==[plist]
%!include: list.t2t
-== pccl: Character-classes ==
+== pccl: Character-classes ==[pccl]
%!include: ccl.t2t
diff --git a/doc/ref.t2t b/doc/ref.t2t
index 576da4ce..c0c84f9e 100644
--- a/doc/ref.t2t
+++ b/doc/ref.t2t
@@ -3496,7 +3496,7 @@ defs : defs nonterm | nonterm ;
grammar$ : defs ;
```
%FUNCTION:END
-%NEED:77
+%NEED:84
%FUNCTION:BEGIN
=== pp_gram_from_pbnf ===[fn_pp_gram_from_pbnf]
@@ -3526,51 +3526,58 @@ lexical analyzer-specific definitions, grammar and AST construction features.
// Terminals -------------------------------------------------------------------
-@Terminal /[A-Z][A-Za-z0-9_]*/ ;
-@Nonterminal /[a-z_][A-Za-z0-9_]*/ ;
+Terminal := /[A-Z][A-Za-z0-9_]*/ ;
+Nonterminal := /[a-z_][A-Za-z0-9_]*/ ;
-@CCL /\[(\\.|[^\\\]])*/]/ ;
-@String /'[^']*'/ ;
-@Token /"[^"]*"/ ;
-@Regex /\/(\\.|[^\\\/])*/// ;
+CCL := /\[(\\.|[^\\\]])*/]/ ;
+String := /'[^']*'/ ;
+Token := /"[^"]*"/ ;
+Regex := /\/(\\.|[^\\\/])*/// ;
-@Int /[0-9]+/ ;
-@Function /[A-Za-z_][A-Za-z0-9_]*/(\)/ ;
+Int := /[0-9]+/ ;
+Function := /[A-Za-z_][A-Za-z0-9_]*/(\)/ ;
-@Flag_emit '@([A-Za-z0-9_]+)?' ;
-@Flag_goal '$' ;
-@Flag_lexem '!' ;
-@Flag_ignore /%(ignore|skip)/ ;
+Flag_emit := '@([A-Za-z0-9_]+)?' ;
+Flag_goal := '$' ;
+Flag_lexem := '!' ;
+Flag_ignore := /%(ignore|skip)/ ;
// Nonterminals ----------------------------------------------------------------
-@inline : Flag_emit '(' alternation ')'
+inline := Flag_emit '(' alternation ')'
| '(' alternation ')'
;
-@symbol : Terminal | Nonterminal | CCL | String | Token
- | Regex | Function | inline ;
+terminal : CCL | String | Token | Regex | Function ;
-modifier : ( symbol '*' )=kle
- | ( symbol '+' )=pos
- | ( symbol '?' )=opt
+symbol := Terminal | Nonterminal | terminal | inline ;
+
+modifier : symbol '*' = kle
+ | symbol '+' = pos
+ | symbol '?' = opt
| symbol
;
sequence : sequence modifier | modifier ;
-@production : sequence | ;
+production := sequence | ;
alternation : alternation '|' production | production ;
-@nontermdef : Nonterminal ':' alternation ';'
+nontermdef := Nonterminal ':' alternation ';'
+ ;
+
+termdef := Terminal ':' terminal ';'
;
-@termdef : Terminal ':' ( CCL | String | Regex | Function ) ';'
+assocdef := '<<' terminal+ ';' = assoc_left
+ | '>>' terminal+ ';' = assoc_right
+ | '^^' terminal+ ';' = assoc_none
;
definition : nontermdef
| termdef
+ | assocdef
;
grammar $ : definition+ ;
diff --git a/examples/grammars/expr-with-assoc.syn b/examples/grammars/expr-with-assoc.syn
new file mode 100644
index 00000000..7e70a6c1
--- /dev/null
+++ b/examples/grammars/expr-with-assoc.syn
@@ -0,0 +1,13 @@
+%skip /[\s]+/ ;
+Int : /[0-9]+/ = int;
+
+<< '+' '-';
+<< '*' '/';
+
+expr$ : expr '*' expr = mul
+ | expr '/' expr = div
+ | expr '+' expr = add
+ | expr '-' expr = sub
+ | '(' expr ')'
+ | Int
+ ;
diff --git a/examples/grammars/expr.syn b/examples/grammars/expr.syn
new file mode 100644
index 00000000..fe533244
--- /dev/null
+++ b/examples/grammars/expr.syn
@@ -0,0 +1,16 @@
+%skip /[\s]+/ ;
+Int : /[0-9]+/ = int;
+
+factor : Int
+ | '(' expr ')'
+ ;
+
+term : term '*' factor = mul
+ | term '/' factor = div
+ | factor
+ ;
+
+expr$ : expr '+' term = add
+ | expr '-' term = sub
+ | term
+ ;