diff --git a/src/Std/Scan.java b/src/Std/Scan.java index 6a3a7aae..164b69cf 100644 --- a/src/Std/Scan.java +++ b/src/Std/Scan.java @@ -11,13 +11,15 @@ public class Scan implements IScan { public int lno; // current line number public Token tok; // this is persistent across all calls to cur() + public Token lineMode; // token to toggle line mode // create a scanner object on a buffered reader public Scan(BufferedReader rdr) { this.rdr = rdr; this.lno = 0; - s = null; - tok = null; + this.lineMode = null; + this.s = null; + this.tok = null; // force the enum Match class to compile its patterns String msg = Token.Match.init(); if (msg != null) { @@ -36,6 +38,7 @@ public void reset() { // force the scanner to process the next line s = null; tok = null; + lineMode = null; } // fill the string buffer from the reader if it's exhausted or null) @@ -47,7 +50,7 @@ public void fillString() { if (s == null) return; // end of file lno++; - s += "\n"; + s += "\n"; // make sure the string has a newline start = 0; end = s.length(); } catch (IOException e) { @@ -73,20 +76,41 @@ public Token cur() { return tok; } // s cannot be null here + // are we in line mode? + if (lineMode != null) { + Pattern cpat = lineMode.match.cPattern; + Matcher m = cpat.matcher(s); + m.region(0,end); + start = end; // consume the line before next match + if (m.lookingAt()) { + // found the lineMode token, exit line mode + // and return the matched lineMode token + // System.out.println("leaving line mode..."); + tok = new Token(lineMode.match, m.group(), lno, s); + lineMode = null; + return tok; + } else { + // return the entire line as a token + tok = new Token(Token.Match.$LINE, s, lno, s); + return tok; + } + } int matchEnd = start; // current end of match for (Token.Match match : Token.Match.values()) { Pattern cpat = match.cPattern; if (cpat == null) break; // nothing matches, so can't find a token - if (match.skip && matchFound != null) + if (match.tokType == Token.TokType.SKIP && matchFound != null) continue; // ignore skips if we have a pending token + if (start != 0 && match.pattern.charAt(0) == '^') + continue; // '^' must match at start of line Matcher m = cpat.matcher(s); m.region(start, end); if (m.lookingAt()) { int e = m.end(); if (e == start) continue; // empty match, so try next pattern - if (match.skip) { + if (match.tokType == Token.TokType.SKIP) { // there's a non-empty skip match, // so we skip over the matched part // and get more stuff to read @@ -114,6 +138,12 @@ public Token cur() { start = matchEnd; // start of next token match // matchString is the matching string tok = new Token(matchFound, matchString, lno, s); // persistent + // System.out.println(String.format("match=%s\n", toggle)); + if (matchFound.tokType == Token.TokType.LINE_TOGGLE) { + // System.out.println("going to line mode..."); + start = end; // swallow the rest of the line + lineMode = tok; + } return tok; } } diff --git a/src/Std/Token.pattern b/src/Std/Token.pattern index dc8956d8..47946dfd 100644 --- a/src/Std/Token.pattern +++ b/src/Std/Token.pattern @@ -4,29 +4,50 @@ import java.util.regex.*; // Token class with match patterns (used with the built-in Scan class) public class Token { - // this is set to an error message string + // patternFail is set to an error message string // if there are pattern compile errors public static String patternFail = null; // public static final Match $eof = Match.$EOF; + public enum TokType { + TOKEN, + SKIP, + LINE_TOGGLE, + SPECIAL; + } + public enum Match { %%Match%% $ERROR (null), - $EOF (null); + $EOF (null), + $LINE (null); public String pattern; - public boolean skip; - public Pattern cPattern; // compiled pattern + public TokType tokType; + public Pattern cPattern = null; // compiled pattern - // a token pattern (skip == false) + // a SPECIAL token type or a TOKEN/LINE_TOGGLE Match(String pattern) { - this(pattern, false); - } + this(pattern, null); + } + // legacy ?? Match(String pattern, boolean skip) { - this.pattern = pattern; - this.skip = skip; + this(pattern, TokType.SKIP); + } + + Match(String pattern, TokType tokType) { if (pattern != null) { + if (tokType == TokType.SKIP) { + this.tokType = TokType.SKIP; + } else if (pattern.length() >= 2 && + pattern.substring(0,2).equals("^^")) { + pattern = pattern.substring(1); + this.tokType = TokType.LINE_TOGGLE; + } else { + this.tokType = TokType.TOKEN; + } + this.pattern = pattern; try { this.cPattern = Pattern.compile(pattern, Pattern.DOTALL); } catch (PatternSyntaxException e) { @@ -36,12 +57,14 @@ public class Token { patternFail += (" " +this); this.cPattern = null; } + } else { + this.tokType = TokType.SPECIAL; // SPECIAL } } // Use this to force loading Match class to compile patterns. public static String init() { - return patternFail; + return patternFail; // returns null if no errors } } @@ -88,22 +111,27 @@ public class Token { public static void main(String [] args) { String msg = Match.init(); - if (msg != null) + if (msg != null) { System.out.println(msg); + System.exit(1); + } for (Match match: Match.values()) { - if (match.pattern == null) - continue; - String what; - if (match.skip) - what = "skip"; - else - what = "token"; + if (match.tokType == TokType.SPECIAL) { + System.out.println( + String.format("special "+match.toString()) + ); + continue; // not a real token + } + String what = "??"; + switch(match.tokType) { + case SKIP -> what = "skip"; + case TOKEN -> what = "token"; + case LINE_TOGGLE -> what = "token (line toggle)"; + } System.out.println( - String.format("%s %s '%s'", what, match, match.pattern) + String.format("%s %s '%s'",what,match.toString(),match.pattern) ); } - if (msg != null) - System.exit(1); } //Token// diff --git a/src/Std/Token.template b/src/Std/Token.template index cdaf9ced..4f63da26 100644 --- a/src/Std/Token.template +++ b/src/Std/Token.template @@ -5,7 +5,7 @@ public class Token { public enum Match { %%Match%% - } + } public Match match; // token match public String str; // this token's lexeme (never empty!) diff --git a/src/plcc b/src/plcc index 9dc5c4d4..bd59c3ed 100755 --- a/src/plcc +++ b/src/plcc @@ -1,6 +1,6 @@ #!/bin/bash -LIB="${LIBPLCC:-/usr/local/pub/plcc/PLCC}" +LIB="${LIBPLCC:-/home/fossumtv/PL/src}" PYTHON3=python3 PLCC="$LIB/plcc.py" diff --git a/src/plcc.py b/src/plcc.py index f08dd44f..61107018 100644 --- a/src/plcc.py +++ b/src/plcc.py @@ -2,7 +2,7 @@ """ PLCC: A Programming Languages Compiler-Compiler - Copyright (C) 2021 Timothy Fossum + Copyright (C) 2023 Timothy Fossum This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -23,21 +23,23 @@ import os import io import shutil -import pipes import tempfile argv = sys.argv[1:] # skip over the command-line argument # current file information -Lno = 0 # current line number Fname = '' # current file name (STDIN if standard input) +Lno = 0 # current line number in file Line = '' # current line in the file +nlgen = None # next line generator for Fname STD = [] # reserved names from Std library classes STDT = [] # token-related files in the Std library directory STDP = [] # parse/runtime-related files in the Std library directory flags = {} # processing flags (dictionary) +lineMode = False # True if in line mode + startSymbol = '' # start symbol (first nonterm in rules) term = set() # set of term (token) names termSpecs = [] # term (token) specifications for generating the Token file @@ -122,9 +124,8 @@ def plccInit(): for fname in STD: flags[fname] = fname flags['libplcc'] = LIBPLCC() - flags['Token'] = True + flags['Token'] = True # generate scanner-related files # behavior-related flags - flags['PP'] = '' # preprocessor cmd (e.g., 'cpp -P') flags['debug'] = 0 # default debug value flags['destdir'] = 'Java' # the default destination directory flags['pattern'] = True # create a scanner that uses re. patterns @@ -135,6 +136,9 @@ def plccInit(): def lex(nxt): # print('=== lexical specification') + # Handle any flags appearing at beginning of lexical spec section; + # turn off when all flags have been processed + flagSwitch = True # turn off after all the flags have been processed for line in nxt: line = re.sub('\s+#.*', '', line) # remove trailing comments ... # NOTE: a token that has a substring like this ' #' will mistakenly be @@ -142,13 +146,26 @@ def lex(nxt): line = line.strip() if len(line) == 0: # skip empty lines continue - if line[0] == '#': + if line[0] == '#': # skip comments continue + if line[0] == '!': # handle a PLCC compile-time flag + if flagSwitch: + line = line[1:] + # print ('>>> flag line: {}'.format(line)) + try: + processFlag(line) + except Exception as msg: + deathLNO(msg) + continue + else: + deathLNO('all PLCC flags must occur before token/skip specs') + flagSwitch = False # stop accepting compile-time flags if line == '%': - break; + break; # end of lexical specification section # print ('>>> {}'.format(line)) jpat = '' # the Java regular expression pattern for this skip/term pFlag = getFlag('pattern') + # only process patterns if the 'pattern' flag is True if pFlag: # handle capturing the match part and replacing with empty string def qsub(match): @@ -182,7 +199,6 @@ def qsub(match): if re.search("[\"']", line): deathLNO('Puzzling skip/token pattern specification') # next determine if it's a skip or token specification - line = line.strip() result = line.split() rlen = len(result) if rlen >= 3: @@ -199,7 +215,7 @@ def qsub(match): deathLNO(name + ': duplicate token/skip name') term.update({name}) if what == 'skip': - skip = ', true' # Java boolean constant + skip = ', TokType.SKIP' # Java constant elif what == 'token': skip = '' else: @@ -254,7 +270,7 @@ def lexFinishUp(): death(fname + ': cannot read library file') for line in tokenTemplate: # note that line keeps its trailing newline - if re.match('^\s*%%Match%%', line): + if re.match('^%%Match%%', line): for ts in termSpecs: print(' ' + ts + ',', file=tokenFile) else: @@ -269,7 +285,7 @@ def lexFinishUp(): death(fname + ': cannot read library file') for line in tokenTemplate: # note that line keeps its trailing newline - if re.match('^\s*%%Match%%', line): + if re.match('^%%Match%%', line): tssep = '' for ts in termSpecs: print(tssep + ' ' + ts, file=tokenFile, end='') @@ -951,20 +967,22 @@ def getCode(nxt): if re.match(r'\s*#', line) or re.match(r'\s*$', line): # skip comments or blank lines continue - if re.match(r'\s*%%{', line): # legacy plcc - stopMatch = r'\s*%%}' + if re.match(r'%%{', line): # legacy plcc + stopMatch = r'%%}' break - if re.match(r'\s*%%%', line): - stopMatch = r'\s*%%%' + if re.match(r'%%%', line): + stopMatch = r'%%%' break else: deathLNO('expecting a code segment') + lineMode = True # switch on line mode for line in nxt: if re.match(stopMatch, line): break code.append(line) else: deathLNO('premature end of file') + lineMode = False # switch off line mode str = '\n'.join(code) return str + '\n' @@ -974,22 +992,14 @@ def semFinishUp(): global stubs, STD dst = flags['destdir'] print('\nJava source files created:') - cmd = getFlag('PP') # run a preprocessor, if specified + # print *all* of the generated files for cls in sorted(stubs): if cls in STD: death('{}: reserved class name'.format(cls)) try: fname = '{}/{}.java'.format(dst, cls) - if len(cmd) > 0: - t = pipes.Template() - # print('>>> adding {} preprocessor to the pipe'.format(cmd)) - t.append(cmd, '--') - # print('>>> writing to file {}'.format(fname)) - with t.open(fname, 'w') as f: - print(stubs[cls], end='', file=f) - else: - with open(fname, 'w') as f: - print(stubs[cls], end='', file=f) + with open(fname, 'w') as f: + print(stubs[cls], end='', file=f) except: death('cannot write to file {}'.format(fname)) print(' {}.java'.format(cls)) @@ -1005,34 +1015,77 @@ def done(): def nextLine(): # create a generator to get the next line in the current input file global Lno, Fname, Line - for Fname in argv: - # open the next input file - f = None # the current open file - if Fname == '-': - f = sys.stdin - Fname = 'STDIN' + global stack # used for '#include ...' processing + global argv # file arguments + global nlgen # next line generator for Fname + maxStack = 4 # maximum #include stacking level + stack = [] + # debug('...here...') + while True: + if len(stack) > 0: + # pop any #include parent off the stack (stack is initially empty) + (Fname, Lno, nlgen) = stack.pop() + debug('back to reading from file ' + Fname) + elif len(argv) > 0: + # get the next command line filename + Fname = argv[0] + nlgen = nextLineGen(Fname) # resets Lno to zero + argv = argv[1:] # advance to next filename parameter else: + return None # nothing left!! + while True: try: - f = open(Fname, 'r') + Line = next(nlgen) + if Line == None: + debug('exiting current nextLineGen') + break + Line = Line.rstrip() + debug('[{}]: {}'.format(Fname, Line)) + # Line is the next line in this file + # first handle '#include ...' directives + if lineMode: + pass # don't process #include directives when in line mode + else: + if Line[:8] == '#include': + ary = Line.split(None, maxsplit = 1) + if len(ary) == 2 and ary[0] == '#include': + if len(stack) >= maxStack: + death('max #include nesting depth exceeded') + debug('include directive: {} {}' + .format(ary[0],ary[1])) + # ary[1] must be a filename + stack.append((Fname, Lno, nlgen)) + Fname = ary[1].strip() + # print('### now reading from file '+Fname) + nlgen = nextLineGen(Fname) + continue + else: + death(line + ': invalid #include directive') + line = Line.rstrip() + debug('{:4} [{}] {}'.format(Lno,Fname,Line), level=2) + yield line except: - death(Fname + ': error opening file') - Lno = 0 - # f is the current open file - for Line in f: - # get the next line in this file - Lno += 1 - line = Line.rstrip() - if len(line) > 0 and line[0] == '!': - line = line[1:] - # print ('>>> flag line: {}'.format(line)) - try: - processFlag(line) - except Exception as msg: - deathLNO(msg) - continue - debug('{:4} [{}] {}'.format(Lno,Fname,Line), level=2) - yield line - f.close() + break + + +# next line generator for fname +def nextLineGen(fname): + global Lno + debug('creating a nextLineGen generator for file ' + fname) + if fname == '-': + fname = 'STDIN' + f = sys.stdin + else: + try: + f = open(fname, 'r') + except: + death(fname + ': error opening file') + Lno = 0 + debug('now reading from file ' + fname) + for Line in f: + Lno += 1 + yield Line + f.close def processFlag(flagSpec): global flags @@ -1089,11 +1142,13 @@ def defang(item): # xxx must be a nonterm or a token name global term # all token names debug('[defang] item={}'.format(item)) - m = re.match(r'<(\w+#?)>(:?\w*)$', item) + m = re.match(r'<(\w*#?)>(:?\w*)$', item) if m: xxx = m.group(1) yyy = m.group(2) - if isTerm(xxx) or isNonterm(xxx): + if xxx == '': + xxx = '$LINE'; + elif isTerm(xxx) or isNonterm(xxx): pass else: deathLNO('malformed "<{}>" in BNF item {}'.format(xxx, item)) @@ -1162,10 +1217,10 @@ def isNonterm(nt): return re.match('[a-z]\w*#?$', nt) def isClass(cls): - return cls == 'void' or re.match('[A-Z][\$\w]*$', cls) + return re.match('[A-Z][\$\w]*$', cls) or cls == 'void' def isTerm(term): - return re.match('[A-Z][A-Z\d_]*$', term) + return re.match('[A-Z][A-Z\d_$]*$', term) or term == '$LINE' def nt2cls(nt): # return the class name of the nonterminal nt