signature PARSER = sig datatype stp = TpBase of string (* base type *) | TpFun of stp * stp (* function type *) | TpTup of stp list (* tuple type *) | TpExn (* exception type *) | TpRef of stp (* reference type *) (* existential type variable *) | TpExi of {name: int, link: stp option ref} (* universal type variable *) | TpUni of int type ostp = stp option datatype ttm = TtmBool of bool (* boolean constant *) | TtmInt of int (* integer constant *) | TtmStr of string (* string constant *) | TtmVar of string (* variable *) | TtmIf of ttm * ttm * ttm (* if-then-else term *) | TtmOp of string * ttm list (* built-in operator *) | TtmApp of ttm * ttm (* application *) | TtmLet of string * ttm * ttm (* let-binding *) | TtmLetrec of string * ostp * ttm * ttm (* letrec-binding *) | TtmTup of ttm list (* tuple *) | TtmPro of ttm * int (* projection *) | TtmFix of ttm (* fixed point *) | TtmAsc of ttm * stp (* ascription *) | TtmExn of string (* exception *) | TtmTry of ttm * (ttm * ttm) list (* try ... with ... *) (* nonrecursive function *) | TtmFn of (string * ostp) list * ostp * ttm (* recursive function *) | TtmFun of string * (string * ostp) list * ostp * ttm (* mutually recursive functions *) | TtmFuns of (string * (string * ostp) list * ostp * ttm) list | TtmChoose of ttm * int (* sequence *) | TtmSeq of ttm list val parseFile: string -> ttm val parseKeybd: unit -> ttm val parseString: string -> ttm end structure Parser :> PARSER = struct structure T = Tokens structure E = Error (val defaultMaxErrors=0 val defaultWarningThreshold=0) open Parsing infixr 4 << >> infixr 3 && infix 2 -- ## infix 2 wth suchthat return guard infixr 1 || exception Fatal fun fatal (msg: string) = (print msg; raise Fatal) fun error (msg: string) (pos: Pos.T) = (E.error " in parser.sml: " msg pos; raise Fatal) val LPAREN = T.litWord "(" val RPAREN = T.litWord ")" val BAR = T.litWord "|" val COLON = T.litWord ":" val SEMICOLON = T.litWord ";" val COMMA = T.litWord "," val DOT = T.litWord "." val SHARP = T.litWord "#" val EQ = T.litWord "=" val EQGT = T.litWord "=>" val MINUSGT = T.litWord "->" val TILDE = T.litWord "~" val PLUS = T.litWord "+" val MINUS = T.litWord "-" val STAR = T.litWord "*" val SLASH = T.litWord "/" val LT = T.litWord "<" val LTEQ = T.litWord "<=" val GT = T.litWord ">" val GTEQ = T.litWord ">=" val LTGT = T.litWord "<>" val AND = T.litWord "and" val ELSE = T.litWord "else" val FIX = T.litWord "fix" val FN = T.litWord "fn" val FUN = T.litWord "fun" val IF = T.litWord "if" val IN = T.litWord "in" val IS = T.litWord "is" val LAM = T.litWord "lam" val LET = T.litWord "let" val LETREC = T.litWord "letrec" val MOD = T.litWord "mod" val PRINT = T.litWord "print" val THEN = T.litWord "then" val TREF = T.litWord "ref" val TEXN = T.litWord "exn" val REF = T.litWord "ref" val BANG = T.litWord "!" val COLONEQ = T.litWord ":=" val RAISE = T.litWord "raise" val TRY = T.litWord "try" val WITH = T.litWord "with" val keywords: string list = ["and", "else", "fix", "fn", "fun", "if", "in", "lam", "let", "letrec", "mod", "print", "raise", "ref", "then", "try", "with"] fun isLetter (c: char): bool = Char.isAlpha c orelse Char.contains "_'" c fun isLetterDigit (c: char): bool = isLetter c orelse Char.isDigit c fun isKeyword (s: string): bool = List.exists (fn x => x = s) keywords fun isIdent (s: string) = let val cs = explode s in case cs of [] => false | c :: cs => (isLetter c) andalso (List.all isLetterDigit cs) end fun isVar (s: string): bool = not (isKeyword s) andalso isIdent s (* we first build a parser for parsing types *) datatype stp = TpBase of string | TpFun of stp * stp | TpTup of stp list | TpExn | TpRef of stp | TpExi of {name: int, link: stp option ref} | TpUni of int type ostp = stp option val unitType = T.litWord "unit" return TpTup [] fun isBaseType (s: string): bool = isIdent s val baseType = T.anyWord suchthat isBaseType fun typ0 () = unitType || TEXN return TpExn || TREF >> $typ0 wth (fn t => TpRef t) || baseType wth (fn name => TpBase name) || LPAREN >> $typ << RPAREN and typ1 () = $typ0 && $typ1' wth (fn (t, []) => t | (t, ts) => TpTup (t :: ts)) and typ1' () = repeat (STAR >> $typ0) and typ () = $typ1 && $typ' wth (fn (t, k) => k (t)) and typ' () = MINUSGT >> $typ wth (fn t => fn t' => TpFun (t', t)) || succeed (fn t => t) (* we now build a parser for parsing terms *) datatype ttm = TtmBool of bool | TtmInt of int | TtmStr of string | TtmVar of string | TtmIf of ttm * ttm * ttm | TtmOp of string * ttm list | TtmApp of ttm * ttm | TtmLet of string * ttm * ttm | TtmLetrec of string * ostp * ttm * ttm | TtmTup of ttm list | TtmPro of ttm * int | TtmFix of ttm | TtmAsc of ttm * stp | TtmExn of string | TtmTry of ttm * (ttm * ttm) list | TtmFn of (string * ostp) list * ostp * ttm | TtmFun of string * (string * ostp) list * ostp * ttm | TtmFuns of (string * (string * ostp) list * ostp * ttm) list | TtmChoose of ttm * int | TtmSeq of ttm list val ttmBool = T.litWord "true" return TtmBool true || T.litWord "false" return TtmBool false val ttmInt = T.anyInteger wth (fn i => TtmInt i) val ttmStr = T.anyString wth (fn s => TtmStr s) val ttmExn = T.litWord "Exit" return TtmExn "Exit" || T.litWord "Error" return TtmExn "Error" val ttmConst = ttmBool || ttmInt || ttmStr || ttmExn val var = T.anyWord suchthat isVar val ttmVar = var wth (fn name => TtmVar name) fun otyp () = COLON >> $typ wth (fn tp => SOME tp) || succeed NONE fun varotyp () = var wth (fn x => (x, NONE)) || LPAREN >> var && $otyp << RPAREN wth (fn (x, otp) => (x, otp)) fun varotyps () = repeat1 ($varotyp) fun negOp (t: ttm): ttm = TtmOp ("~", [t]) fun plusOp (t1: ttm, t2: ttm): ttm = TtmOp ("+", [t1, t2]) fun minusOp (t1:ttm, t2: ttm): ttm = TtmOp ("-", [t1, t2]) fun starOp (t1: ttm, t2: ttm): ttm = TtmOp ("*", [t1, t2]) fun slashOp (t1: ttm, t2: ttm): ttm = TtmOp ("/", [t1, t2]) fun modOp (t1: ttm, t2: ttm): ttm = TtmOp ("mod", [t1, t2]) fun ltOp (t1: ttm, t2: ttm): ttm = TtmOp ("<", [t1, t2]) fun lteqOp (t1: ttm, t2: ttm): ttm = TtmOp ("<=", [t1, t2]) fun gtOp (t1: ttm, t2: ttm): ttm = TtmOp (">", [t1, t2]) fun gteqOp (t1: ttm, t2: ttm): ttm = TtmOp (">=", [t1, t2]) fun eqOp (t1: ttm, t2: ttm): ttm = TtmOp ("=", [t1, t2]) fun ltgtOp (t1: ttm, t2:ttm): ttm = TtmOp ("<>", [t1, t2]) fun printOp (t: ttm): ttm = TtmOp ("print", [t]) fun refOp (t: ttm): ttm = TtmOp ("ref", [t]) fun derefOp (t: ttm): ttm = TtmOp ("!", [t]) fun assignOp (t1: ttm, t2: ttm): ttm = TtmOp (":=", [t1, t2]) fun sequenceOp (t1: ttm, t2: ttm): ttm = TtmOp (";", [t1, t2]) fun raiseOp (t: ttm): ttm = TtmOp ("raise", [t]) val operator = TILDE return Prefix (4, negOp) || PLUS return Infix (LeftAssoc, 2, plusOp) || MINUS return Infix (LeftAssoc, 2, minusOp) || STAR return Infix (LeftAssoc, 3, starOp) || SLASH return Infix (LeftAssoc, 3, slashOp) || MOD return Infix (LeftAssoc, 3, modOp) || LT return Infix (NonAssoc, 1, ltOp) || LTEQ return Infix (NonAssoc, 1, lteqOp) || GT return Infix (NonAssoc, 1, gtOp) || GTEQ return Infix (NonAssoc, 1, gteqOp) || EQ return Infix (NonAssoc, 1, eqOp) || LTGT return Infix (NonAssoc, 1, ltgtOp) || PRINT return Prefix (4, printOp) || (* for references *) REF return Prefix (4, refOp) || BANG return Prefix (4, derefOp) || COLONEQ return Infix (NonAssoc, 0, assignOp) || (* for exceptions *) RAISE return Prefix (4, raiseOp) || (* sequence *) SEMICOLON return Infix (RightAssoc, ~1, sequenceOp) val projectLabel = DOT >> T.anyInteger suchthat (fn (n: int) => n > 0) val chooseLabel = SHARP >> T.anyInteger suchthat (fn (n: int) => n > 0) val pat = ttmConst || ttmVar fun ttm0 () = ttmConst || ttmVar || LPAREN >> $ttm && COLON >> $typ << RPAREN wth (fn (t, tp) => TtmAsc (t, tp)) || LPAREN >> $ttms << RPAREN wth (fn [t] => t | ts => TtmTup ts) and ttm1' () = projectLabel && $ttm1' wth (fn (n, k) => (fn t => k (TtmPro (t, n)))) || chooseLabel && $ttm1' wth (fn (n, k) => (fn t => k (TtmChoose (t, n)))) || $ttm0 && $ttm1' wth (fn (t, k) => (fn t' => k (TtmApp (t', t)))) || succeed (fn t => t) and ttm1 () = $ttm0 && $ttm1' wth (fn (t, k) => k (t)) and ttm1_or_operator () = $ttm1 wth (fn t => Atm t) || operator wth (fn opr => Opr opr) and ttm () = operator && repeat1 ($ttm1_or_operator) wth (fn (opr, items) => result (resolveFixity (Opr opr :: items))) || $ttm1 && $ttm' wth (fn (t, k) => k (t)) || IF >> $ttm && THEN >> $ttm && ELSE >> $ttm wth (fn (t1, (t2, t3)) => TtmIf (t1, t2, t3)) || LET >> var && EQ >> $ttm && IN >> $ttm wth (fn (x, (t1, t2)) => TtmLet (x, t1, t2)) || LETREC >> var && $otyp && EQ >> $ttm && IN >> $ttm wth (fn (x, (otp, (t1, t2))) => TtmLetrec (x, otp, t1, t2)) || FN >> $varotyps && $otyp && EQGT >> $ttm wth (fn (xotps, (otp, t)) => TtmFn (xotps, otp, t)) || FIX >> $ttm0 wth (fn t => TtmFix t) || TRY >> $ttm && WITH >> $clauses wth (fn (t, cs) => TtmTry (t, cs)) || FUN >> var && $varotyps && $otyp && EQGT >> $ttm && (repeat ($funs)) wth (fn (f, (xotps, (otp, (t, fs)))) => case fs of [] => TtmFun (f, xotps, otp, t) | _ => TtmFuns ((f, xotps, otp, t) :: fs)) and ttm' () = operator && repeat1 ($ttm1_or_operator) wth (fn (opr, items) => fn t => result (resolveFixity (Atm t :: Opr opr :: items))) || succeed (fn t => t) and ttms () = $ttm && repeat (COMMA >> $ttm) wth (fn (t, ts) => t :: ts) || succeed [] and funs () = AND >> var && $varotyps && $otyp && EQGT >> $ttm wth (fn (f, (xotps, (otp, t))) => (f, xotps, otp, t)) and clause () = pat && EQGT >> $ttm and clauses () = separate ($clause) BAR fun parseFile file = let val s = Input.readFile file val s = Pos.markStream s val s = transform Tokens.token s in let val (x, _, _, _) = ($ttm -- done) (Pos.initpos, s) in x end handle Fail err => error "Syntax error" err end fun parseKeybd () = let val s = Input.readKeybd () val s = Pos.markStream s val s = transform Tokens.token s in let val (x, _, _, _) = ($ttm -- done) (Pos.initpos, s) in x end handle Fail err => error "Syntax error" err end fun parseString str = let val s = Input.readString str val s = Pos.markStream s val s = transform Tokens.token s in let val (x, _, _, _) = ($ttm -- done) (Pos.initpos, s) in x end handle Fail err => error "Syntax error" err end val _ = Compiler.Control.Print.printDepth := 50 end