CSNagoyaの宿題

皆さんがんばって実装しているみたいなので、僕も今回SMLでスキャナを実装した。
Gaucheは後回し。すいません。
でも、関数型で実装するイメージはこれでだいぶつかんだと思う。
TextIOストラクチャのlookaheadは、入力を消費せずに次の1文字を先読みする関数なので、これを使えば1文字ずつ読み込んでいくスタイルでも難なく実装できた。
とはいえ、けっこう手抜きなところがあって申し訳ないですが。
次回の勉強会ではパーサの実装に入る予定なので、それまでにGauche版スキャナを実装せねば……。

(* Tokens produced by the scanner.
   Constructors that carry a string hold the text read from the input:
   the identifier, the digit run, the string body, or the keyword spelling. *)
datatype token
  = EOF
  (* names and literals *)
  | IDENT of string
  | NUMBER of string
  | STR of string
  (* reserved words *)
  | MODULE of string
  | BEGIN of string
  | END of string
  | VAR of string
  | INTEGER of string
  | STRING of string
  | IF of string
  | THEN of string
  | ELSE of string
  | WHILE of string
  | DO of string
  (* punctuation *)
  | OPEN
  | CLOSE
  | PERIOD
  | COMMA
  | COLON
  | SEMICOLON
  (* operators *)
  | EQ
  | MINUS
  | PLUS
  | MULT
  | DIV
  | LT
  | LE
  | GT
  | GE
  | NE
  | MYASSIGN
  (* any character the scanner does not recognise *)
  | UNKNOWN


(* Short alias for the Basis Library TextIO structure; the scanner
   functions below refer to it as T. *)
structure T = TextIO
(* Discard whitespace at the front of `ins`.
   Stops, without consuming anything further, at the first character for
   which Char.isSpace is false, or at end of stream.  Returns unit. *)
fun skipSpaces ins =
    let
        (* True when an unread character exists and it is whitespace. *)
        fun spaceAhead () =
            case T.lookahead ins of
                NONE => false
              | SOME ch => Char.isSpace ch
    in
        if spaceAhead ()
        then (ignore (T.input1 ins); skipSpaces ins)
        else ()
    end

(* Read characters from `ins` for as long as Char.isAlphaNum holds and
   return them as an IDENT token.  The first character that is not
   alphanumeric is left unread.  If no alphanumeric character is
   available the result is IDENT "". *)
fun getID ins =
    let
        (* `seen` holds the characters consumed so far, newest first. *)
        fun collect seen =
            case T.lookahead ins of
                NONE => seen
              | SOME ch =>
                if Char.isAlphaNum ch
                then (ignore (T.input1 ins); collect (ch :: seen))
                else seen
    in
        IDENT (implode (rev (collect [])))
    end


(* Convert an IDENT token whose text is a reserved word into the matching
   keyword token, carrying the same text.  An IDENT that is not a reserved
   word is returned as an equal IDENT.  Matching is case-sensitive: only
   the upper-case spellings listed here are keywords.
   Any token that is not an IDENT is returned unchanged, so the function
   is total rather than raising Match. *)
fun isReserved (IDENT str) =
      (case str of
           "MODULE"  => MODULE str
         | "BEGIN"   => BEGIN str
         | "END"     => END str
         | "VAR"     => VAR str
         | "INTEGER" => INTEGER str
         | "STRING"  => STRING str
         | "IF"      => IF str
         | "THEN"    => THEN str
         | "ELSE"    => ELSE str
         | "WHILE"   => WHILE str
         | "DO"      => DO str
         | _         => IDENT str)
  | isReserved other = other


(* Read characters from `ins` for as long as Char.isDigit holds and return
   them, unconverted, as a NUMBER token.  The first non-digit is left
   unread.  If no digit is available the result is NUMBER "". *)
fun getNum ins =
    let
        (* `digits` holds the characters consumed so far, newest first. *)
        fun collect digits =
            case T.lookahead ins of
                NONE => digits
              | SOME ch =>
                if Char.isDigit ch
                then (ignore (T.input1 ins); collect (ch :: digits))
                else digits
    in
        NUMBER (implode (rev (collect [])))
    end
(* Read the body of a string literal from `ins`.
   The opening double quote is expected to have been consumed already
   (lex does so before calling this function).  Characters are consumed
   up to and including the closing double quote; the quote itself is not
   part of the result.  No escape sequences are interpreted.

   Returns STR with the collected text.  If the stream ends before a
   closing quote is seen, returns UNKNOWN instead of raising Option. *)
fun getStr ins =
    let
        (* `body` holds the characters consumed so far, newest first. *)
        fun collect body =
            case T.input1 ins of
                NONE => UNKNOWN                          (* unterminated literal *)
              | SOME #"\"" => STR (implode (rev body))
              | SOME ch => collect (ch :: body)
    in
        collect []
    end

(* Read and return the next token from `ins`.
   Leading whitespace is skipped first.  At end of stream the result is EOF.
   A digit starts a NUMBER; a letter starts an identifier, which is then
   mapped to a keyword token by isReserved; a double quote starts a string
   literal.  The two-character operators ":=", "<=" and ">=" are recognised
   as MYASSIGN, LE and GE.  Any other unrecognised character is consumed
   and reported as UNKNOWN.

   NOTE(review): the NE token is never produced here because the source
   spelling of "not equal" in the target language is not evident from this
   file -- confirm and add it. *)
fun lex ins =
    let
        (* Called after the first character of an operator was consumed.
           If `second` is the next character, consume it and yield `pair`;
           otherwise leave the stream untouched and yield `single`. *)
        fun follow (second, pair, single) =
            case T.lookahead ins of
                SOME ch => if ch = second
                           then (ignore (T.input1 ins); pair)
                           else single
              | NONE => single
    in
        skipSpaces ins;
        case T.lookahead ins of
            NONE => EOF
          | SOME c =>
            if Char.isDigit c then getNum ins
            else if Char.isAlpha c then isReserved (getID ins)
            else
                (* Consume the character we just peeked at, then classify it. *)
                (ignore (T.input1 ins);
                 case c of
                     #"+"  => PLUS
                   | #"-"  => MINUS
                   | #"*"  => MULT
                   | #"/"  => DIV
                   | #"="  => EQ
                   | #"<"  => follow (#"=", LE, LT)
                   | #">"  => follow (#"=", GE, GT)
                   | #":"  => follow (#"=", MYASSIGN, COLON)
                   | #";"  => SEMICOLON
                   | #","  => COMMA
                   | #"("  => OPEN
                   | #")"  => CLOSE
                   | #"."  => PERIOD
                   | #"\"" => getStr ins
                   | _     => UNKNOWN)
    end


(* Render a token as text for debugging output.
   Tokens that carry a string are shown as NAME(text); all others are
   shown by name alone.  There is one clause per constructor and no
   catch-all, so the compiler reports any constructor added later that is
   not handled here. *)
fun toString EOF = "EOF"
  | toString (IDENT str) = "ID(" ^ str ^ ")"
  | toString (NUMBER str) = "NUMBER(" ^ str ^ ")"
  | toString (STR str) = "STR(" ^ str ^ ")"
  | toString (MODULE str) = "MODULE(" ^ str ^ ")"
  | toString (BEGIN str) = "BEGIN(" ^ str ^ ")"
  | toString (END str) = "END(" ^ str ^ ")"
  | toString (VAR str) = "VAR(" ^ str ^ ")"
  | toString (INTEGER str) = "INTEGER(" ^ str ^ ")"
  | toString (STRING str) = "STRING(" ^ str ^ ")"
  | toString (IF str) = "IF(" ^ str ^ ")"
  | toString (THEN str) = "THEN(" ^ str ^ ")"
  | toString (ELSE str) = "ELSE(" ^ str ^ ")"
  | toString (WHILE str) = "WHILE(" ^ str ^ ")"
  | toString (DO str) = "DO(" ^ str ^ ")"
  | toString OPEN = "OPEN"
  | toString CLOSE = "CLOSE"
  | toString PERIOD = "PERIOD"
  | toString COMMA = "COMMA"
  | toString COLON = "COLON"
  | toString SEMICOLON = "SEMICOLON"
  | toString EQ = "EQ"
  | toString MINUS = "MINUS"
  | toString PLUS = "PLUS"
  | toString MULT = "MULT"
  | toString DIV = "DIV"
  | toString LT = "LT"
  | toString LE = "LE"
  | toString GT = "GT"
  | toString GE = "GE"
  | toString NE = "NE"
  | toString MYASSIGN = "MYASSIGN"
  | toString UNKNOWN = "UNKNOWN"

(* Driver for manual testing: read tokens from standard input with lex and
   print each one, rendered by toString, on its own line.  Stops when lex
   returns EOF; the EOF token itself is not printed. *)
fun testLex () =
    case lex TextIO.stdIn of
        EOF => ()
      | tok => (print (toString tok ^ "\n"); testLex ())