Scan.hs 5.83 KB
Newer Older
Akim Demaille's avatar
Akim Demaille committed
1 2 3
module Scan (end, scan)
where

4
import Data.Char (isSpace, isAlpha, isAlphaNum, isDigit, isOctDigit, isHexDigit,
5
	     digitToInt, chr)
6
import Data.List (isPrefixOf)
Akim Demaille's avatar
Akim Demaille committed
7 8 9

import Ir
import Token
10
import Location (Loc (Loc), incc, incl, incchar, incs, merge, advance)
Akim Demaille's avatar
Akim Demaille committed
11 12 13 14 15 16 17 18

end :: [Tok] -> [Tok]
end []                           = []
end (TokSeq l : TokEnd l' : tokens)   = (TokSeqEnd (merge l l') : end tokens)
end (TokCall l : TokEnd l' : tokens)  = (TokCallEnd (merge l l') : end tokens)
end (TokLabel l : TokEnd l' : tokens) = (TokLabelEnd (merge l l') : end tokens)
end (token : tokens)             = (token : end tokens)

19
-- Initial part of an identifier: [a-zA-Z$_()+-*/%<>=].
20 21
isIdent :: Char -> Bool
isIdent c
22
  | isAlpha c      = True
23
  | c `elem` "$_()+-*/%<>=" = True
24
  | otherwise      = False
25

26
-- Part of an identifier: [a-zA-Z0-9$_()+-*/%<>=].
27 28
isIdentNum :: Char -> Bool
isIdentNum c
29 30 31
  | isIdent c = True
  | isDigit c = True
  | otherwise = False
32

Akim Demaille's avatar
Akim Demaille committed
33 34 35
scan :: String -> Loc -> [Tok]
scan [] loc = []

36
-- White spaces
Akim Demaille's avatar
Akim Demaille committed
37 38 39
scan ('\n' : cs) loc = scan cs (incl 1 loc)
scan ('\r' : cs) loc = scan cs (incl 1 loc)

40 41 42 43
-- Comments
scan ('/' : '*' : cs) loc = scanComment "*/" cs (incc 2 loc)
scan ('#' : cs) loc = scanComment "\n" cs (incc 1 loc)

Akim Demaille's avatar
Akim Demaille committed
44 45
scan ('"' : cs) loc = scanLiteral cs (incc 1 $ advance loc)

46
-- Negative int
Akim Demaille's avatar
Akim Demaille committed
47 48 49 50 51 52
scan ('-' : input@(c : cs)) loc
    | isDigit c = scanTokInt False (c : cs) (incc 1 $ advance loc)

scan input@(c : cs) loc
    | isSpace c = scan cs (incc 1 loc)
    | isDigit c = scanTokInt True input (advance loc)
53
    | isIdent c = scanTokString input (advance loc)
Akim Demaille's avatar
Akim Demaille committed
54 55
    | otherwise = error (show loc ++ ": unexpected character: " ++ [c])

56
scanComment :: String -> String -> Loc -> [Tok]
57 58 59
scanComment close cs loc
  | isPrefixOf close cs = scan (drop (length close) cs) (incs close loc)
  | otherwise           = scanComment close (tail cs) (incchar (head cs) loc)
Akim Demaille's avatar
Akim Demaille committed
60 61 62 63 64 65 66

scanLiteral :: String -> Loc -> [Tok]
scanLiteral input loc =
    case scanLiteralContent input loc of
	(string, loc, tokens) -> TokLiteral (string, loc) : tokens

escapeToChar :: Char -> Char
67 68 69 70 71 72 73
escapeToChar 'a'  = '\a'
escapeToChar 'b'  = '\b'
escapeToChar 'f'  = '\f'
escapeToChar 'n'  = '\n'
escapeToChar 'r'  = '\r'
escapeToChar 't'  = '\t'
escapeToChar 'v'  = '\v'
Akim Demaille's avatar
Akim Demaille committed
74
escapeToChar '\\' = '\\'
75 76
escapeToChar '"'  = '"'
escapeToChar '\'' = '\''
Akim Demaille's avatar
Akim Demaille committed
77 78 79 80 81 82 83 84 85 86 87 88 89

scanLiteralContent :: String -> Loc -> (String, Loc, [Tok])
scanLiteralContent ('\\' : c : cs) loc
    | c `elem` "abfnrtv\\\"'" =
    case scanLiteralContent cs (incc 2 loc) of
	 (string, loc, tokens) -> (escapeToChar c : string, loc, tokens)

scanLiteralContent ('\\' : 'x' : h : l : cs) loc
    | isHexDigit h && isHexDigit l =
	case scanLiteralContent cs (incc 4 loc) of
	     (string, loc, tokens) -> (c : string, loc, tokens)
	    where c = chr ((digitToInt h) * 16 + (digitToInt l))

90 91 92 93 94 95 96 97
scanLiteralContent ('\\' : h : m : l : cs) loc
    | isOctDigit h && isOctDigit m && isOctDigit l =
	case scanLiteralContent cs (incc 4 loc) of
	     (string, loc, tokens) -> (c : string, loc, tokens)
	    where c = chr ((digitToInt h) * 64
                           + (digitToInt m) * 8
                           + (digitToInt l))

98 99 100
scanLiteralContent ('\\' : c : cs) loc =
    error (show loc ++ ": unexpected escape: \\" ++ [c])

Akim Demaille's avatar
Akim Demaille committed
101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127
scanLiteralContent ('\n' : cs) loc =
    case scanLiteralContent cs (incl 1 loc) of
         (string, loc, tokens) -> ('\n' : string, loc, tokens)

scanLiteralContent ('"' : cs) loc =
    ("", incc 1 loc, scan cs (incc 1 loc))

scanLiteralContent (c : cs) loc =
    case scanLiteralContent cs (incc 1 loc) of
         (string, loc, tokens) -> (c : string, loc, tokens)

scanTokInt :: Bool -> String -> Loc -> [Tok]
scanTokInt True input l =
    TokInt ((read head), loc) : scan tail loc
    where (head, tail) = span isDigit input
	  size         = length head
	  loc          = incc size l

scanTokInt False input l =
    TokInt (-(read head), loc) : scan tail loc
    where (head, tail) = span isDigit input
	  size         = length head
	  loc          = incc size l

scanTokString :: String -> Loc -> [Tok]
scanTokString input l =
    scanTokKeyword head loc : scan tail loc
128
    where (head, tail) = span isIdentNum input
Akim Demaille's avatar
Akim Demaille committed
129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147
	  size         = length head
	  loc          = incc size l

scanTokKeyword :: String -> Loc -> Tok
scanTokKeyword "binop" loc = TokBinop loc
scanTokKeyword "cjump" loc = TokCJump loc
scanTokKeyword "call" loc  = TokCall loc
scanTokKeyword "const" loc = TokConst loc
scanTokKeyword "end" loc   = TokEnd loc
scanTokKeyword "eseq" loc  = TokESeq loc
scanTokKeyword "frame" loc = TokFrame loc
scanTokKeyword "sxp" loc   = TokExp loc
scanTokKeyword "jump" loc  = TokJump loc
scanTokKeyword "label" loc = TokLabel loc
scanTokKeyword "mem" loc   = TokMem loc
scanTokKeyword "move" loc  = TokMove loc
scanTokKeyword "name" loc  = TokName loc
scanTokKeyword "seq" loc   = TokSeq loc
scanTokKeyword "temp" loc  = TokTemp loc
148 149 150 151 152 153 154 155 156 157 158 159 160

scanTokKeyword "add" loc   = TokOp (Add, loc)
scanTokKeyword "mul" loc   = TokOp (Mul, loc)
scanTokKeyword "sub" loc   = TokOp (Sub, loc)
scanTokKeyword "div" loc   = TokOp (Div, loc)
scanTokKeyword "mod" loc   = TokOp (Mod, loc)

scanTokKeyword "(+)" loc   = TokOp (Add, loc)
scanTokKeyword "(*)" loc   = TokOp (Mul, loc)
scanTokKeyword "(-)" loc   = TokOp (Sub, loc)
scanTokKeyword "(/)" loc   = TokOp (Div, loc)
scanTokKeyword "(%)" loc   = TokOp (Mod, loc)

Akim Demaille's avatar
Akim Demaille committed
161 162 163 164 165 166
scanTokKeyword "eq" loc    = TokRelop (Eq, loc)
scanTokKeyword "ne" loc    = TokRelop (Ne, loc)
scanTokKeyword "lt" loc    = TokRelop (Lt, loc)
scanTokKeyword "gt" loc    = TokRelop (Gt, loc)
scanTokKeyword "le" loc    = TokRelop (Le, loc)
scanTokKeyword "ge" loc    = TokRelop (Ge, loc)
167 168 169 170

scanTokKeyword "(=)"  loc = TokRelop (Eq, loc)
scanTokKeyword "(<>)" loc = TokRelop (Ne, loc)
scanTokKeyword "(<)"  loc = TokRelop (Lt, loc)
171 172
scanTokKeyword "(<=)" loc = TokRelop (Le, loc)
scanTokKeyword "(>)"  loc = TokRelop (Gt, loc)
173 174
scanTokKeyword "(>=)" loc = TokRelop (Ge, loc)

Akim Demaille's avatar
Akim Demaille committed
175
scanTokKeyword str loc     = TokString (str, loc)