11-- -----------------------------------------------------------------------------
2- --
2+ --
33-- ParseMonad.hs, part of Alex
44--
55-- (c) Simon Marlow 2003
99module ParseMonad (
1010 AlexInput , alexInputPrevChar , alexGetChar , alexGetByte ,
1111 AlexPosn (.. ), alexStartPos ,
12-
12+ Warning ( .. ), warnIfNullable ,
1313 P , runP , StartCode , failP , lookupSMac , lookupRMac , newSMac , newRMac ,
1414 setStartCode , getStartCode , getInput , setInput ,
1515 ) where
@@ -23,7 +23,7 @@ import UTF8
2323#if __GLASGOW_HASKELL__ < 710
2424import Control.Applicative ( Applicative (.. ) )
2525#endif
26- import Control.Monad ( liftM , ap )
26+ import Control.Monad ( liftM , ap , when )
2727import Data.Word (Word8 )
2828-- -----------------------------------------------------------------------------
2929-- The input type
@@ -49,15 +49,15 @@ alexGetChar (_, _ ,_ : _, _) = undefined -- hide compiler warning
4949alexGetByte :: AlexInput -> Maybe (Byte ,AlexInput )
5050alexGetByte (p,c,(b: bs),s) = Just (b,(p,c,bs,s))
5151alexGetByte (_,_,[] ,[] ) = Nothing
52- alexGetByte (p,_,[] ,(c: s)) = let p' = alexMove p c
52+ alexGetByte (p,_,[] ,(c: s)) = let p' = alexMove p c
5353 (b: bs) = UTF8. encode c
5454 in p' `seq` Just (b, (p', c, bs, s))
5555
5656-- -----------------------------------------------------------------------------
5757-- Token positions
5858
5959-- `Posn' records the location of a token in the input text. It has three
60- -- fields: the address (number of chacaters preceding the token), line number
60+ -- fields: the address (number of charaters preceding the token), line number
6161-- and column of a token within the file. `start_pos' gives the position of the
6262-- start of the file and `eof_pos' a standard encoding for the end of file.
6363-- `move_pos' calculates the new position after traversing a given character,
@@ -77,15 +77,22 @@ alexMove (AlexPn a l c) _ = AlexPn (a+1) l (c+1)
7777-- -----------------------------------------------------------------------------
7878-- Alex lexing/parsing monad
7979
80+ data Warning
81+ = WarnNullableRExp
82+ { _warnPos :: AlexPosn -- ^ The position of the code following the regex.
83+ , _warnText :: String -- ^ Warning text.
84+ }
85+
8086type ParseError = (Maybe AlexPosn , String )
8187type StartCode = Int
8288
83- data PState = PState {
84- smac_env :: Map String CharSet ,
85- rmac_env :: Map String RExp ,
86- startcode :: Int ,
87- input :: AlexInput
88- }
89+ data PState = PState
90+ { warnings :: [Warning ] -- ^ Stack of warnings, top = last warning.
91+ , smac_env :: Map String CharSet
92+ , rmac_env :: Map String RExp
93+ , startcode :: Int
94+ , input :: AlexInput
95+ }
8996
9097newtype P a = P { unP :: PState -> Either ParseError (PState ,a ) }
9198
@@ -102,15 +109,27 @@ instance Monad P where
102109 Right (env',ok) -> unP (k ok) env'
103110 return = pure
104111
105- runP :: String -> (Map String CharSet , Map String RExp )
106- -> P a -> Either ParseError a
107- runP str (senv,renv) (P p)
112+ -- | Run the parser on given input.
113+ runP :: String
114+ -- ^ Input string.
115+ -> (Map String CharSet , Map String RExp )
116+ -- ^ Character set and regex definitions.
117+ -> P a
118+ -- ^ Parsing computation.
119+ -> Either ParseError ([Warning ], a )
120+ -- ^ List of warnings in first-to-last order, result.
121+ runP str (senv,renv) (P p)
108122 = case p initial_state of
109123 Left err -> Left err
110- Right (_,a) -> Right a
111- where initial_state =
112- PState { smac_env= senv, rmac_env= renv,
113- startcode = 0 , input= (alexStartPos,' \n ' ,[] ,str) }
124+ Right (s, a) -> Right (reverse (warnings s), a)
125+ where
126+ initial_state = PState
127+ { warnings = []
128+ , smac_env = senv
129+ , rmac_env = renv
130+ , startcode = 0
131+ , input = (alexStartPos, ' \n ' , [] , str)
132+ }
114133
115134failP :: String -> P a
116135failP str = P $ \ PState { input = (p,_,_,_) } -> Left (Just p,str)
@@ -121,24 +140,24 @@ failP str = P $ \PState{ input = (p,_,_,_) } -> Left (Just p,str)
121140
122141lookupSMac :: (AlexPosn ,String ) -> P CharSet
123142lookupSMac (posn,smac)
124- = P $ \ s@ PState { smac_env = senv } ->
143+ = P $ \ s@ PState { smac_env = senv } ->
125144 case Map. lookup smac senv of
126145 Just ok -> Right (s,ok)
127146 Nothing -> Left (Just posn, " unknown set macro: $" ++ smac)
128147
129148lookupRMac :: String -> P RExp
130- lookupRMac rmac
131- = P $ \ s@ PState { rmac_env = renv } ->
149+ lookupRMac rmac
150+ = P $ \ s@ PState { rmac_env = renv } ->
132151 case Map. lookup rmac renv of
133152 Just ok -> Right (s,ok)
134153 Nothing -> Left (Nothing , " unknown regex macro: %" ++ rmac)
135154
136155newSMac :: String -> CharSet -> P ()
137- newSMac smac set
156+ newSMac smac set
138157 = P $ \ s -> Right (s{smac_env = Map. insert smac set (smac_env s)}, () )
139158
140159newRMac :: String -> RExp -> P ()
141- newRMac rmac rexp
160+ newRMac rmac rexp
142161 = P $ \ s -> Right (s{rmac_env = Map. insert rmac rexp (rmac_env s)}, () )
143162
144163setStartCode :: StartCode -> P ()
@@ -152,3 +171,21 @@ getInput = P $ \s -> Right (s, input s)
152171
153172setInput :: AlexInput -> P ()
154173setInput inp = P $ \ s -> Right (s{ input = inp }, () )
174+
175+ -- | Add a warning if given regular expression is nullable
176+ -- unless the user wrote the regex 'Eps'.
177+ warnIfNullable
178+ :: RExp -- ^ Regular expression.
179+ -> AlexPosn -- ^ Position associated to regular expression.
180+ -> P ()
181+ -- If the user wrote @()@, they wanted to match the empty sequence!
182+ -- Thus, skip the warning then.
183+ warnIfNullable Eps _ = return ()
184+ warnIfNullable r pos = when (nullable r) $ P $ \ s ->
185+ Right (s{ warnings = WarnNullableRExp pos w : warnings s}, () )
186+ where
187+ w = unwords
188+ [ " Regular expression"
189+ , show r
190+ , " matches the empty string."
191+ ]
0 commit comments