From 874c50b8fd3e4a17348f6f2bd43dfd543567c8e0 Mon Sep 17 00:00:00 2001 From: "Anton Vl. Kalinin" Date: Thu, 4 Aug 2022 14:41:03 +0300 Subject: [PATCH] [ haskell, #423 ] structured errors in the Haskell backend New options are introduced: "--structured-errors" and "--string-errors". They can specify the parser failure type. --- source/src/BNFC/Backend/Haskell.hs | 22 ++-- source/src/BNFC/Backend/Haskell/CFtoAlex3.hs | 7 +- source/src/BNFC/Backend/Haskell/CFtoHappy.hs | 122 +++++++++++++++---- source/src/BNFC/Backend/Haskell/MkErrM.hs | 11 +- source/src/BNFC/Backend/HaskellGADT.hs | 4 +- source/src/BNFC/Options.hs | 17 ++- source/test/BNFC/Backend/HaskellSpec.hs | 5 + source/test/BNFC/Hspec.hs | 10 ++ 8 files changed, 159 insertions(+), 39 deletions(-) diff --git a/source/src/BNFC/Backend/Haskell.hs b/source/src/BNFC/Backend/Haskell.hs index 93d3600f..293b8c20 100644 --- a/source/src/BNFC/Backend/Haskell.hs +++ b/source/src/BNFC/Backend/Haskell.hs @@ -33,7 +33,7 @@ import qualified BNFC.Backend.Common.Makefile as Makefile import BNFC.CF import BNFC.Options ( SharedOptions(..), TokenText(..), AlexVersion(..), HappyMode(..) - , isDefault, printOptions + , isDefault, printOptions, ErrorType (..) ) import BNFC.Utils (when, table, getZonedTimeTruncatedToSeconds) @@ -68,12 +68,12 @@ makeHaskell opts cf = do -- Generate Happy parser and matching test program. do mkfile (happyFile opts) commentWithEmacsModeHint $ - cf2Happy parMod absMod lexMod (glr opts) (tokenText opts) (functor opts) cf + cf2Happy parMod absMod lexMod (glr opts) (tokenText opts) (functor opts) (errorType opts) cf -- liftIO $ printf "%s Tested with Happy 1.15\n" (happyFile opts) mkfile (tFile opts) comment $ testfile opts cf -- Both Happy parser and skeleton (template) rely on Err. - mkfile (errFile opts) comment $ mkErrM errMod + mapM_ (mkfile (errFile opts) comment) $ mkErrM errMod (errorType opts) mkfile (templateFile opts) comment $ cf2Template (templateFileM opts) absMod (functor opts) cf -- Generate txt2tags documentation. @@ -335,7 +335,7 @@ testfile opts cf = unlines $ concat $ [ [ [ "import " , absFileM opts , " (" ++ if_glr impTopCat ++ ")" ] ] , [ [ "import " , layoutFileM opts , " ( resolveLayout )" ] | lay ] , [ [ "import " , alexFileM opts , " ( Token, mkPosToken )" ] - , [ "import " , happyFileM opts , " ( " ++ impParser ++ ", myLexer" ++ impParGLR ++ " )" ] + , [ "import " , happyFileM opts , " ( " ++ impParser ++ ", myLexer" ++ impParGLR ++ ", Err )" ] , [ "import " , printerFileM opts , " ( Print, printTree )" ] , [ "import " , templateFileM opts , " ()" ] ] @@ -344,7 +344,6 @@ testfile opts cf = unlines $ concat $ , [ "import qualified Data.Map ( Map, lookup, toList )" | use_glr ] , [ "import Data.Maybe ( fromJust )" | use_glr ] , [ "" - , "type Err = Either String" , if use_glr then "type ParseFun a = [[Token]] -> (GLRResult, GLR_Output (Err a))" else "type ParseFun a = [Token] -> Err a" @@ -357,7 +356,7 @@ testfile opts cf = unlines $ concat $ , "runFile v p f = putStrLn f >> readFile f >>= run v p" , "" , "run :: (" ++ xpr ++ if_glr "TreeDecode a, " ++ "Print a, Show a) => Verbosity -> ParseFun a -> " ++ tokenTextType (tokenText opts) ++ " -> IO ()" - , (if use_glr then runGlr else runStd use_xml) myLLexer + , if use_glr then runGlr myLLexer else runStd use_xml myLLexer (errorType opts) , "showTree :: (Show a, Print a) => Int -> a -> IO ()" , "showTree v tree = do" , " putStrV v $ \"\\n[Abstract Syntax]\\n\\n\" ++ show tree" @@ -408,8 +407,8 @@ testfile opts cf = unlines $ concat $ (hasTopLevelLayout, layoutKeywords, _) = layoutPragmas cf useTopLevelLayout = isJust hasTopLevelLayout -runStd :: Bool -> (String -> String) -> String -runStd xml myLLexer = unlines $ concat +runStd :: Bool -> (String -> String) -> ErrorType -> String +runStd xml myLLexer errorType = unlines $ concat [ [ "run v p s =" , " case p ts of" , " Left err -> do" @@ -417,8 +416,11 @@ runStd xml myLLexer = unlines $ concat , " putStrV v \"Tokens:\"" , " mapM_ (putStrV v . showPosToken . mkPosToken) ts" -- , " putStrV v $ show ts" - , " putStrLn err" - , " exitFailure" + ] + , case errorType of + ErrorTypeString -> [ " putStrLn err" ] + ErrorTypeStructured -> [ " putStrLn $ \"Error: \" ++ show err" ] + , [ " exitFailure" , " Right tree -> do" , " putStrLn \"\\nParse Successful!\"" , " showTree v tree" diff --git a/source/src/BNFC/Backend/Haskell/CFtoAlex3.hs b/source/src/BNFC/Backend/Haskell/CFtoAlex3.hs index 65d0dbf5..019ddf7b 100644 --- a/source/src/BNFC/Backend/Haskell/CFtoAlex3.hs +++ b/source/src/BNFC/Backend/Haskell/CFtoAlex3.hs @@ -268,8 +268,11 @@ restOfAlex tokenText cf = concat , "-- A modified \"posn\" wrapper." , "-------------------------------------------------------------------" , "" - , "data Posn = Pn !Int !Int !Int" - , " deriving (Eq, Show, Ord)" + , "data Posn = Pn" + , " { pnAbsolute :: !Int" + , " , pnLine :: !Int" + , " , pnColumn :: !Int" + , " } deriving (Eq, Show, Ord)" , "" , "alexStartPos :: Posn" , "alexStartPos = Pn 0 1 1" diff --git a/source/src/BNFC/Backend/Haskell/CFtoHappy.hs b/source/src/BNFC/Backend/Haskell/CFtoHappy.hs index f21f9aba..4efe6800 100644 --- a/source/src/BNFC/Backend/Haskell/CFtoHappy.hs +++ b/source/src/BNFC/Backend/Haskell/CFtoHappy.hs @@ -17,7 +17,7 @@ import Data.List (intersperse) import BNFC.CF import BNFC.Backend.Common.StrUtils (escapeChars) import BNFC.Backend.Haskell.Utils -import BNFC.Options (HappyMode(..), TokenText(..)) +import BNFC.Options (HappyMode(..), TokenText(..), ErrorType(..)) import BNFC.PrettyPrint import BNFC.Utils @@ -42,9 +42,10 @@ cf2Happy -> HappyMode -- ^ Happy mode. -> TokenText -- ^ Use @ByteString@ or @Text@? -> Bool -- ^ AST is a functor? + -> ErrorType -- ^ The error type in the parser result type. -> CF -- ^ Grammar. -> String -- ^ Generated code. -cf2Happy name absName lexName mode tokenText functor cf = unlines +cf2Happy name absName lexName mode tokenText functor errorType cf = unlines [ header name absName lexName tokenText eps , render $ declarations mode functor eps , render $ tokens cf functor @@ -52,7 +53,7 @@ cf2Happy name absName lexName mode tokenText functor cf = unlines , specialRules absName functor tokenText cf , render $ prRules absName functor (rulesForHappy absName functor cf) , "" - , footer absName tokenText functor eps cf + , footer absName tokenText functor eps errorType cf ] where eps = toList $ allEntryPoints cf @@ -66,7 +67,14 @@ header modName absName lexName tokenText eps = unlines $ concat , "{-# LANGUAGE PatternSynonyms #-}" , "" , "module " ++ modName - , " ( happyError" + , " ( Err" + , " , Failure(..)" + , " , InvalidTokenFailure(..)" + , " , UnexpectedTokenFailure(..)" + , " , UnexpectedEofFailure(..)" + -- TODO: maybe we should stop exporting happyError, since there is no reason + -- to use it outside and its type can vary? + , " , happyError" , " , myLexer" ] , map ((" , " ++) . render . parserName) eps @@ -91,6 +99,8 @@ header modName absName lexName tokenText eps = unlines $ concat -- -- no lexer declaration -- %monad { Err } { (>>=) } { return } -- %tokentype {Token} +-- %errorhandlertype explist +-- %error { happyError } -- -- >>> declarations Standard True [Cat "A", Cat "B", ListCat (Cat "B")] -- %name pA_internal A @@ -99,14 +109,18 @@ header modName absName lexName tokenText eps = unlines $ concat -- -- no lexer declaration -- %monad { Err } { (>>=) } { return } -- %tokentype {Token} +-- %errorhandlertype explist +-- %error { happyError } declarations :: HappyMode -> Bool -> [Cat] -> Doc declarations mode functor ns = vcat [ vcat $ map generateP ns , case mode of Standard -> "-- no lexer declaration" - GLR -> "%lexer { myLexer } { Err _ }", - "%monad { Err } { (>>=) } { return }", - "%tokentype" <+> braces (text tokenName) + GLR -> "%lexer { myLexer } { Err _ }" + , "%monad { Err } { (>>=) } { return }" + , "%tokentype" <+> braces (text tokenName) + , "%errorhandlertype explist" + , "%error { happyError }" ] where generateP n = "%name" <+> parserName n <> (if functor then "_internal" else "") <+> text (identCat n) @@ -255,24 +269,88 @@ prRules absM functor = vsep . map prOne -- Finally, some haskell code. -footer :: ModuleName -> TokenText -> Bool -> [Cat] -> CF -> String -footer absName tokenText functor eps _cf = unlines $ concat +footer :: ModuleName -> TokenText -> Bool -> [Cat] -> ErrorType -> CF -> String +footer absName tokenText functor eps errorType _cf = unlines $ concat [ [ "{" , "" - , "type Err = Either String" + , "-- | The parser failure type." + , "--" + , "-- It contains values of more specific failure record types, so that they" + , "-- could easily be extended with new fields." + , "data Failure" + , " = FailureInvalidToken !InvalidTokenFailure" + , " | FailureUnexpectedToken !UnexpectedTokenFailure" + , " | FailureUnexpectedEof !UnexpectedEofFailure" + , " deriving (Show, Eq)" , "" - , "happyError :: [" ++ tokenName ++ "] -> Err a" - , "happyError ts = Left $" - , " \"syntax error at \" ++ tokenPos ts ++ " - , " case ts of" - , " [] -> []" - , " [Err _] -> \" due to lexer error\"" - , unwords - [ " t:_ -> \" before `\" ++" - , "(prToken t)" - -- , tokenTextUnpack tokenText "(prToken t)" - , "++ \"'\"" - ] + , "-- | The lexer error type." + , "newtype InvalidTokenFailure = InvalidTokenFailure" + , " { itfPosn :: Posn -- ^ The position of the beginning of an invalid token." + , " } deriving (Show, Eq)" + , "" + , "-- | The parser error: no production is found to match a token." + , "data UnexpectedTokenFailure = UnexpectedTokenFailure" + , " { utfPosn :: !Posn -- ^ The position of the beginning of the unexpected token." + , " , utfTokenText :: !(" ++ tokenTextType tokenText ++ ")" + , " , utfExpectedTokens :: [String] -- ^ Names of possible tokens at this position according to the grammar." + , " } deriving (Show, Eq)" + , "" + , "-- | The parser error: the end of file is encountered but a token is expected." + , "newtype UnexpectedEofFailure = UnexpectedEofFailure" + , " { ueofExpectedTokens :: [String] -- ^ Names of possible tokens at this position according to the grammar." + , " } deriving (Show, Eq)" + , "" + ] + , case errorType of + ErrorTypeStructured -> + [ "type Err = Either Failure" + , "" + , "happyError :: ([" ++ tokenName ++ "], [String]) -> Err a" + , "happyError = Left . uncurry mkFailure" + ] + ErrorTypeString -> + [ "type Err = Either String" + , "" + , "happyError :: ([" ++ tokenName ++ "], [String]) -> Err a" + , "happyError = Left . failureToString . uncurry mkFailure" + , "" + , "failureToString :: Failure -> String" + , "failureToString f =" + , " \"syntax error at \" ++ pos ++ " + , " case f of" + , " FailureUnexpectedEof _ -> []" + , " FailureInvalidToken _ -> \" due to lexer error\"" + , unwords + [ " FailureUnexpectedToken ut -> \" before `\" ++" + , tokenTextUnpack tokenText "(utfTokenText ut)" + , "++ \"'\"" + ] + , " where" + , " pos = case f of" + , " FailureInvalidToken it -> printPosn (itfPosn it)" + , " FailureUnexpectedToken ut -> printPosn (utfPosn ut)" + , " FailureUnexpectedEof _ -> \"end of file\"" + ] + , [ "" + , "mkFailure :: [" ++ tokenName ++ "] -> [String] -> Failure" + , "mkFailure ts expectedTokens = case ts of" + , " [] ->" + , " FailureUnexpectedEof" + , " UnexpectedEofFailure" + , " { ueofExpectedTokens = expectedTokens" + , " }" + , " [Err pos] ->" + , " FailureInvalidToken" + , " InvalidTokenFailure" + , " { itfPosn = pos" + , " }" + , " t : _ ->" + , " FailureUnexpectedToken" + , " UnexpectedTokenFailure" + , " { utfPosn = tokenPosn t" + , " , utfTokenText = tokenText t" + , " , utfExpectedTokens = expectedTokens" + , " }" , "" , "myLexer :: " ++ tokenTextType tokenText ++ " -> [" ++ tokenName ++ "]" , "myLexer = tokens" diff --git a/source/src/BNFC/Backend/Haskell/MkErrM.hs b/source/src/BNFC/Backend/Haskell/MkErrM.hs index 6c452577..b9f099fe 100644 --- a/source/src/BNFC/Backend/Haskell/MkErrM.hs +++ b/source/src/BNFC/Backend/Haskell/MkErrM.hs @@ -11,9 +11,16 @@ module BNFC.Backend.Haskell.MkErrM where import BNFC.PrettyPrint +import BNFC.Options (ErrorType(..)) -mkErrM :: String -> Doc -mkErrM errMod = vcat +-- | Creates @ErrM.hs@ file if needed. +-- +-- It returns 'Nothing' if there is no need to create it. +mkErrM :: String -> ErrorType -> Maybe Doc +mkErrM _ ErrorTypeStructured = Nothing + -- ErrM.hs is only for backward compatibility with old code using string + -- errors, so that we don't create it in case of structured errors. +mkErrM errMod ErrorTypeString = Just $ vcat [ "{-# LANGUAGE CPP #-}" , "" , "#if __GLASGOW_HASKELL__ >= 708" diff --git a/source/src/BNFC/Backend/HaskellGADT.hs b/source/src/BNFC/Backend/HaskellGADT.hs index 40a65ce2..a0da0999 100644 --- a/source/src/BNFC/Backend/HaskellGADT.hs +++ b/source/src/BNFC/Backend/HaskellGADT.hs @@ -43,14 +43,14 @@ makeHaskellGadt opts cf = do mkHsFileHint (alexFile opts) $ cf2alex3 lexMod (tokenText opts) cf liftIO $ putStrLn " (Use Alex 3 to compile.)" mkHsFileHint (happyFile opts) $ - cf2Happy parMod absMod lexMod (glr opts) (tokenText opts) False cf + cf2Happy parMod absMod lexMod (glr opts) (tokenText opts) False (errorType opts) cf liftIO $ putStrLn " (Tested with Happy 1.15 - 1.20)" mkHsFile (templateFile opts) $ cf2Template (templateFileM opts) absMod cf mkHsFile (printerFile opts) $ cf2Printer StringToken False True prMod absMod cf when (hasLayout cf) $ mkHsFile (layoutFile opts) $ cf2Layout layMod lexMod cf mkHsFile (tFile opts) $ Haskell.testfile opts cf - mkHsFile (errFile opts) $ mkErrM errMod + mapM_ (mkHsFile (errFile opts)) $ mkErrM errMod (errorType opts) Makefile.mkMakefile opts $ Haskell.makefile opts cf case xml opts of 2 -> makeXML opts True cf diff --git a/source/src/BNFC/Options.hs b/source/src/BNFC/Options.hs index e044ff40..2b712b84 100644 --- a/source/src/BNFC/Options.hs +++ b/source/src/BNFC/Options.hs @@ -12,7 +12,7 @@ module BNFC.Options , SharedOptions(..) , defaultOptions, isDefault, printOptions , AlexVersion(..), HappyMode(..), OCamlParser(..), JavaLexerParser(..) - , RecordPositions(..), TokenText(..) + , RecordPositions(..), TokenText(..), ErrorType(..) , Ansi(..) , InPackage , removedIn290 @@ -82,6 +82,12 @@ instance Show Target where show TargetPygments = "Pygments" show TargetCheck = "Check LBNF file" +-- | Which error type to use in the generated parser result? +data ErrorType + = ErrorTypeString -- ^ Errors are plain strings. + | ErrorTypeStructured -- ^ Errors are values of a record/structure type. + deriving (Show,Eq,Ord) + -- | Which version of Alex is targeted? data AlexVersion = Alex3 deriving (Show,Eq,Ord,Bounded,Enum) @@ -138,6 +144,7 @@ data SharedOptions = Options , glr :: HappyMode -- ^ Happy option @--glr@. , xml :: Int -- ^ Options @--xml@, generate DTD and XML printers. , agda :: Bool -- ^ Option @--agda@. Create bindings for Agda? + , errorType :: ErrorType -- ^ An error type to use in the parser result. --- OCaml specific , ocamlParser :: OCamlParser -- ^ Option @--menhir@ to switch to @Menhir@. --- Java specific @@ -172,6 +179,7 @@ defaultOptions = Options , glr = Standard , xml = 0 , agda = False + , errorType = ErrorTypeString -- OCaml specific , ocamlParser = OCamlYacc -- Java specific @@ -232,6 +240,7 @@ printOptions opts = unwords . concat $ , [ "--xml" | xml opts == 1 ] , [ "--xmlt" | xml opts == 2 ] , [ "--agda" | agda opts ] + , [ "--structured-errors" | errorType opts == ErrorTypeStructured ] -- C# options: , [ "--vs" | visualStudio opts ] , [ "--wfc" | wcf opts ] @@ -375,6 +384,12 @@ specificOptions = , ( Option [] ["generic"] (NoArg (\o -> o {generic = True})) "Derive Data, Generic, and Typeable instances for AST types" , haskellTargets ) + , ( Option [] ["structured-errors"] (NoArg (\o -> o {errorType = ErrorTypeStructured})) + "Return structured errors from the parser" + , [TargetHaskell] ) + , ( Option [] ["string-errors"] (NoArg (\o -> o {errorType = ErrorTypeString})) + "Return string errors from the parser (default)" + , [TargetHaskell] ) , ( Option [] ["xml"] (NoArg (\o -> o {xml = 1})) "Also generate a DTD and an XML printer" , haskellTargets ) diff --git a/source/test/BNFC/Backend/HaskellSpec.hs b/source/test/BNFC/Backend/HaskellSpec.hs index 0a0626ca..2a6036de 100644 --- a/source/test/BNFC/Backend/HaskellSpec.hs +++ b/source/test/BNFC/Backend/HaskellSpec.hs @@ -50,6 +50,11 @@ spec = do calc <- getCalc makeHaskell calcOptions calc `shouldGenerate` "ErrM.hs" + it "does not generate a error module file for structured errors" $ do + let options = calcOptions { errorType = ErrorTypeStructured } + calc <- getCalc + makeHaskell options calc `shouldNotGenerate` "ErrM.hs" + context "with option -mMyMakefile and the Calc grammar" $ do it "generates a Makefile" $ do calc <- getCalc diff --git a/source/test/BNFC/Hspec.hs b/source/test/BNFC/Hspec.hs index dc91aad6..58d8f860 100644 --- a/source/test/BNFC/Hspec.hs +++ b/source/test/BNFC/Hspec.hs @@ -21,3 +21,13 @@ backend `shouldGenerate` file = do let filenames = map fileName files file `elem` filenames @? printf "file %s not found in %s" file (show filenames) + +shouldNotGenerate + :: Backend -- ^ Backend to run. + -> String -- ^ Name of file that should be created during that run. + -> Expectation +backend `shouldNotGenerate` file = do + files <- execBackend backend + let filenames = map fileName files + file `notElem` filenames + @? printf "unexpected file %s found in %s" file (show filenames)