diff --git a/CHANGELOG.md b/CHANGELOG.md index c82852e..3333030 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ Breaking changes: New features: - add `MonadAsk` and `MonadReader` instances (#208 by @bentongxyz) +- Add `Parsing.String.parseErrorHuman` (#209 by @jamesdbrock) Other improvements: diff --git a/src/Parsing.purs b/src/Parsing.purs index ee6fb29..1cf290f 100644 --- a/src/Parsing.purs +++ b/src/Parsing.purs @@ -51,9 +51,11 @@ import Data.Tuple (Tuple(..), fst) -- | the position in the input stream at which the error occurred. data ParseError = ParseError String Position +-- | Get the `Message` from a `ParseError` parseErrorMessage :: ParseError -> String parseErrorMessage (ParseError msg _) = msg +-- | Get the `Position` from a `ParseError`. parseErrorPosition :: ParseError -> Position parseErrorPosition (ParseError _ pos) = pos diff --git a/src/Parsing/String.purs b/src/Parsing/String.purs index 21e79be..d8869f9 100644 --- a/src/Parsing/String.purs +++ b/src/Parsing/String.purs @@ -1,4 +1,5 @@ --- | Primitive parsers for working with an input stream of type `String`. +-- | Primitive parsers, combinators and functions for working with an input +-- | stream of type `String`. -- | -- | All of these primitive parsers will consume when they succeed. -- | @@ -47,17 +48,21 @@ module Parsing.String , regex , anyTill , consumeWith + , parseErrorHuman ) where import Prelude hiding (between) import Control.Monad.Rec.Class (Step(..), tailRecM) +import Data.Array (replicate) import Data.Array.NonEmpty as NonEmptyArray import Data.Either (Either(..)) import Data.Enum (fromEnum, toEnum) import Data.Function.Uncurried (mkFn5, runFn2) +import Data.Int (odd) import Data.Maybe (Maybe(..), fromJust) import Data.String (CodePoint, Pattern(..), codePointAt, length, null, splitAt, stripPrefix, uncons) +import Data.String as CodePoint import Data.String as String import Data.String.CodeUnits as SCU import Data.String.Regex as Regex @@ -339,4 +344,98 @@ anyTill p = do ( do _ <- anyCodePoint pure $ Loop unit - ) \ No newline at end of file + ) + +-- | Returns three `String`s which, when printed line-by-line, will show +-- | a human-readable parsing error message with context. +-- | +-- | #### Input arguments +-- | +-- | * The first argument is the input `String` given to the parser which +-- | errored. +-- | * The second argument is a positive `Int` which indicates how many +-- | characters of input `String` context are wanted around the parsing error. +-- | * The third argument is the `ParseError` for the input `String`. +-- | +-- | #### Output `String`s +-- | +-- | 1. The parse error message and the parsing position. +-- | 2. A string with an arrow that points to the error position in the +-- | input context (in a fixed-width font). +-- | 3. The input context. A substring of the input which tries to center +-- | the error position and have the wanted length and not include +-- | any newlines or carriage returns. +-- | +-- | If the parse error occurred on a carriage return or newline character, +-- | then that character will be included at the end of the input context. +-- | +-- | #### Example +-- | +-- | ``` +-- | let input = "12345six789" +-- | case runParser input (replicateA 9 String.Basic.digit) of +-- | Left err -> +-- | log $ String.joinWith "\n" $ parseErrorHuman input 20 err +-- | ``` +-- | --- +-- | ``` +-- | Expected digit at position index:5 (line:1, column:6) +-- | ▼ +-- | 12345six789 +-- | ``` +parseErrorHuman :: String -> Int -> ParseError -> Array String +parseErrorHuman input contextSize (ParseError msg (Position { line, column, index })) = + -- inspired by + -- https://github.com/elm/parser/blob/master/README.md#tracking-context + [ msg <> " at position index:" <> show index + <> " (line:" + <> show line + <> ", column:" + <> show column + <> ")" + , (String.joinWith "" (replicate (lineIndex - minPosBefore) " ")) <> "▼" -- best way to construct string of spaces? + , inputContext + ] + where + -- select the input line in which the error appears + -- sadly we can't use splitCap because of circular module dependency and we + -- don't feel like separating out an “Internal” module. + { posBegin, posEnd, lineBegin } = go 0 input 0 input + where + go posBegin lineBegin posEnd lineEnd = + case String.uncons lineEnd of + Just { head, tail } | head == CodePoint.codePointFromChar '\n' -> + if posEnd == index -- uh-oh, error at the newline + -- so include the newline at the end of the selected line. + then { posBegin, posEnd: posEnd + 1, lineBegin } + else if posEnd > index then { posBegin, posEnd, lineBegin } + else go (posEnd + 1) tail (posEnd + 1) tail + Just { head, tail } | head == CodePoint.codePointFromChar '\r' -> + if posEnd == index -- uh-oh, error at the carriage return + -- so include the carriage return at the end of the selected line. + -- we don't need to add the possible following newline because + -- we're not printing a line break here, we're just making sure + -- to include the character at the position which errored. + then { posBegin, posEnd: posEnd + 1, lineBegin } + else if posEnd > index then { posBegin, posEnd, lineBegin } + else go (posEnd + 1) tail (posEnd + 1) tail + Just { tail } -> go posBegin lineBegin (posEnd + 1) tail + _ -> { posBegin, posEnd, lineBegin } + lineSelect = String.take (posEnd - posBegin) lineBegin + lineIndex = index - posBegin + lineLength = String.length lineSelect + + -- position minus half of context + bestPosBefore = lineIndex - (contextSize / 2) + -- position plus half of context + bestPosAfter = lineIndex + (contextSize / 2) + if odd contextSize then 1 else 0 + + -- constrain the context window to selected line + -- grow the context window to contextSize if the error is at beginning or end of selected line + Tuple minPosBefore maxPosAfter = + if bestPosBefore >= 0 then + if bestPosAfter <= lineLength then Tuple bestPosBefore bestPosAfter + else Tuple (max 0 (lineLength - contextSize)) lineLength + else Tuple 0 (min lineLength contextSize) + + inputContext = String.take (maxPosAfter - minPosBefore) $ String.drop minPosBefore lineSelect diff --git a/test/Main.purs b/test/Main.purs index e2cb1b5..a4cb822 100644 --- a/test/Main.purs +++ b/test/Main.purs @@ -13,7 +13,7 @@ import Control.Monad.State (State, lift, modify, runState) import Data.Array (some, toUnfoldable) import Data.Array as Array import Data.Bifunctor (lmap, rmap) -import Data.Either (Either(..), either, hush) +import Data.Either (Either(..), either, fromLeft, hush) import Data.Foldable (oneOf) import Data.List (List(..), fromFoldable, (:)) import Data.List.NonEmpty (NonEmptyList(..), catMaybes, cons, cons') @@ -34,12 +34,13 @@ import Effect.Console (log, logShow) import Effect.Unsafe (unsafePerformEffect) import Node.Process (lookupEnv) import Parsing (ParseError(..), ParseState(..), Parser, ParserT, Position(..), consume, fail, getParserT, initialPos, parseErrorMessage, parseErrorPosition, position, region, runParser) -import Parsing.Combinators (advance, between, chainl, chainl1, chainr, chainr1, choice, empty, endBy, endBy1, lookAhead, many, many1, many1Till, many1Till_, manyIndex, manyTill, manyTill_, notFollowedBy, optionMaybe, sepBy, sepBy1, sepEndBy, sepEndBy1, skipMany, skipMany1, try, (), (), (<~?>)) +import Parsing.Combinators (advance, between, chainl, chainl1, chainr, chainr1, choice, empty, endBy, endBy1, lookAhead, many, many1, many1Till, many1Till_, manyIndex, manyTill, manyTill_, notFollowedBy, optionMaybe, replicateA, sepBy, sepBy1, sepEndBy, sepEndBy1, skipMany, skipMany1, try, (), (), (<~?>)) import Parsing.Combinators.Array as Combinators.Array import Parsing.Expr (Assoc(..), Operator(..), buildExprParser) import Parsing.Language (haskellDef, haskellStyle, javaStyle) -import Parsing.String (anyChar, anyCodePoint, anyTill, char, eof, match, regex, rest, satisfy, string, takeN) +import Parsing.String (anyChar, anyCodePoint, anyTill, char, eof, match, parseErrorHuman, regex, rest, satisfy, string, takeN) import Parsing.String.Basic (intDecimal, letter, noneOfCodePoints, number, oneOfCodePoints, skipSpaces, whiteSpace) +import Parsing.String.Basic as String.Basic import Parsing.String.Replace (breakCap, replace, replaceT, splitCap, splitCapT) import Parsing.Token (TokenParser, makeTokenParser, token, when) import Parsing.Token as Token @@ -1070,3 +1071,38 @@ main = do { actual: lmap parseErrorPosition $ runParser "aa" $ advance consume , expected: Left (Position { index: 0, line: 1, column: 1 }) } + + log "\nTESTS error messages\n" + do + let input = "12345six789" + assertEqual' "parseErrorHuman 1" + { actual: Array.drop 1 $ parseErrorHuman input 20 $ fromLeft (ParseError "" initialPos) + $ runParser input (replicateA 9 String.Basic.digit :: Parser String (List Char)) + , expected: [ " ▼", "12345six789" ] + } + + do + let input = "12345six789" + assertEqual' "parseErrorHuman 2" + { actual: Array.drop 1 $ parseErrorHuman input 5 $ fromLeft (ParseError "" initialPos) + $ runParser input (replicateA 9 String.Basic.digit :: Parser String (List Char)) + , expected: [ " ▼", "45six" ] + } + + do + let input = "aaaa🍷\r\nbbbb" + assertEqual' "parseErrorHuman 3" + { actual: parseErrorHuman input 20 $ fromLeft (ParseError "" initialPos) + $ runParser input + $ string "aaaa" *> (replicateA 7 letter :: Parser String (List Char)) + , expected: [ "Expected letter at position index:4 (line:1, column:5)", " ▼", "aaaa🍷" ] + } + + do + let input = "aaaa\r\n🍷bbbb" + assertEqual' "parseErrorHuman 4" + { actual: parseErrorHuman input 20 $ fromLeft (ParseError "" initialPos) + $ runParser input + $ string "aaaa\r\n" *> (replicateA 5 letter :: Parser String (List Char)) + , expected: [ "Expected letter at position index:6 (line:2, column:1)", "▼", "🍷bbbb" ] + }