From ee9fb8140f5deb84e2f6b97d2c824d76b110a0f6 Mon Sep 17 00:00:00 2001 From: Robert Porter Date: Wed, 16 Dec 2020 11:47:46 +0900 Subject: [PATCH 1/4] Non-empty combinators return NonEmptyList --- src/Text/Parsing/Parser/Combinators.purs | 31 ++++++++++++++---------- src/Text/Parsing/Parser/Token.purs | 9 ++++--- test/Main.purs | 13 +++++----- 3 files changed, 30 insertions(+), 23 deletions(-) diff --git a/src/Text/Parsing/Parser/Combinators.purs b/src/Text/Parsing/Parser/Combinators.purs index df2daa1..b77b9dc 100644 --- a/src/Text/Parsing/Parser/Combinators.purs +++ b/src/Text/Parsing/Parser/Combinators.purs @@ -29,7 +29,8 @@ import Control.Monad.State (StateT(..), runStateT) import Control.Plus (empty, (<|>)) import Data.Either (Either(..)) import Data.Foldable (class Foldable, foldl) -import Data.List (List(..), (:), many, some, singleton) +import Data.List (List(..), (:), many) +import Data.List.NonEmpty (NonEmptyList, cons', singleton, toList) import Data.Maybe (Maybe(..)) import Data.Newtype (unwrap) import Data.Tuple (Tuple(..)) @@ -99,30 +100,33 @@ lookAhead p = (ParserT <<< ExceptT <<< StateT) \s -> do -- | digit `sepBy` string "," -- | ``` sepBy :: forall m s a sep. Monad m => ParserT s m a -> ParserT s m sep -> ParserT s m (List a) -sepBy p sep = sepBy1 p sep <|> pure Nil +sepBy p sep = (toList <$> sepBy1 p sep) <|> pure Nil -- | Parse phrases delimited by a separator, requiring at least one match. -sepBy1 :: forall m s a sep. Monad m => ParserT s m a -> ParserT s m sep -> ParserT s m (List a) +sepBy1 :: forall m s a sep. Monad m => ParserT s m a -> ParserT s m sep -> ParserT s m (NonEmptyList a) sepBy1 p sep = do a <- p as <- many $ sep *> p - pure (a : as) + pure (cons' a as) -- | Parse phrases delimited and optionally terminated by a separator. sepEndBy :: forall m s a sep. Monad m => ParserT s m a -> ParserT s m sep -> ParserT s m (List a) -sepEndBy p sep = sepEndBy1 p sep <|> pure Nil +sepEndBy p sep = (toList <$> sepEndBy1 p sep) <|> pure Nil -- | Parse phrases delimited and optionally terminated by a separator, requiring at least one match. -sepEndBy1 :: forall m s a sep. Monad m => ParserT s m a -> ParserT s m sep -> ParserT s m (List a) +sepEndBy1 :: forall m s a sep. Monad m => ParserT s m a -> ParserT s m sep -> ParserT s m (NonEmptyList a) sepEndBy1 p sep = do a <- p - (do _ <- sep - as <- sepEndBy p sep - pure (a : as)) <|> pure (singleton a) + (do as <- many $ sep *> p + optional sep + pure (cons' a as)) <|> pure (singleton a) -- | Parse phrases delimited and terminated by a separator, requiring at least one match. -endBy1 :: forall m s a sep. Monad m => ParserT s m a -> ParserT s m sep -> ParserT s m (List a) -endBy1 p sep = some $ p <* sep +endBy1 :: forall m s a sep. Monad m => ParserT s m a -> ParserT s m sep -> ParserT s m (NonEmptyList a) +endBy1 p sep = do + a <- p <* sep + as <- endBy p sep + pure (cons' a as) -- | Parse phrases delimited and terminated by a separator. endBy :: forall m s a sep. Monad m => ParserT s m a -> ParserT s m sep -> ParserT s m (List a) @@ -193,8 +197,9 @@ manyTill p end = scan pure (x:xs) -- | Parse several phrases until the specified terminator matches, requiring at least one match. -many1Till :: forall s a m e. Monad m => ParserT s m a -> ParserT s m e -> ParserT s m (List a) +many1Till :: forall s a m e. Monad m => ParserT s m a -> ParserT s m e -> ParserT s m (NonEmptyList a) many1Till p end = do x <- p xs <- manyTill p end - pure (x:xs) + pure (cons' x xs) + diff --git a/src/Text/Parsing/Parser/Token.purs b/src/Text/Parsing/Parser/Token.purs index 750819b..1b2f023 100644 --- a/src/Text/Parsing/Parser/Token.purs +++ b/src/Text/Parsing/Parser/Token.purs @@ -36,6 +36,7 @@ import Data.Identity (Identity) import Data.Int (toNumber) import Data.List (List(..)) import Data.List as List +import Data.List.NonEmpty (NonEmptyList) import Data.Maybe (Maybe(..), maybe) import Data.String (null, toLower) import Data.String.CodeUnits as SCU @@ -258,7 +259,7 @@ type GenTokenParser s m semiSep :: forall a . ParserT s m a -> ParserT s m (List a), -- | Lexeme parser `semiSep1 p` parses *one* or more occurrences of `p` -- | separated by `semi`. Returns a list of values pureed by `p`. - semiSep1 :: forall a . ParserT s m a -> ParserT s m (List a), + semiSep1 :: forall a . ParserT s m a -> ParserT s m (NonEmptyList a), -- | Lexeme parser `commaSep p` parses *zero* or more occurrences of -- | `p` separated by `comma`. Returns a list of values pureed -- | by `p`. @@ -266,7 +267,7 @@ type GenTokenParser s m -- | Lexeme parser `commaSep1 p` parses *one* or more occurrences of -- | `p` separated by `comma`. Returns a list of values pureed -- | by `p`. - commaSep1 :: forall a . ParserT s m a -> ParserT s m (List a) + commaSep1 :: forall a . ParserT s m a -> ParserT s m (NonEmptyList a) } ----------------------------------------------------------- @@ -369,10 +370,10 @@ makeTokenParser (LanguageDef languageDef) semiSep :: forall a . ParserT String m a -> ParserT String m (List a) semiSep p = sepBy p semi - commaSep1 :: forall a . ParserT String m a -> ParserT String m (List a) + commaSep1 :: forall a . ParserT String m a -> ParserT String m (NonEmptyList a) commaSep1 p = sepBy1 p comma - semiSep1 :: forall a . ParserT String m a -> ParserT String m (List a) + semiSep1 :: forall a . ParserT String m a -> ParserT String m (NonEmptyList a) semiSep1 p = sepBy1 p semi ----------------------------------------------------------- diff --git a/test/Main.purs b/test/Main.purs index 21c0dd5..1867e5c 100644 --- a/test/Main.purs +++ b/test/Main.purs @@ -7,6 +7,7 @@ import Control.Lazy (fix) import Data.Array (some) import Data.Either (Either(..)) import Data.List (List(..), fromFoldable, many) +import Data.List.NonEmpty (cons, cons') import Data.Maybe (Maybe(..)) import Data.String.CodeUnits (fromCharArray, singleton) import Data.Tuple (Tuple(..)) @@ -357,10 +358,10 @@ tokenParserSemiSepTest = do tokenParserSemiSep1Test :: TestM tokenParserSemiSep1Test = do -- parse semi sep1 - parseTest "foo; foo" (fromFoldable ["foo", "foo"]) $ testTokenParser.semiSep1 $ string "foo" + parseTest "foo; foo" (cons "foo" (cons' "foo" Nil)) $ testTokenParser.semiSep1 $ string "foo" -- parse semi sep1 with newline - parseTest "foo; \nfoo" (fromFoldable ["foo", "foo"]) $ testTokenParser.semiSep1 $ string "foo" + parseTest "foo; \nfoo" (cons "foo" (cons' "foo" Nil)) $ testTokenParser.semiSep1 $ string "foo" -- no parse on empty string parseErrorTestPosition (testTokenParser.semiSep1 $ string "foo") "" $ mkPos 1 @@ -379,10 +380,10 @@ tokenParserCommaSepTest = do tokenParserCommaSep1Test :: TestM tokenParserCommaSep1Test = do -- parse comma sep1 - parseTest "foo, foo" (fromFoldable ["foo", "foo"]) $ testTokenParser.commaSep1 $ string "foo" + parseTest "foo, foo" (cons "foo" (cons' "foo" Nil)) $ testTokenParser.commaSep1 $ string "foo" -- parse comma sep1 with newline - parseTest "foo, \nfoo" (fromFoldable ["foo", "foo"]) $ testTokenParser.commaSep1 $ string "foo" + parseTest "foo, \nfoo" (cons "foo" (cons' "foo" Nil)) $ testTokenParser.commaSep1 $ string "foo" -- no parse on empty string parseErrorTestPosition (testTokenParser.commaSep1 $ string "foo") "" $ mkPos 1 @@ -438,8 +439,8 @@ main = do parseTest "(ab)" (Just "b") $ parens do _ <- string "a" optionMaybe $ string "b" - parseTest "a,a,a" (Cons "a" (Cons "a" (Cons "a" Nil))) $ string "a" `sepBy1` string "," - parseTest "a,a,a," (Cons "a" (Cons "a" (Cons "a" Nil))) $ do + parseTest "a,a,a" (cons "a" (cons "a" (cons' "a" Nil))) $ string "a" `sepBy1` string "," + parseTest "a,a,a," (cons "a" (cons "a" (cons' "a" Nil))) $ do as <- string "a" `endBy1` string "," eof pure as From 4ae59c864a66ae294d20233192bd731bc4bd009c Mon Sep 17 00:00:00 2001 From: Robert Porter Date: Wed, 16 Dec 2020 12:22:54 +0900 Subject: [PATCH 2/4] Updated change log --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9678288..3d83616 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ Notable changes to this project are documented in this file. The format is based ## [Unreleased] Breaking changes (😱!!!): +- Non-empty combinators return `NonEmptyList` ([#102](https://github.com/purescript-contrib/purescript-parsing/pull/102)) New features: From 7a10ed949060184a59699d44afd61e42e4da3d8a Mon Sep 17 00:00:00 2001 From: Robert Porter Date: Thu, 17 Dec 2020 18:02:11 +0900 Subject: [PATCH 3/4] Separate 0+ and 1+ combinator implementations --- src/Text/Parsing/Parser/Combinators.purs | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/Text/Parsing/Parser/Combinators.purs b/src/Text/Parsing/Parser/Combinators.purs index b77b9dc..5900f28 100644 --- a/src/Text/Parsing/Parser/Combinators.purs +++ b/src/Text/Parsing/Parser/Combinators.purs @@ -100,7 +100,10 @@ lookAhead p = (ParserT <<< ExceptT <<< StateT) \s -> do -- | digit `sepBy` string "," -- | ``` sepBy :: forall m s a sep. Monad m => ParserT s m a -> ParserT s m sep -> ParserT s m (List a) -sepBy p sep = (toList <$> sepBy1 p sep) <|> pure Nil +sepBy p sep = + (do a <- p + as <- many $ sep *> p + pure (a : as)) <|> pure Nil -- | Parse phrases delimited by a separator, requiring at least one match. sepBy1 :: forall m s a sep. Monad m => ParserT s m a -> ParserT s m sep -> ParserT s m (NonEmptyList a) @@ -111,7 +114,11 @@ sepBy1 p sep = do -- | Parse phrases delimited and optionally terminated by a separator. sepEndBy :: forall m s a sep. Monad m => ParserT s m a -> ParserT s m sep -> ParserT s m (List a) -sepEndBy p sep = (toList <$> sepEndBy1 p sep) <|> pure Nil +sepEndBy p sep = + (do a <- p + as <- many $ sep *> p + optional sep + pure (a : as)) <|> pure Nil -- | Parse phrases delimited and optionally terminated by a separator, requiring at least one match. sepEndBy1 :: forall m s a sep. Monad m => ParserT s m a -> ParserT s m sep -> ParserT s m (NonEmptyList a) @@ -125,7 +132,7 @@ sepEndBy1 p sep = do endBy1 :: forall m s a sep. Monad m => ParserT s m a -> ParserT s m sep -> ParserT s m (NonEmptyList a) endBy1 p sep = do a <- p <* sep - as <- endBy p sep + as <- many $ p <* sep pure (cons' a as) -- | Parse phrases delimited and terminated by a separator. From 81329b55baf3df11a2023f6e3b3bcce42da50cad Mon Sep 17 00:00:00 2001 From: Robert Porter Date: Thu, 17 Dec 2020 18:04:17 +0900 Subject: [PATCH 4/4] Removed `toList` from imports --- src/Text/Parsing/Parser/Combinators.purs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Text/Parsing/Parser/Combinators.purs b/src/Text/Parsing/Parser/Combinators.purs index 5900f28..f814b63 100644 --- a/src/Text/Parsing/Parser/Combinators.purs +++ b/src/Text/Parsing/Parser/Combinators.purs @@ -30,7 +30,7 @@ import Control.Plus (empty, (<|>)) import Data.Either (Either(..)) import Data.Foldable (class Foldable, foldl) import Data.List (List(..), (:), many) -import Data.List.NonEmpty (NonEmptyList, cons', singleton, toList) +import Data.List.NonEmpty (NonEmptyList, cons', singleton) import Data.Maybe (Maybe(..)) import Data.Newtype (unwrap) import Data.Tuple (Tuple(..))