From eec50ea9209b4f8715737128c6a367e3d421e257 Mon Sep 17 00:00:00 2001 From: martyall Date: Wed, 13 Sep 2023 08:35:23 -0700 Subject: [PATCH 1/2] failing test --- .github/workflows/ci.yaml | 30 ++++++++++++++++++++++++++++++ .gitignore | 1 + test/Main.purs | 4 ++++ 3 files changed, 35 insertions(+) create mode 100644 .github/workflows/ci.yaml diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml new file mode 100644 index 0000000..8b07ce4 --- /dev/null +++ b/.github/workflows/ci.yaml @@ -0,0 +1,30 @@ +name: CI + +on: push + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - uses: purescript-contrib/setup-purescript@main + + - name: Cache PureScript dependencies + uses: actions/cache@v2 + # This cache uses the .dhall files to know when it should reinstall + # and rebuild packages. It caches both the installed packages from + # the `.spago` directory and compilation artifacts from the `output` + # directory. When restored the compiler will rebuild any files that + # have changed. If you do not want to cache compiled output, remove + # the `output` path. + with: + key: ${{ runner.os }}-spago-${{ hashFiles('**/*.dhall') }} + path: | + .spago + node_modules + output + + - run: npm i + - run: npm run build + - run: npm run test diff --git a/.gitignore b/.gitignore index 332b6cf..29cac07 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ /.* !/.gitignore +!/.github !/.eslintrc.json !/.travis.yml /bower_components/ diff --git a/test/Main.purs b/test/Main.purs index a3e4b3d..488d44f 100644 --- a/test/Main.purs +++ b/test/Main.purs @@ -87,6 +87,10 @@ main = do quickCheck $ fromString "ABCD" Hex === Just (withOctets pack [0xAB, 0xCD]) -- this line is commented out as for invalid input result is `pack []` and shuold be fixed later -- quickCheck $ fromString "LOL" Hex === Nothing + + log "utf8" + quickCheck $ \(s :: String) -> fromUTF8 (toUTF8 s) === s + where subL a b = a - runQuotient b From 3d4f8320808bd2d11a555505e0c6df64a8d8db0c Mon Sep 17 00:00:00 2001 From: martyall Date: Wed, 13 Sep 2023 10:34:57 -0700 Subject: [PATCH 2/2] generating proper unicode --- spago.dhall | 39 +++++++++++++++++-------------- test/Main.purs | 62 +++++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 73 insertions(+), 28 deletions(-) diff --git a/spago.dhall b/spago.dhall index 5fc55da..0823754 100644 --- a/spago.dhall +++ b/spago.dhall @@ -1,22 +1,27 @@ { name = "bytestrings" , dependencies = - [ "arrays" - , "console" - , "effect" - , "exceptions" - , "foldable-traversable" - , "integers" - , "leibniz" - , "maybe" - , "newtype" - , "node-buffer" - , "partial" - , "prelude" - , "quickcheck" - , "quickcheck-laws" - , "quotient" - , "unsafe-coerce" - ] + [ "arrays" + , "console" + , "effect" + , "enums" + , "exceptions" + , "foldable-traversable" + , "gen" + , "integers" + , "leibniz" + , "maybe" + , "newtype" + , "node-buffer" + , "nonempty" + , "partial" + , "prelude" + , "quickcheck" + , "quickcheck-laws" + , "quotient" + , "strings" + , "tuples" + , "unsafe-coerce" + ] , packages = ./packages.dhall , sources = [ "src/**/*.purs", "test/**/*.purs" ] } diff --git a/test/Main.purs b/test/Main.purs index 488d44f..5e5edbc 100644 --- a/test/Main.purs +++ b/test/Main.purs @@ -2,14 +2,23 @@ module Test.Main ( main ) where -import Effect (Effect) -import Effect.Console (log) -import Data.ByteString +import Data.ByteString (ByteString, Encoding(..), Octet, cons, empty, foldl, foldr, fromString, fromUTF8, head, init, isEmpty, last, length, map, pack, reverse, singleton, snoc, tail, toUTF8, uncons, unpack, unsnoc) +import Prelude (Unit, bind, bottom, discard, flip, identity, pure, top, (#), ($), (&&), (+), (-), (/), (<), (<$>), (<*>), (<<<), (<>), (==), (>), (||)) + +import Control.Monad.Gen (frequency) +import Data.Array (foldMap) +import Data.Enum (toEnumWithDefaults) import Data.Foldable as Foldable +import Data.Int (toNumber) import Data.Maybe (Maybe(..)) -import Prelude hiding (map) +import Data.NonEmpty (NonEmpty(..)) +import Data.String (CodePoint, fromCodePointArray) +import Data.Tuple (Tuple(..)) +import Effect (Effect) +import Effect.Console (log) import Prelude as Prelude -import Test.QuickCheck ((===), quickCheck) +import Test.QuickCheck (class Arbitrary, arbitrary, quickCheck, (===)) +import Test.QuickCheck.Gen (arrayOf, chooseInt, suchThat) import Test.QuickCheck.Laws.Data.Eq (checkEq) import Test.QuickCheck.Laws.Data.Monoid (checkMonoid) import Test.QuickCheck.Laws.Data.Ord (checkOrd) @@ -17,6 +26,7 @@ import Test.QuickCheck.Laws.Data.Semigroup (checkSemigroup) import Type.Proxy (Proxy(..)) import Type.Quotient (mkQuotient, runQuotient) + main :: Effect Unit main = do log "laws" @@ -87,14 +97,44 @@ main = do quickCheck $ fromString "ABCD" Hex === Just (withOctets pack [0xAB, 0xCD]) -- this line is commented out as for invalid input result is `pack []` and shuold be fixed later -- quickCheck $ fromString "LOL" Hex === Nothing - log "utf8" - quickCheck $ \(s :: String) -> fromUTF8 (toUTF8 s) === s + quickCheck $ \(BMPString s) -> fromUTF8 (toUTF8 s) === s - - where - subL a b = a - runQuotient b - subR a b = runQuotient a - b + where + subL a b = a - runQuotient b + subR a b = runQuotient a - b + +newtype BMPString = BMPString String + +data UnicodeChar = Normal CodePoint | Surrogates CodePoint CodePoint + +instance Arbitrary BMPString where + arbitrary = BMPString <$> do + ucs <- arrayOf (arbitrary @UnicodeChar) + pure $ fromCodePointArray $ foldMap f ucs + where + f :: UnicodeChar -> Array CodePoint + f uc = case uc of + Normal a -> [a] + Surrogates a b -> [a, b] + +instance Arbitrary UnicodeChar where + arbitrary = frequency $ NonEmpty (Tuple (1.0 - p) normalGen) [Tuple p surrogatesGen] + + where + hiLB = 0xD800 + hiUB = 0xDBFF + loLB = 0xDC00 + loUB = 0xDFFF + maxCP = 65535 + toCP = toEnumWithDefaults bottom top + -- must have a high surrogate followed by a low surrogate + surrogatesGen = Surrogates <$> (toCP <$> chooseInt hiLB hiUB) <*> (toCP <$> chooseInt loLB loUB) + normalGen = Normal <<< toCP <$> do + chooseInt 0 maxCP `suchThat` \n -> + (n < hiLB || n > hiUB) && (n < loLB || n > loUB) + -- probability that you pick a surrogate from all possible codepoints + p = toNumber ((hiUB - hiLB + 1) + (loUB - loLB + 1)) / toNumber (maxCP + 1) withOctet :: ∀ a. (Octet -> a) -> Int -> a withOctet = flip $ (#) <<< mkQuotient