ITMO CTD Haskell
Lecture slides on Functional programming course at the ITMO university CT department. You can find course description here: https://github.com/jagajaga/FP-Course-ITMO
Parsing — process of converting poorly structured data (text, bytes) into strongly structured data (custom user data types).
1. Somehow
2. Parser generators: ANTLR, Yacc/Bison, Happy
3. Parser combinators (usually in Haskell programming language)
We want to combine parsers easily.
We want to build bigger parsers from smaller ones.
What is a type of function which parses integer number?
parseInteger :: String -> Bool
No. We want to get actual Integer.
parseInteger :: String -> Integer
No. Parsers can fail.
parseInteger :: String -> Maybe Integer -- Either for more descriptive error
Now. How do you parse two integers separated by space? Or by any number of spaces? Or comma-separated list of integers? Or the same list inside square brackets [] with any number of spaces between elements?
parseInteger :: String -> Maybe (Integer, String)
┌── input stream
│
┌─ result type │ ┌─ remain stream
│ │ │
newtype Parser a = Parser { runP :: String -> Maybe (a, String) }
│ │
└─ unwrapper └─ parsing result
parseInteger :: String -> Maybe (Integer, String)
Instead of this
it's more convenient to work with newtype wrapper
newtype allows us to have useful instances!
parseInteger :: Parser Integer -- String -> Maybe (Integer, String)
Parser combinator type
newtype Parser a = Parser { runP :: String -> Maybe (a, String) }
Parsing functions
runP :: Parser a -> String -> Maybe (a, String)
How to use it and which behavior we want?
ghci> runP parseInteger "5"
Just (5, "") :: Maybe (Integer, String)
ghci> runP parseInteger "42x7"
Just (42, "x7") :: Maybe (Integer, String)
ghci> runP parseInteger "abc"
Nothing :: Maybe (Integer, String)
Such behavior allows us to combine parsers relatively easy!
Key idea of parser combinators: implement manually very simple parsers, implement combinators for combining parser and then implement more complex parsers by combining simpler ones.
instance Functor Parser -- replace parser value
instance Applicative Parser -- run parsers sequentially one after another
instance Monad Parser -- same as above but with monadic capabilities
instance Alternative Parser -- allows to choose parser
Haskell provides combinators through standard type classes. And it's convenient to use them. What we need is just...
Let's start with simpler part...
newtype Parser a = Parser { runP :: String -> Maybe (a, String) }
-- always succeeds without consuming any input
ok :: Parser ()
ok = Parser $ \s -> Just ((), s)
-- fails w/o consuming any input if given parser succeeds,
-- and succeeds if given parser fails
isnot :: Parser a -> Parser ()
isnot parser = Parser $ \s -> case runP parser s of
Just _ -> Nothing
Nothing -> Just ((), s)
-- succeeds only at the end of input stream
eof :: Parser ()
eof = Parser $ \s -> case s of
[] -> Just ((), "")
_ -> Nothing
-- consumes only single character and returns it if predicate is true
satisfy :: (Char -> Bool) -> Parser Char
satisfy p = Parser $ \s -> case s of
[] -> Nothing
(x:xs) -> if p x then Just (x, xs) else Nothing
-- consumes given character and returns it
char :: Char -> Parser Char
char c = satisfy (== c)
-- always fails without consuming any input
notok :: Parser ()
notok = isnot ok
ghci> runP eof ""
Just ((),"")
ghci> runP eof "aba"
Nothing
ghci> runP (char 'a') "aba"
Just ('a',"ba")
ghci> runP (char 'x') "aba"
Nothing
-- consumes any character or any digit only
anyChar, digit :: Parser Char
anyChar = satisfy (const True)
digit = satisfy isDigit
instance Functor Parser where
fmap :: (a -> b) -> Parser a -> Parser b
fmap f (Parser parser) = Parser (fmap (first f) . parser)
newtype Parser a = Parser { runP :: String -> Maybe (a, String) }
instance Applicative Parser where
pure :: a -> Parser a
pure a = Parser $ \s -> Just (a, s)
(<*>) :: Parser (a -> b) -> Parser a -> Parser b
Parser pf <*> Parser pa = Parser $ \s -> case pf s of
Nothing -> Nothing
Just (f, t) -> case pa t of
Nothing -> Nothing
Just (a, r) -> Just (f a, r)
-- can be written shorter using Maybe as Monad
instance Monad Parser -- exercise
instance Alternative Parser where
empty :: Parser a -- always fails
(<|>) :: Parser a -> Parser a -> Parser a -- run first, if fails — run second
-- type
newtype Parser a = Parser { runP :: String -> Maybe (a, String) }
-- parsers
eof, ok :: Parser ()
satisfy :: (Char -> Bool) -> Parser Char
empty :: Parser a
-- combinators
pure :: a -> Parser a
(<|>) :: Parser a -> Parser a -> Parser a -- orElse
(>>=) :: Parser a -> (a -> Parser b) -> Parser b -- andThen
-- type
newtype Parser a = Parser { runP :: String -> Maybe (a, String) }
-- primitive parsers
eof, ok :: Parser ()
satisfy :: (Char -> Bool) -> Parser Char
-- combinators
-- * Functor
fmap :: (a -> b) -> Parser a -> Parser b
(<$) :: a -> Parser b -> Parser a
-- * Applicative
pure :: a -> Parser a
(<*>) :: Parser (a -> b) -> Parser a -> Parser b
(<*) :: Parser a -> Parser b -> Parser a -- run both in sequence, result of first
(*>) :: Parser a -> Parser b -> Parser b -- similar to above
-- * Alternative
empty :: Parser a
(<|>) :: Parser a -> Parser a -> Parser a -- orElse
many :: Parser a -> Parser [a] -- zero or more
some :: Parser a -> Parser [a] -- one or more (should be NonEmpty)
-- * Monadic
(>>=) :: Parser a -> (a -> Parser b) -> Parser b -- andThen
ghci> runP (ord <$> char 'A') "A"
Just (65,"")
ghci> runP ((\x y -> [x, y]) <$> char 'a' <*> char 'b') "abc"
Just ("ab","c")
ghci> runP ((\x y -> [x, y]) <$> char 'a' <*> char 'b') "xxx"
Nothing
ghci> runP (char 'a' <* eof) "a"
Just ('a',"")
ghci> runP (char 'a' <* eof) "ab"
Nothing
ghci> runP (many $ char 'a') "aaabcd"
Just ("aaa","bcd")
ghci> runP (many $ char 'a') "xxx"
Just ("","xxx")
ghci> runP (some $ char 'a') "xxx"
Nothing
ghci> runP (char 'a' <|> char 'b') "abc"
Just ('a',"bc")
ghci> runP (char 'a' <|> char 'b') "bca"
Just ('b',"ca")
ghci> runP (char 'a' <|> char 'b') "cab"
Nothing
string :: String -> Parser String -- like 'char' but for string
oneOf :: [String] -> Parser String -- parse first matched string from list
Problem: parse user [y/n] answer
data Answer = Yes | No
yesP :: Parser Answer
yesP = Yes <$ oneOf ["y", "Y", "yes", "Yes", "ys"]
noP :: Parser Answer
noP = No <$ oneOf ["n", "N", "no", "No"]
answerP :: Parser Answer
answerP = yesP <|> noP
-- | @'ParsecT' e s m a@ is a parser with custom data component of error
-- @e@, stream type @s@, underlying monad @m@ and return type @a@.
newtype ParsecT e s m a = ParsecT
{ unParser
:: forall b. State s
-> (a -> State s -> Hints (Token s) -> m b) -- consumed-OK
-> (ParseError (Token s) e -> State s -> m b) -- consumed-error
-> (a -> State s -> Hints (Token s) -> m b) -- empty-OK
-> (ParseError (Token s) e -> State s -> m b) -- empty-error
-> m b }
parsec — first mature library, very old
attoparsec — fast, poor error messages, backtracks by default
megaparsec — excellent error messages, mature
Types in mature libraries are scary...
hspec — declarative unit testing
hedgehog — property-based testing
tasty — testing framework for combining different approaches
• tasty-hspec — tasty provider for hspec
• tasty-hedgehog — tasty provider for hedgehog
It's easier to understand and write things if you're familiar with IO monad and do-notation but bare with me...
Test.Unit module
module Test.Unit
( hspecTestTree
) where
import Data.Maybe (isJust, isNothing)
import Test.Tasty (TestTree)
import Test.Tasty.Hspec (Spec, describe, it, shouldBe, shouldSatisfy, testSpec)
import Parser (char, eof, runP)
hspecTestTree :: IO TestTree
hspecTestTree = testSpec "Simple parser" spec_Parser
spec_Parser :: Spec
spec_Parser = do
describe "eof works" $ do
it "eof on empty input" $
runP eof "" `shouldSatisfy` isJust
it "eof on non-empty input" $
runP eof "x" `shouldSatisfy` isNothing
describe "char works" $ do
it "char parses character" $
runP (char 'a') "abc" `shouldBe` Just ('x', "bc")
Now using our TestTree
module Main where
import Test.Tasty (defaultMain, testGroup)
import Test.Unit (hspecTestTree)
main :: IO ()
main = hspecTestTree >>= \unitTests ->
let allTests = testGroup "Parser" [unitTests]
in defaultMain allTests
Writing unit tests for different cases is really boring and tedious. We have a lot of functions, every function has corner cases. And our tests will be quite limited in terms of testing set size.
With property-based testing:
1. Library generates arbitrary input for your function.
2. Instead of specifying expected values you specify desired properties to which function results should satisfy.
Often it's much more difficult to write property-based tests rather than unit tests. But properties test your functions better (more cases, more reliable, more robust).
import Hedgehog
import qualified Data.List as List
import qualified Hedgehog.Gen as Gen
import qualified Hedgehog.Range as Range
genIntList :: Gen [Int]
genIntList =
let listLength = Range.linear 0 100000
in Gen.list listLength Gen.enumBounded
prop_reverse :: Property
prop_reverse = property $
forAll genIntList >>= \xs ->
List.reverse (List.reverse xs) === xs
If test is not passed we can show generated test case. But because this test case is generated randomly, it can be very big! It's much easier to test on smaller results.
Property-based libraries usually come with shrinking — techinique for automatically decrease size of false test to come up with minimal reproducible example.
-- Drops an element somewhere around the middle of the list.
fauxReverse :: [a] -> [a]
fauxReverse xs =
let sx = List.reverse xs
mp = length xs `div` 2
(as, bs) = List.splitAt mp sx
in as ++ List.drop 1 bs
Let's implement bad reverse function
prop_fauxReverse :: Property
prop_fauxReverse = property $
forAll genIntList >>= \xs ->
fauxReverse xs === List.reverse xs
And the output is...
1. Round-trip properties
read . show ≡ id
decode . encode ≡ id
deserialize . serialize ≡ id
2. Type classes laws
(a <> b) <> c ≡ a <> (b <> c)
a <> mempty ≡ a
mempty <> a ≡ a
Though, some laws are harder to test...
(m >>= f) >>= g ≡ m >>= (\x -> f x >>= g)
You need to generate arbitrary functions and show them. Don't worry, this is possible, but not out of the box...
Very simple and naive test...
module Test.Property (okTestTree) where
import Hedgehog (Gen, Property, forAll, property, (===))
import Test.Tasty (TestTree)
import Test.Tasty.Hedgehog (testProperty)
import Parser (ok, runP)
import qualified Hedgehog.Gen as Gen
import qualified Hedgehog.Range as Range
okTestTree :: TestTree
okTestTree = testProperty "ok always succeeds" prop_Ok
genString :: Gen String
genString =
let listLength = Range.linear 0 100
in Gen.list listLength Gen.alpha
prop_Ok :: Property
prop_Ok = property $
forAll genString >>= \s ->
runP ok s === Just ((), s)
function add(a, b) {
return a + b;
}
add :: Int -> Int -> Int
add x y = x + y
addCPS :: Int -> Int -> (Int -> r) -> r
addCPS x y onDone = onDone (x + y)
function addCPS(a, b, callback) {
callback(a + b);
}
addCPS(1, 2, function (result) {
// use result here
});
onInput :: (String -> IO ()) -> IO () -- every callback framework
onInput action = forever $ getLine >>= action
square :: Int -> Int
square x = x * x
pythagoras :: Int -> Int -> Int
pythagoras x y = (+) (square x) (square y)
addCPS :: Int -> Int -> ((Int -> r) -> r)
addCPS x y = \k -> k (x + y)
squareCPS :: Int -> ((Int -> r) -> r)
squareCPS x = \k -> k (square x)
pythagorasCPS :: Int -> Int -> ((Int -> r) -> r)
pythagorasCPS x y = \k -> -- k :: Int -> r
squareCPS x $ \x2 ->
squareCPS y $ \y2 ->
addCPS x2 y2 $ k -- addCPS x2 y2 :: (Int -> r) -> r
ghci> pythagorasCPS 3 4 id
25
ghci> :t ($)
($) :: (a -> b) -> a -> b
gchi> :t ($ 2)
($ 2) :: Num a => (a -> b) -> b
ghci> map ($ 2) [(3*), (2+),(1-)]
[6,4,-1]
newtype Cont r a = Cont { runCont :: (a -> r) -> r }
ghci> :t cont
cont :: ((a -> r) -> r) -> Cont r a
gchi> runCont (cont ($ 2)) `map` [(3*), (2+), (1-)]
[6,4,-1]
ghci> runCont (cont ($ 2)) id
2
addCPS :: Int -> Int -> Cont r Int
addCPS x y = cont $ \k -> k (x + y)
squareCPS :: Int -> Cont r Int
squareCPS x = cont $ \k -> k (square x)
pythagorasCPS :: Int -> Int -> Cont r Int
pythagorasCPS x y = cont $ \k ->
runCont (squareCPS x) $ \x2 ->
runCont (squareCPS y) $ \y2 ->
runCont (addCPS x2 y2) $ k
ghci> runCont (pythagorasCPS 3 4) id
25
newtype Cont r a = Cont { runCont :: (a -> r) -> r }
instance Monad (Cont r) where
return :: a -> Cont r a
return a =
(>>=) :: Cont r a -> (a -> Cont r b) -> Cont r b
Cont arr >>= f =
instance Monad (Cont r) where
return :: a -> Cont r a
return a = Cont ($ a)
(>>=) :: Cont r a -> (a -> Cont r b) -> Cont r b
Cont arr >>= f = Cont $ \br -> arr $ \a -> runCont (f a) br
-- arr :: (a -> r) -> r
-- br :: (b -> r)
-- f :: a -> Cont r b
addCPS :: Int -> Int -> Cont r Int
addCPS x y = return $ x + y
squareCPS :: Int -> Cont r Int
squareCPS = return . square
pythagorasCPS :: Int -> Int -> Cont r Int
pythagorasCPS x y = squareCPS x >>= \x2 ->
squareCPS y >>= \y2 ->
addCPS x2 y2
By ITMO CTD Haskell
Lecture about parser combinators, their goal. And the same ideas applied to testing combinators (especially in context of property-based testing).
Lecture slides on Functional programming course at the ITMO university CT department. You can find course description here: https://github.com/jagajaga/FP-Course-ITMO