Open in CoCalc x

Basic Parser

Welcome to the Haskell World! In this notebook, we present you a basic parser that allows you to pass in a Rule and get an output order based on the defined rule. Have fun with IHaskell!

In [1]:
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances,TypeSynonymInstances #-}
import qualified Text.Parsec as Parsec
import Text.Parsec ((<?>))
--import Control.Applicative
import Text.Show
import Data.List
import Text.Parsec.Combinator
import qualified Data.Set as Set
import Control.Monad
import qualified Data.Map as Map
import Text.ParserCombinators.Parsec hiding (State)
import System.IO
import Control.Monad.State
import qualified Data.Set as Set
import System.Console.Haskeline
import System.Console.Haskeline.History
import Data.Maybe
import Data.Text.Internal.Read
import Data.Char
import Data.Function (on)


symbol :: Parser Char
symbol = oneOf "!#$%&|*+-:/[email protected]^_~"

parseConstant :: Parser SemVal
parseConstant = do
    first <- letter <|> char '%'
    rest <- many (letter <|> digit <|> symbol)
    return $ Constant (first:rest)

parseNumber :: Parser SemVal
parseNumber = liftM (Number . read) $ many1 digit

parseList :: Parser SemVal
parseList = liftM List $ sepBy parseExpr comma_

parseSet :: Parser SemVal
parseSet = liftM Set $ liftM Set.fromList $ sepBy parseExpr comma_

parseFunction :: Parser SemVal
parseFunction = parens (do
  func <- parseExpr <|>
          do  f<-many (letter <|> digit <|> symbol) 
              return $ Constant f
  spaces
  args <- sepBy parseExpr spaces
  return $ Function func args)

parseLambda :: Parser SemVal
parseLambda = do
  char '\\'
  spaces
  vars <- endBy parseVariable spaces
  spaces
  body <- parseExpr
  return $ Lambda vars body nullSemState

parseVariable :: Parser SemVal
parseVariable = do
  typ <- char '$' <|> char '#'
  var <- many1 (letter <|> digit <|> symbol)
  val <- optionMaybe $ try equals_ >> parseExpr
  return $ maybe  (Variable $ typ:var)
                  (Function (Constant "set") . ([Variable $ typ:var] ++) . (:[])) val

parseTree :: Parser SemVal
parseTree = brackets $ do
  x <- parseLeftNode
  spaces
  y <- parseRightNode
  return $ Branch ""x y

parseLeaf :: Parser SemVal
parseLeaf = liftM Leaf $ many1 (letter <|> digit <|> symbol)

parseLeftNode :: Parser SemVal
parseLeftNode = parseTree <|> parseLeaf

parseRightNode :: Parser SemVal
parseRightNode = parseTree <|> try parseRightBranch <|> parseLeaf

parseRightBranch :: Parser SemVal
parseRightBranch = do
  x <- parseLeftNode
  spaces
  y <- parseRightNode
  return $ Branch "" x y
            
spacy     = between spaces spaces
angles    = between (char '<') (char '>')
comma_    = spacy (char ',')
braces    = between (char '{') (char '}')
equals_   = spacy (char '=')
brackets  = between (spacy $ char '[') (spacy $ char ']')
parens    = between (spacy $ char '(') (spacy $ char ')')

parseExpr :: Parser SemVal
parseExpr = parseConstant
            <|> parseNumber 
            <|> angles parseList
            <|> braces parseSet
            <|> parseFunction
            <|> parseLambda
            <|> parseVariable
            <|> try (brackets parseLeaf)
            <|> parseTree




data SemVal = Constant String
             | List [SemVal]
             | Set (Set.Set SemVal)
             | Number Integer
             | Function SemVal [SemVal]
             | Lambda [SemVal] SemVal SemState
             | Variable String
             | Leaf String 
             | Branch String SemVal SemVal
             | Error
             deriving (Ord, Eq)

data SemState = SemState  (Map.Map String SemVal) -- Regular Variables
                          (Map.Map String SemVal) -- Words
                    deriving (Ord, Eq, Show)

nullSemState :: SemState
nullSemState = SemState Map.empty Map.empty

instance Show SemVal where
  show (Constant x@(h:t)) = if h == '%' then latex t else x
  show (List x) = "⟨" ++ (intercalate ", " $ map show x) ++ "⟩"
  show (Number x) = show x
  show (Set x) 
      | Set.null x    = "False"
      | Set.isSubsetOf x (Set.fromList [List []]) = "True"
      | otherwise     = "{" ++ (intercalate ", " $ map show $ Set.toList x) ++ "}"
  show (Variable x) = show x
  show (Function x xs) = show x ++ " " ++ show xs
  show (Lambda vars f env) 
      = "(λ " ++ (intercalate " " $ map show vars) ++ " . " 
            ++ show env ++ " in " ++ show f ++ ")"
  show (Leaf x) = "L" ++ x
  show (Branch "" x y) = "[" ++ (intercalate " " [show x, show y]) ++ "]"
  show (Branch s x y) = "[" ++ (intercalate " " [s, show x, show y]) ++ "]"
  show _ = "Error"

latex "in"    = "∈"
latex "alpha" = "α"
latex "beta"  = "β"
latex "gamma" = "γ"
latex "delta" = "δ"
latex "lambda" = "λ"

run rule text = Parsec.parse rule "(source)" text
-----------------------------------------------------------------------------------------------
data Rule = Rule String 
            [String] 
            [String]

-- Still need to fix show instance so that it takes a list of arbitrary length
instance Show Rule where
    show (Rule a b c) = "Rule " ++ a ++ " [" ++ (intercalate ", " b) ++ "] [" ++ (intercalate ", " c) ++ "]"

updatedParser :: Parsec.Parsec String () (Rule)
updatedParser = do
    head <- many1 letter
    spaces
    char '-'
    char '>'
    spaces
    leftL <- endBy1 (many1 letter) (spaces)
    char ':'
    spaces
    rightL <- endBy1 (many1 letter) (spaces)
    return (Rule head leftL rightL)  



-- Getting started (For the week of 6/3/2019 - 6/7/2019)
-- Generic implemetation, unnecessary code will be removed/cleaned up later

-- Helper function to slice substring from a string
slice :: Int -> Int -> String -> String
slice start end s_in = drop start (take end s_in)

-- Helper function to split a string into a list by space
-- Usage: split "Your String" []
split [] t = [t]
split (a:l) t = if a==' ' then (t:split l []) else split l (t++[a])

getParser :: String -> Parsec.Parsec String () String
getParser "Expr" = do
                   x <- parseExpr
                   spaces
                   return ((show x) ++ " ") -- add space at the end to separate arguments 
getParser "Symbol" = do
                   s <- many1 symbol
                   spaces
                   return (s ++ " ") 

stringToIndex :: String -> Int
stringToIndex s = (read s :: Int) - 1

reorder :: [String] -> [String] -> [String]
reorder indexes list = [list !! x | x <- (map stringToIndex indexes)]

{-  iParser Algorithm
    1. use foldl1 to concatenate all the parsers
    2. x outputs a string in the format "Arg1{space}Arg2{space}...etc"
    3. Split x into a list by space -> [Arg1, Arg2, Arg3....]
    4. reorder x by indices, and output the result
-}
iParser :: Rule -> Parsec.Parsec String () String --(String)
iParser (Rule _ listB listC) = do
                               x <- foldl1 (liftM2 (++)) (map getParser listB)
                               -- filter out "" from list, since we always append white space at the end of the output in getParser. This can avoid error occur in the split helper function
                               let y = filter (not . null) (split x [])
                               return (intercalate " " (reorder listC y))

-- compute :: String -> String
-- compute s = do 

-- Now you can create a parser using the iParser function that takes in a Rule
let p = iParser (Rule "S" ["Expr", "Symbol", "Expr"] ["3", "1", "2"])
let x = iParser (Rule "S" ["Expr", "Symbol", "Expr"] ["2", "3", "1"])
In [5]:
-- Sample code
3+1000
run p "2+3"
run x "2+4"
1003
Right "3 2 +"
Right "+ 4 2"
In [3]:
-- <script>
--   function code_toggle() {
--     if (code_shown){
--       $('div.input').hide('500');
--       $('#toggleButton').val('Show Code')
--     } else {
--       $('div.input').show('500');
--       $('#toggleButton').val('Hide Code')
--     }
--     code_shown = !code_shown
--   }

--   $( document ).ready(function(){
--     code_shown=false;
--     $('div.input').hide()
--   });
-- </script>
-- <form action="javascript:code_toggle()"><input type="submit" id="toggleButton" value="Show Code"></form>
In [ ]: