{-# LANGUAGE Safe #-}
{-# LANGUAGE ViewPatterns #-}
{-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-}
module Text.Gigaparsec.Errors.TokenExtractors (
Token(..), TokenExtractor,
tillNextWhitespace,
singleChar,
matchParserDemand,
lexToken, lexTokenWithSelect
) where
import Text.Gigaparsec (
Parsec, Result(Success), parse,
atomic, lookAhead, notFollowedBy, some, (<+>), (<~>), mapMaybeS
)
import Text.Gigaparsec.Char (item)
import Text.Gigaparsec.Combinator (option)
import Text.Gigaparsec.Position (offset)
import Data.Char (generalCategory, ord, GeneralCategory(Format, Surrogate, PrivateUse, NotAssigned, Control))
import Data.Char qualified as Char (isSpace)
import Data.List.NonEmpty (NonEmpty((:|)), nonEmpty)
import Data.List.NonEmpty qualified as NonEmpty (toList)
import Data.Void (Void)
import Data.Foldable (maximumBy)
import Numeric (showHex)
import Data.Function (on)
import Data.Maybe (catMaybes)
type TokenExtractor :: *
type = NonEmpty Char
-> Word
-> Bool
-> Token
type Token :: *
data Token = Raw
!String
| Named
!String
{-# UNPACK #-} !Word
{-# INLINABLE tillNextWhitespace #-}
tillNextWhitespace :: Bool
-> (Char -> Bool)
-> TokenExtractor
tillNextWhitespace :: Bool -> (Char -> Bool) -> TokenExtractor
tillNextWhitespace Bool
_ Char -> Bool
_ (NonEmpty Char -> Maybe Token
whitespaceOrUnprintable -> Just Token
tok) Word
_ Bool
_ = Token
tok
tillNextWhitespace Bool
trimToDemand Char -> Bool
isSpace (Char
c :| [Char]
cs) Word
parserDemanded Bool
_
| Bool
trimToDemand = [Char] -> Token
Raw (Int -> [Char] -> [Char]
forall a. Int -> [a] -> [a]
take (Word -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word
parserDemanded) ([Char] -> [Char]
tillSpace (Char
cChar -> [Char] -> [Char]
forall a. a -> [a] -> [a]
:[Char]
cs)))
| Bool
otherwise = [Char] -> Token
Raw ([Char] -> [Char]
tillSpace (Char
cChar -> [Char] -> [Char]
forall a. a -> [a] -> [a]
:[Char]
cs))
where tillSpace :: [Char] -> [Char]
tillSpace = (Char -> Bool) -> [Char] -> [Char]
forall a. (a -> Bool) -> [a] -> [a]
takeWhile (Bool -> Bool
not (Bool -> Bool) -> (Char -> Bool) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
isSpace)
{-# INLINABLE singleChar #-}
singleChar :: TokenExtractor
singleChar :: TokenExtractor
singleChar (NonEmpty Char -> Maybe Token
whitespaceOrUnprintable -> Just Token
tok) Word
_ Bool
_ = Token
tok
singleChar (Char
c :| [Char]
_) Word
_ Bool
_ = [Char] -> Token
Raw [Char
c]
{-# INLINABLE matchParserDemand #-}
matchParserDemand :: TokenExtractor
matchParserDemand :: TokenExtractor
matchParserDemand (NonEmpty Char -> Maybe Token
whitespaceOrUnprintable -> Just Token
tok) Word
_ Bool
_ = Token
tok
matchParserDemand (Char
c :| [Char]
cs) Word
parserDemanded Bool
_ = [Char] -> Token
Raw (Int -> [Char] -> [Char]
forall a. Int -> [a] -> [a]
take (Word -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word
parserDemanded) (Char
cChar -> [Char] -> [Char]
forall a. a -> [a] -> [a]
:[Char]
cs))
{-# INLINABLE whitespaceOrUnprintable #-}
whitespaceOrUnprintable :: NonEmpty Char -> Maybe Token
whitespaceOrUnprintable :: NonEmpty Char -> Maybe Token
whitespaceOrUnprintable (Char
'\n' :| [Char]
_) = Token -> Maybe Token
forall a. a -> Maybe a
Just (Token -> Maybe Token) -> Token -> Maybe Token
forall a b. (a -> b) -> a -> b
$ [Char] -> Word -> Token
Named [Char]
"newline" Word
1
whitespaceOrUnprintable (Char
'\r' :| [Char]
_) = Token -> Maybe Token
forall a. a -> Maybe a
Just (Token -> Maybe Token) -> Token -> Maybe Token
forall a b. (a -> b) -> a -> b
$ [Char] -> Word -> Token
Named [Char]
"carriage return" Word
1
whitespaceOrUnprintable (Char
'\t' :| [Char]
_) = Token -> Maybe Token
forall a. a -> Maybe a
Just (Token -> Maybe Token) -> Token -> Maybe Token
forall a b. (a -> b) -> a -> b
$ [Char] -> Word -> Token
Named [Char]
"tab" Word
1
whitespaceOrUnprintable (Char
' ' :| [Char]
_) = Token -> Maybe Token
forall a. a -> Maybe a
Just (Token -> Maybe Token) -> Token -> Maybe Token
forall a b. (a -> b) -> a -> b
$ [Char] -> Word -> Token
Named [Char]
"space" Word
1
whitespaceOrUnprintable (Char
c :| [Char]
_)
| Char -> Bool
Char.isSpace Char
c = Token -> Maybe Token
forall a. a -> Maybe a
Just (Token -> Maybe Token) -> Token -> Maybe Token
forall a b. (a -> b) -> a -> b
$ [Char] -> Word -> Token
Named [Char]
"whitespace character" Word
1
| Bool
otherwise = case Char -> GeneralCategory
generalCategory Char
c of
GeneralCategory
Format -> Maybe Token
unprintable
GeneralCategory
Surrogate -> Maybe Token
unprintable
GeneralCategory
PrivateUse -> Maybe Token
unprintable
GeneralCategory
NotAssigned -> Maybe Token
unprintable
GeneralCategory
Control -> Maybe Token
unprintable
GeneralCategory
_ -> Maybe Token
forall a. Maybe a
Nothing
where unprintable :: Maybe Token
unprintable = Token -> Maybe Token
forall a. a -> Maybe a
Just (Token -> Maybe Token) -> Token -> Maybe Token
forall a b. (a -> b) -> a -> b
$ [Char] -> Word -> Token
Named ([Char]
"non-printable character (\\x" [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ Int -> [Char] -> [Char]
forall a. Integral a => a -> [Char] -> [Char]
showHex (Char -> Int
ord Char
c) [Char]
")") Word
1
{-# INLINABLE lexToken #-}
lexToken :: [Parsec String]
-> TokenExtractor
-> TokenExtractor
lexToken :: [Parsec [Char]] -> TokenExtractor -> TokenExtractor
lexToken = (NonEmpty ([Char], Word) -> ([Char], Word))
-> [Parsec [Char]] -> TokenExtractor -> TokenExtractor
lexTokenWithSelect ((([Char], Word) -> ([Char], Word) -> Ordering)
-> NonEmpty ([Char], Word) -> ([Char], Word)
forall (t :: * -> *) a.
Foldable t =>
(a -> a -> Ordering) -> t a -> a
maximumBy (Word -> Word -> Ordering
forall a. Ord a => a -> a -> Ordering
compare (Word -> Word -> Ordering)
-> (([Char], Word) -> Word)
-> ([Char], Word)
-> ([Char], Word)
-> Ordering
forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c
`on` ([Char], Word) -> Word
forall a b. (a, b) -> b
snd))
{-# INLINABLE lexTokenWithSelect #-}
lexTokenWithSelect :: (NonEmpty (String, Word) -> (String, Word))
-> [Parsec String]
-> TokenExtractor
-> TokenExtractor
lexTokenWithSelect :: (NonEmpty ([Char], Word) -> ([Char], Word))
-> [Parsec [Char]] -> TokenExtractor -> TokenExtractor
lexTokenWithSelect NonEmpty ([Char], Word) -> ([Char], Word)
selectToken [Parsec [Char]]
tokens TokenExtractor
extractItem NonEmpty Char
cs Word
parserDemanded Bool
lexical
| Bool
lexical = TokenExtractor
extractItem NonEmpty Char
cs Word
parserDemanded Bool
True
| Bool
otherwise = NonEmpty Char -> Token
extractToken NonEmpty Char
cs
where
extractToken :: NonEmpty Char -> Token
extractToken :: NonEmpty Char -> Token
extractToken NonEmpty Char
inp =
let Success Either (NonEmpty ([Char], Word)) [Char]
rawOrToks = forall err a.
ErrorBuilder err =>
Parsec a -> [Char] -> Result err a
parse @Void Parsec (Either (NonEmpty ([Char], Word)) [Char])
parser (NonEmpty Char -> [Char]
forall a. NonEmpty a -> [a]
NonEmpty.toList NonEmpty Char
inp)
in (NonEmpty ([Char], Word) -> Token)
-> ([Char] -> Token)
-> Either (NonEmpty ([Char], Word)) [Char]
-> Token
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either (([Char] -> Word -> Token) -> ([Char], Word) -> Token
forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry [Char] -> Word -> Token
Named (([Char], Word) -> Token)
-> (NonEmpty ([Char], Word) -> ([Char], Word))
-> NonEmpty ([Char], Word)
-> Token
forall b c a. (b -> c) -> (a -> b) -> a -> c
. NonEmpty ([Char], Word) -> ([Char], Word)
selectToken) [Char] -> Token
Raw Either (NonEmpty ([Char], Word)) [Char]
rawOrToks
parser :: Parsec (Either (NonEmpty (String, Word)) String)
parser :: Parsec (Either (NonEmpty ([Char], Word)) [Char])
parser =
let toks :: Parsec (NonEmpty ([Char], Word))
toks = ([Maybe ([Char], Word)] -> Maybe (NonEmpty ([Char], Word)))
-> Parsec [Maybe ([Char], Word)]
-> Parsec (NonEmpty ([Char], Word))
forall a b. (a -> Maybe b) -> Parsec a -> Parsec b
mapMaybeS ([([Char], Word)] -> Maybe (NonEmpty ([Char], Word))
forall a. [a] -> Maybe (NonEmpty a)
nonEmpty ([([Char], Word)] -> Maybe (NonEmpty ([Char], Word)))
-> ([Maybe ([Char], Word)] -> [([Char], Word)])
-> [Maybe ([Char], Word)]
-> Maybe (NonEmpty ([Char], Word))
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Maybe ([Char], Word)] -> [([Char], Word)]
forall a. [Maybe a] -> [a]
catMaybes)
((Parsec [Char] -> Parsec (Maybe ([Char], Word)))
-> [Parsec [Char]] -> Parsec [Maybe ([Char], Word)]
forall (t :: * -> *) (f :: * -> *) a b.
(Traversable t, Applicative f) =>
(a -> f b) -> t a -> f (t b)
forall (f :: * -> *) a b.
Applicative f =>
(a -> f b) -> [a] -> f [b]
traverse (\Parsec [Char]
t -> Parsec ([Char], Word) -> Parsec (Maybe ([Char], Word))
forall a. Parsec a -> Parsec (Maybe a)
option (Parsec ([Char], Word) -> Parsec ([Char], Word)
forall a. Parsec a -> Parsec a
lookAhead (Parsec [Char] -> Parsec [Char]
forall a. Parsec a -> Parsec a
atomic Parsec [Char]
t Parsec [Char] -> Parsec Word -> Parsec ([Char], Word)
forall a b. Parsec a -> Parsec b -> Parsec (a, b)
<~> Parsec Word
offset))) [Parsec [Char]]
tokens)
rawTok :: Parsec [Char]
rawTok = Parsec Char -> Parsec [Char]
forall a. Parsec a -> Parsec [a]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
some ((Parsec [Char] -> Parsec ()) -> [Parsec [Char]] -> Parsec [()]
forall (t :: * -> *) (f :: * -> *) a b.
(Traversable t, Applicative f) =>
(a -> f b) -> t a -> f (t b)
forall (f :: * -> *) a b.
Applicative f =>
(a -> f b) -> [a] -> f [b]
traverse Parsec [Char] -> Parsec ()
forall a. Parsec a -> Parsec ()
notFollowedBy [Parsec [Char]]
tokens Parsec [()] -> Parsec Char -> Parsec Char
forall a b. Parsec a -> Parsec b -> Parsec b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parsec Char
item)
in Parsec (NonEmpty ([Char], Word))
toks Parsec (NonEmpty ([Char], Word))
-> Parsec [Char]
-> Parsec (Either (NonEmpty ([Char], Word)) [Char])
forall a b. Parsec a -> Parsec b -> Parsec (Either a b)
<+> Parsec [Char]
rawTok