{-# LANGUAGE Safe #-}
{-# LANGUAGE RecordWildCards, OverloadedLists #-}
{-# OPTIONS_HADDOCK hide #-}
module Text.Gigaparsec.Internal.Token.Symbol (
    Symbol, softKeyword, softOperator, mkSymbol, mkSym, lexeme
  ) where

import Text.Gigaparsec (Parsec, void, notFollowedBy, atomic, (<|>), empty)
import Text.Gigaparsec.Char (string, satisfy, char, strings)
import Text.Gigaparsec.Token.Descriptions ( SymbolDesc(SymbolDesc, hardKeywords, hardOperators, caseSensitive)
                                          , NameDesc(NameDesc, identifierLetter, operatorLetter)
                                          , CharPredicate
                                          )

import Data.Set qualified as Set (member, toList, fromList, null)
import Data.Char (toUpper, toLower, isLetter)
import Text.Gigaparsec.Errors.Combinator (amend, emptyWide, (<?>), label)
import Data.Set (Set)
import Data.Map qualified as Map (findWithDefault)
import Data.Maybe (mapMaybe)
import Text.Gigaparsec.Internal.Require (require)
import Text.Gigaparsec.Token.Errors (ErrorConfig (labelSymbolEndOfKeyword, labelSymbolEndOfOperator, labelSymbol), notConfigured)
import Text.Gigaparsec.Internal.Token.Errors (annotate)

{-|
This contains lexing functionality relevant to the parsing of atomic symbols.

Symbols are characterised by their "unitness", that is, every parser inside returns Unit. 
This is because they all parse a specific known entity, and, as such, the result of the parse is irrelevant. 
These can be things such as reserved names, or small symbols like parentheses. 

This type also contains a means of creating new symbols as well as implicit conversions 
to allow for Haskell's string literals (with @OverloadedStringLiterals@ enabled) to serve as symbols within a parser.
-}
type Symbol :: *
data Symbol = Symbol { 
  {- | This combinator parses a given soft keyword atomically: 
  the keyword is only valid if it is not followed directly by a character 
  which would make it a larger valid identifier.

  Soft keywords are keywords that are only reserved within certain contexts. 
  The 'Text.Gigaparsec.Token.Lexer.apply' combinator handles so-called hard keywords automatically, 
  as the given string is checked to see what class of symbol it might belong to.
  However, soft keywords are not included in this set, 
  as they are not always reserved in all situations. 
  As such, when a soft keyword does need to be parsed, 
  this combinator should be used to do it explicitly. 
  Care should be taken to ensure that soft keywords take
  parsing priority over identifiers when they do occur.
  -}
    Symbol -> String -> Parsec ()
softKeyword :: !(String -> Parsec ())
  {-|
  This combinator parses a given soft operator atomically:
  the operator is only valid if it is not followed directly by a character which 
  would make it a larger valid operator (reserved or otherwise).

  Soft operators are operators that are only reserved within certain contexts. 
  The apply combinator handles so-called hard operators automatically, 
  as the given string is checked to see what class of symbol it might belong to. 
  However, soft operators are not included in this set, 
  as they are not always reserved in all situations.
  As such, when a soft operator does need to be parsed, 
  this combinator should be used to do it explicitly.
  -}
  , Symbol -> String -> Parsec ()
softOperator :: !(String -> Parsec ())
  }

{-|
Create a 'Symbol' -- an interface for parsing atomic symbols -- according to the given descriptions.
-}
mkSymbol  :: SymbolDesc  -- ^ the description of symbols (keywords and operators).
          -> NameDesc    -- ^ the description of identifiers.
          -> ErrorConfig -- ^ how errors should be produced on failed parses.
          -> Symbol      -- ^ a collection of parsers for keywords and operators as described by the given descriptions.
mkSymbol :: SymbolDesc -> NameDesc -> ErrorConfig -> Symbol
mkSymbol SymbolDesc{Bool
Set String
hardKeywords :: SymbolDesc -> Set String
hardOperators :: SymbolDesc -> Set String
caseSensitive :: SymbolDesc -> Bool
hardKeywords :: Set String
hardOperators :: Set String
caseSensitive :: Bool
..} NameDesc{CharPredicate
identifierLetter :: NameDesc -> CharPredicate
operatorLetter :: NameDesc -> CharPredicate
identifierLetter :: CharPredicate
operatorLetter :: CharPredicate
..} !ErrorConfig
err = Symbol {String -> Parsec ()
softKeyword :: String -> Parsec ()
softOperator :: String -> Parsec ()
softKeyword :: String -> Parsec ()
softOperator :: String -> Parsec ()
..}
  where softKeyword :: String -> Parsec ()
softKeyword String
name = Bool -> String -> String -> Parsec () -> Parsec ()
forall a. Bool -> String -> String -> a -> a
require (Bool -> Bool
not (String -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null String
name)) String
"softKeyword" String
"keywords may not be empty"
          (Bool -> CharPredicate -> String -> ErrorConfig -> Parsec ()
_softKeyword Bool
caseSensitive CharPredicate
identifierLetter String
name ErrorConfig
err Parsec () -> Set String -> Parsec ()
forall a. Parsec a -> Set String -> Parsec a
<?> [String
Item (Set String)
name])
        softOperator :: String -> Parsec ()
softOperator String
name = Bool -> String -> String -> Parsec () -> Parsec ()
forall a. Bool -> String -> String -> a -> a
require (Bool -> Bool
not (String -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null String
name)) String
"softOperator" String
"operators may not be empty"
          (Set String -> CharPredicate -> String -> ErrorConfig -> Parsec ()
_softOperator Set String
hardOperators CharPredicate
operatorLetter String
name ErrorConfig
err Parsec () -> Set String -> Parsec ()
forall a. Parsec a -> Set String -> Parsec a
<?> [String
Item (Set String)
name])

{-|
Create a parser that parses the given string, but first checks if this is a hard keyword or operator, 
in which case these are parsed using the 'softKeyword' and 'softOperator' parsers instead.
-}
mkSym :: SymbolDesc             -- ^ @symbolDesc@, the description of symbols (keywords and operators)
      -> Symbol                 -- ^ @symbol@, a collection of parsers for symbols described by @symbolDesc@
      -> ErrorConfig            -- ^ how errors should be produced on failed parses.
      -> (String -> Parsec ())  -- ^ a function which parses the given string; if this string is a keyword or operator, 
                                -- ^ it is parsed using @symbol@.
mkSym :: SymbolDesc -> Symbol -> ErrorConfig -> String -> Parsec ()
mkSym SymbolDesc{Bool
Set String
hardKeywords :: SymbolDesc -> Set String
hardOperators :: SymbolDesc -> Set String
caseSensitive :: SymbolDesc -> Bool
hardKeywords :: Set String
hardOperators :: Set String
caseSensitive :: Bool
..} Symbol{String -> Parsec ()
softKeyword :: Symbol -> String -> Parsec ()
softOperator :: Symbol -> String -> Parsec ()
softKeyword :: String -> Parsec ()
softOperator :: String -> Parsec ()
..} !ErrorConfig
err String
str =
  LabelWithExplainConfig -> Parsec () -> Parsec ()
forall config a. Annotate config => config -> Parsec a -> Parsec a
forall a. LabelWithExplainConfig -> Parsec a -> Parsec a
annotate (LabelWithExplainConfig
-> String
-> Map String LabelWithExplainConfig
-> LabelWithExplainConfig
forall k a. Ord k => a -> k -> Map k a -> a
Map.findWithDefault LabelWithExplainConfig
forall config. NotConfigurable config => config
notConfigured String
str (ErrorConfig -> Map String LabelWithExplainConfig
labelSymbol ErrorConfig
err)) (Parsec () -> Parsec ()) -> Parsec () -> Parsec ()
forall a b. (a -> b) -> a -> b
$
    if | String -> Set String -> Bool
forall a. Ord a => a -> Set a -> Bool
Set.member String
str Set String
hardKeywords  -> String -> Parsec ()
softKeyword String
str
       | String -> Set String -> Bool
forall a. Ord a => a -> Set a -> Bool
Set.member String
str Set String
hardOperators -> String -> Parsec ()
softOperator String
str
       | Bool
otherwise                    -> Parsec String -> Parsec ()
forall a. Parsec a -> Parsec ()
void (Parsec String -> Parsec String
forall a. Parsec a -> Parsec a
atomic (String -> Parsec String
string String
str))

{-|
Given a whitespace consumer (or any function on a parser) and a 'Symbol' parser, create a 'Symbol' parser
where each constituent parser also applies this whitespace consumer after parsing.
-}
lexeme  :: (forall a. Parsec a -> Parsec a) -- ^ @f@, a parser transformer, usually consumes whitespace after running the parser
        -> Symbol -- ^ the symbol parser
        -> Symbol -- ^ a symbol parser with each constituent parser transformed by @f@.
lexeme :: (forall a. Parsec a -> Parsec a) -> Symbol -> Symbol
lexeme forall a. Parsec a -> Parsec a
lexe Symbol{String -> Parsec ()
softKeyword :: Symbol -> String -> Parsec ()
softOperator :: Symbol -> String -> Parsec ()
softKeyword :: String -> Parsec ()
softOperator :: String -> Parsec ()
..} = Symbol { softKeyword :: String -> Parsec ()
softKeyword = Parsec () -> Parsec ()
forall a. Parsec a -> Parsec a
lexe (Parsec () -> Parsec ())
-> (String -> Parsec ()) -> String -> Parsec ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Parsec ()
softKeyword
                                , softOperator :: String -> Parsec ()
softOperator = Parsec () -> Parsec ()
forall a. Parsec a -> Parsec a
lexe (Parsec () -> Parsec ())
-> (String -> Parsec ()) -> String -> Parsec ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Parsec ()
softOperator
                                }

_softKeyword :: Bool -> CharPredicate -> String -> ErrorConfig -> Parsec ()
_softKeyword :: Bool -> CharPredicate -> String -> ErrorConfig -> Parsec ()
_softKeyword !Bool
caseSensitive !CharPredicate
letter !String
kw !ErrorConfig
err
  | Bool -> Bool
not Bool
caseSensitive = Parsec () -> Parsec ()
forall a. Parsec a -> Parsec a
atomic (CharPredicate -> Parsec String -> Parsec ()
forall {a}. CharPredicate -> Parsec a -> Parsec ()
nfb CharPredicate
letter Parsec String
caseString) Parsec () -> Set String -> Parsec ()
forall a. Parsec a -> Set String -> Parsec a
<?> [String
Item (Set String)
kw]
  | Bool
otherwise         = Parsec () -> Parsec ()
forall a. Parsec a -> Parsec a
atomic (CharPredicate -> Parsec String -> Parsec ()
forall {a}. CharPredicate -> Parsec a -> Parsec ()
nfb CharPredicate
letter (String -> Parsec String
string String
kw)) Parsec () -> Set String -> Parsec ()
forall a. Parsec a -> Set String -> Parsec a
<?> [String
Item (Set String)
kw]
  where nfb :: CharPredicate -> Parsec a -> Parsec ()
nfb CharPredicate
Nothing Parsec a
p = Parsec a -> Parsec ()
forall a. Parsec a -> Parsec ()
void Parsec a
p
        nfb (Just Char -> Bool
c) Parsec a
p = Parsec a
p Parsec a -> Parsec () -> Parsec ()
forall a b. Parsec a -> Parsec b -> Parsec b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> (Parsec Char -> Parsec ()
forall a. Parsec a -> Parsec ()
notFollowedBy ((Char -> Bool) -> Parsec Char
satisfy Char -> Bool
c) Parsec () -> Set String -> Parsec ()
forall a. Parsec a -> Set String -> Parsec a
<?> [ErrorConfig -> String -> String
labelSymbolEndOfKeyword ErrorConfig
err String
kw])
        n :: Int
n = String -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length String
kw
        caseChar :: Char -> Parsec Char
caseChar Char
c
          | Char -> Bool
isLetter Char
c = Char -> Parsec Char
char (Char -> Char
toUpper Char
c) Parsec Char -> Parsec Char -> Parsec Char
forall a. Parsec a -> Parsec a -> Parsec a
<|> Char -> Parsec Char
char (Char -> Char
toLower Char
c)
          | Bool
otherwise  = Char -> Parsec Char
char Char
c
        caseString :: Parsec String
caseString = Parsec String -> Parsec String
forall a. Parsec a -> Parsec a
atomic (Parsec String -> Parsec String
forall a. Parsec a -> Parsec a
amend ((Char -> Parsec Char) -> String -> Parsec String
forall (t :: * -> *) (f :: * -> *) a b.
(Traversable t, Applicative f) =>
(a -> f b) -> t a -> f (t b)
forall (f :: * -> *) a b.
Applicative f =>
(a -> f b) -> [a] -> f [b]
traverse Char -> Parsec Char
caseChar String
kw))
                 Parsec String -> Parsec String -> Parsec String
forall a. Parsec a -> Parsec a -> Parsec a
<|> Word -> Parsec String
forall a. Word -> Parsec a
emptyWide (Int -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
n)

-- TODO: trie-based implementation
_softOperator :: Set String -> CharPredicate -> String -> ErrorConfig -> Parsec ()
_softOperator :: Set String -> CharPredicate -> String -> ErrorConfig -> Parsec ()
_softOperator !Set String
hardOperators !CharPredicate
letter !String
op !ErrorConfig
err = Set String -> Parsec () -> Parsec ()
forall a. Set String -> Parsec a -> Parsec a
label [String
Item (Set String)
op] (Parsec () -> Parsec ()) -> Parsec () -> Parsec ()
forall a b. (a -> b) -> a -> b
$
  if Set String -> Bool
forall a. Set a -> Bool
Set.null Set String
ends then Parsec () -> Parsec ()
forall a. Parsec a -> Parsec a
atomic (String -> Parsec String
string String
op Parsec String -> Parsec () -> Parsec ()
forall a b. Parsec a -> Parsec b -> Parsec b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parsec Char -> Parsec ()
forall a. Parsec a -> Parsec ()
notFollowedBy Parsec Char
letter')
  else Parsec () -> Parsec ()
forall a. Parsec a -> Parsec a
atomic (String -> Parsec String
string String
op Parsec String -> Parsec () -> Parsec ()
forall a b. Parsec a -> Parsec b -> Parsec b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> (Parsec () -> Parsec ()
forall a. Parsec a -> Parsec ()
notFollowedBy (Parsec Char -> Parsec ()
forall a. Parsec a -> Parsec ()
void Parsec Char
letter' Parsec () -> Parsec () -> Parsec ()
forall a. Parsec a -> Parsec a -> Parsec a
<|> Parsec String -> Parsec ()
forall a. Parsec a -> Parsec ()
void (Set String -> Parsec String
strings Set String
ends)) Parsec () -> Set String -> Parsec ()
forall a. Parsec a -> Set String -> Parsec a
<?> [ErrorConfig -> String -> String
labelSymbolEndOfOperator ErrorConfig
err String
op]))
  where ends :: Set String
ends = [String] -> Set String
forall a. Ord a => [a] -> Set a
Set.fromList ((String -> Maybe String) -> [String] -> [String]
forall a b. (a -> Maybe b) -> [a] -> [b]
mapMaybe ((String -> String -> Maybe String)
-> String -> String -> Maybe String
forall a b c. (a -> b -> c) -> b -> a -> c
flip String -> String -> Maybe String
forall {a}. Eq a => [a] -> [a] -> Maybe [a]
strip String
op) (Set String -> [String]
forall a. Set a -> [a]
Set.toList Set String
hardOperators))
        letter' :: Parsec Char
letter' = Parsec Char
-> ((Char -> Bool) -> Parsec Char) -> CharPredicate -> Parsec Char
forall b a. b -> (a -> b) -> Maybe a -> b
maybe Parsec Char
forall a. Parsec a
empty (Char -> Bool) -> Parsec Char
satisfy CharPredicate
letter
        strip :: [a] -> [a] -> Maybe [a]
strip []      str :: [a]
str@(:){}          = [a] -> Maybe [a]
forall a. a -> Maybe a
Just [a]
str
        strip (a
c:[a]
pre) (a
c':[a]
str) | a
c a -> a -> Bool
forall a. Eq a => a -> a -> Bool
== a
c' = [a] -> [a] -> Maybe [a]
strip [a]
pre [a]
str
        strip [a]
_       [a]
_                  = Maybe [a]
forall a. Maybe a
Nothing

-- TODO: HasField instances for the dot/comma/etc?
-- FIXME: to make these work, well need to move sym into Symbol?
{-dot :: Symbol -> Parsec ()
dot =
-}