{-# LANGUAGE Safe #-}
{-# LANGUAGE OverloadedLists #-}
{-# OPTIONS_GHC -Wno-partial-fields #-}
-- TODO: In next major, don't expose the constructors of the descriptions,
-- we want them built up by record copy for forwards compatible evolution
-- We can move this into an internal module to accommodate that if we want
{-|
Module      : Text.Gigaparsec.Token.Descriptions
Description : This module contains the descriptions of various lexical structures to configure the lexer.
License     : BSD-3-Clause
Maintainer  : Jamie Willis, Gigaparsec Maintainers
Stability   : experimental

This module contains the descriptions of various lexical structures to configure the lexer.

Many languages share common lexical tokens, such as numeric and string literals.
Writing lexers turning these strings into tokens is effectively boilerplate.
A __Description__ encodes how to lex one of these common tokens.
Feeding a 'LexicalDesc' to a 'Text.Gigaparsec.Token.Lexer.Lexer' provides many combinators
for dealing with these tokens.

==== Usage
Rather than use the internal constructors, such as @NameDesc@, one should extend the \'@plain@\' definitions with record field updates.
For example,

@
myLexicalDesc = plain
  { nameDesc = myNameDesc
  , textDesc = myTextDesc
  }
@

will produce a description that overrides the default name and text descriptions by those given.
See 'plainName', 'plainSymbol', 'plainNumeric', 'plainText' and 'plainSpace' for further examples.

@since 0.2.2.0
-}
module Text.Gigaparsec.Token.Descriptions (
  -- * Lexical Descriptions
  {-|
  A lexer is configured by extending the default 'plain' template, producing a 'LexicalDesc'.

  * 'LexicalDesc'
  * 'plain'

  -}
  -- ** Name Descriptions
  {-|
  A 'NameDesc' configures the lexing of name-like tokens, such as variable and function names.
  To create a 'NameDesc', use 'plainName', and configure it to your liking with record updates.

  * 'NameDesc'

      * 'identifierStart'
      * 'identifierLetter'
      * 'operatorStart'
      * 'operatorLetter'

  * 'plainName'

  -}
  -- ** Symbol Descriptions
  {-|
  A 'SymbolDesc' configures the lexing of \'symbols\' (textual literals), such as keywords and operators.
  To create a 'SymbolDesc', use 'plainSymbol' and configure it to your liking with record updates.

  * 'SymbolDesc'

      * 'hardKeywords'
      * 'hardOperators'
      * 'caseSensitive'

  * 'plainSymbol'

  -}
  -- ** Numeric Descriptions
  {-|
  A 'NumericDesc' configures the lexing of numeric literals, such as integer and floating point literals.
  To create a 'NumericDesc', use 'plainNumeric' and configure it to your liking with record updates.
  Also see 'ExponentDesc', 'BreakCharDesc', and 'PlusSignPresence', for further configuration options.

  * 'NumericDesc'

      * 'literalBreakChar'
      * 'leadingDotAllowed'
      * 'trailingDotAllowed'
      * 'leadingZerosAllowed'
      * 'positiveSign'
      * 'integerNumbersCanBeHexadecimal'
      * 'integerNumbersCanBeOctal'
      * 'integerNumbersCanBeBinary'
      * 'realNumbersCanBeHexadecimal'
      * 'realNumbersCanBeOctal'
      * 'realNumbersCanBeBinary'
      * 'hexadecimalLeads'
      * 'octalLeads'
      * 'binaryLeads'
      * 'decimalExponentDesc'
      * 'hexadecimalExponentDesc'
      * 'octalExponentDesc'
      * 'binaryExponentDesc'

  * 'plainNumeric'
  -}
  -- *** Exponent Descriptions
  {-|
  An 'ExponentDesc' configures scientific exponent notation.

    * 'ExponentDesc'

        * 'NoExponents'
        * 'ExponentsSupported'

            * 'compulsory'
            * 'chars'
            * 'base'
            * 'expSign'
            * 'expLeadingZerosAllowd'
  -}
  -- *** Break-Characters in Numeric Literals
  {-|
  Some languages allow a single numeric literal to be separated by a \'break\' symbol.

    * 'BreakCharDesc'

      * 'NoBreakChar'
      * 'BreakCharSupported'

          * 'breakChar'
          * 'allowedAfterNonDecimalPrefix'
  -}
  -- *** Numeric Literal Prefix Configuration
  {-|

  * 'PlusSignPresence'

      * 'PlusRequired'
      * 'PlusOptional'
      * 'PlusIllegal'
  -}
  -- ** Text Descriptions
  {-|
  A 'TextDesc' configures the lexing of string and character literals, as well as escaped numeric literals.
  To create a 'TextDesc', use 'plainText' and configure it to your liking with record updates.
  See 'EscapeDesc', 'NumericEscape' and 'NumberOfDigits' for further configuration of escape sequences and escaped numeric literals.

  * 'TextDesc'

      * 'escapeSequences'
      * 'characterLiteralEnd'
      * 'stringEnds'
      * 'multiStringEnds'
      * 'graphicCharacter'

  * 'plainText'
  -}
  -- *** Escape Character Descriptions
  {-|
  Configuration of escape sequences, such as tabs @\t@ and newlines @\n@, and
  escaped numbers, such as hexadecimals @0x...@ and binary @0b...@.

  * 'EscapeDesc'

      * 'escBegin'
      * 'literals'
      * 'mapping'
      * 'decimalEscape'
      * 'hexadecimalEscape'
      * 'octalEscape'
      * 'binaryEscape'
      * 'emptyEscape'
      * 'gapsSupported'

  * 'plainEscape'
  -}
  -- *** Numeric Escape Sequences
  {-|
  Configuration of escaped numeric literals.
  For example, hexadecimals, @0x...@.

  * 'NumericEscape'

      * 'NumericIllegal'
      * 'NumericSupported'

          * 'prefix'
          * 'numDigits'
          * 'maxValue'

  * 'NumberOfDigits'

      * 'Unbounded'
      * 'Exactly'
      * 'AtMost'

  -}
  -- ** Whitespace and Comment Descriptions
  {-|
  A 'SpaceDesc' configures the lexing whitespace and comments.
  To create a 'SpaceDesc', use 'plainSpace' and configure it to your liking with record updates.

  * 'SpaceDesc'

      * 'lineCommentStart'
      * 'lineCommentAllowsEOF'
      * 'multiLineCommentStart'
      * 'multiLineCommentEnd'
      * 'multiLineNestedComments'
      * 'space'
      * 'whitespaceIsContextDependent'

  * 'plainSpace'
  * 'CharPredicate'

  -}
  module Text.Gigaparsec.Token.Descriptions
  ) where

import Data.Char (isSpace)
import Data.Set (Set)
import Data.Map (Map)
import Data.List.NonEmpty (NonEmpty)

{-|
This type describes the aggregation of a bunch of different sub-configurations for lexing a specific language.

See the 'plain' smart constructor to define a @LexicalDesc@.
-}
type LexicalDesc :: *
data LexicalDesc = LexicalDesc {
  -- | the description of name-like lexemes
     LexicalDesc -> NameDesc
nameDesc :: {-# UNPACK #-} !NameDesc
  -- | the description of specific symbolic lexemes
  , LexicalDesc -> SymbolDesc
symbolDesc :: {-# UNPACK #-} !SymbolDesc
  -- | the description of numeric literals
  , LexicalDesc -> NumericDesc
numericDesc :: {-# UNPACK #-} !NumericDesc
  -- | the description of text literals
  , LexicalDesc -> TextDesc
textDesc :: {-# UNPACK #-} !TextDesc
  -- | the description of whitespace
  , LexicalDesc -> SpaceDesc
spaceDesc :: {-# UNPACK #-} !SpaceDesc
  }

{-|
This lexical description contains the template @plain\<...\>@ descriptions defined in this module.
See 'plainName', 'plainSymbol', 'plainNumeric', 'plainText' and 'plainSpace' for how this description configures the lexer.
-}
plain :: LexicalDesc
plain :: LexicalDesc
plain = LexicalDesc { nameDesc :: NameDesc
nameDesc = NameDesc
plainName
                    , symbolDesc :: SymbolDesc
symbolDesc = SymbolDesc
plainSymbol
                    , numericDesc :: NumericDesc
numericDesc = NumericDesc
plainNumeric
                    , textDesc :: TextDesc
textDesc = TextDesc
plainText
                    , spaceDesc :: SpaceDesc
spaceDesc = SpaceDesc
plainSpace
                    }

{-|
This type describes how name-like things are described lexically.

In particular, this defines which characters will constitute identifiers and operators.

See the 'plainName' smart constructor for how to implement a custom name description.
-}
type NameDesc :: *
data NameDesc = NameDesc {
  -- | the characters that start an identifier
    NameDesc -> CharPredicate
identifierStart :: !CharPredicate
  -- | the characters that continue an identifier
  , NameDesc -> CharPredicate
identifierLetter :: !CharPredicate
  -- | the characters that start a user-defined operator
  , NameDesc -> CharPredicate
operatorStart :: !CharPredicate
  -- | the characters that continue a user-defined operator
  , NameDesc -> CharPredicate
operatorLetter :: !CharPredicate
  }

{-|
This is a blank name description template, which should be extended to form a custom name description.

In its default state, 'plainName' makes no characters able to be part of an identifier or operator.
To change this, one should use record field copies, for example:

@
myNameDesc :: NameDesc
myNameDesc = plainName
  { identifierStart = myIdentifierStartPredicate
  , identifierLetter = myIdentifierLetterPredicate
  }
@

@myNameDesc@ with then lex identifiers according to the given predicates.

-}
plainName :: NameDesc
plainName :: NameDesc
plainName = NameDesc { identifierStart :: CharPredicate
identifierStart = CharPredicate
forall a. Maybe a
Nothing
                     , identifierLetter :: CharPredicate
identifierLetter = CharPredicate
forall a. Maybe a
Nothing
                     , operatorStart :: CharPredicate
operatorStart = CharPredicate
forall a. Maybe a
Nothing
                     , operatorLetter :: CharPredicate
operatorLetter = CharPredicate
forall a. Maybe a
Nothing
                     }


{-|
This type describes how symbols (textual literals in a BNF) should be processed lexically, including keywords and operators.

This includes keywords and (hard) operators that are reserved by the language.
For example, in Haskell, "data" is a keyword, and "->" is a hard operator.

See the 'plainSymbol' smart constructor for how to implement a custom name description.
-}
type SymbolDesc :: *
data SymbolDesc = SymbolDesc {
  -- | what keywords are always treated as keywords within the language.
    SymbolDesc -> Set String
hardKeywords :: !(Set String)
  -- | what operators are always treated as reserved operators within the language.
  , SymbolDesc -> Set String
hardOperators :: !(Set String)
  -- | @True@ if the keywords are case sensitive, @False@ if not (so that e.g. @IF = if@).
  , SymbolDesc -> Bool
caseSensitive :: !Bool
  }

{-|
This is a blank symbol description template, which should be extended to form a custom symbol description.

In its default state, 'plainSymbol' has no keywords or reserved/hard operators.
To change this, one should use record field copies, for example:

@
{-# LANGUAGE OverloadedLists #-} -- This lets us write @[a,b]@ to get a 'Data.Set' containing @a@ and @b@
                                 -- If you don't want to use this, just use @'Data.Set.fromList' [a,b]@
mySymbolDesc :: SymbolDesc
mySymbolDesc = plainSymbol
  { hardKeywords = ["data", "where"]
  , hardOperators = ["->"]
  , caseSensitive = True
  }
@

@mySymbolDesc@ with then treat @data@ and @where@ as keywords, and @->@ as a reserved operator.

-}
plainSymbol :: SymbolDesc
plainSymbol :: SymbolDesc
plainSymbol = SymbolDesc { hardKeywords :: Set String
hardKeywords = []
                         , hardOperators :: Set String
hardOperators = []
                         , caseSensitive :: Bool
caseSensitive = Bool
True
                         }

{-|
This type describes how numeric literals (integers, decimals, hexadecimals, etc...), should be lexically processed.
-}
type NumericDesc :: *
data NumericDesc = NumericDesc {
  -- | can breaks be found within numeric literals? (see 'BreakCharDesc')
    NumericDesc -> BreakCharDesc
literalBreakChar :: !BreakCharDesc
  -- | can a real number omit a leading 0 before the point?
  , NumericDesc -> Bool
leadingDotAllowed :: !Bool
  -- | can a real number omit a trailing 0 after the point?
  , NumericDesc -> Bool
trailingDotAllowed :: !Bool
  -- | are extraneous zeros allowed at the start of decimal numbers?
  , NumericDesc -> Bool
leadingZerosAllowed :: !Bool
  -- | describes if positive (+) signs are allowed, compulsory, or illegal.
  , NumericDesc -> PlusSignPresence
positiveSign :: !PlusSignPresence
  -- generic number
  -- | can generic "integer numbers" to be hexadecimal?
  , NumericDesc -> Bool
integerNumbersCanBeHexadecimal :: !Bool
  -- | can generic "integer numbers" to be octal?
  , NumericDesc -> Bool
integerNumbersCanBeOctal :: !Bool
  -- | can generic "integer numbers" to be binary?
  , NumericDesc -> Bool
integerNumbersCanBeBinary :: !Bool
  -- | can generic "real numbers" to be hexadecimal?
  , NumericDesc -> Bool
realNumbersCanBeHexadecimal :: !Bool
  -- | can generic "real numbers" to be octal?
  , NumericDesc -> Bool
realNumbersCanBeOctal :: !Bool
  -- | can generic "real numbers" to be binary?
  , NumericDesc -> Bool
realNumbersCanBeBinary :: !Bool
  -- special literals
  -- | the characters that begin a hexadecimal literal following a 0 (may be empty).
  , NumericDesc -> Set Char
hexadecimalLeads :: !(Set Char)
  -- | the characters that begin an octal literal following a 0 (may be empty).
  , NumericDesc -> Set Char
octalLeads :: !(Set Char)
  -- | the characters that begin a binary literal following a 0 (may be empty).
  , NumericDesc -> Set Char
binaryLeads :: !(Set Char)

  -- exponents
  -- | describes how scientific exponent notation should work for decimal literals.
  , NumericDesc -> ExponentDesc
decimalExponentDesc :: !ExponentDesc
  -- | describes how scientific exponent notation should work for hexadecimal literals.
  , NumericDesc -> ExponentDesc
hexadecimalExponentDesc :: !ExponentDesc
  -- | describes how scientific exponent notation should work for octal literals.
  , NumericDesc -> ExponentDesc
octalExponentDesc :: !ExponentDesc
  -- | describes how scientific exponent notation should work for binary literals.
  , NumericDesc -> ExponentDesc
binaryExponentDesc :: !ExponentDesc
  }

{-|
This is a blank numeric description template, which should be extended to form a custom numeric description.

In its default state, 'plainNumeric' allows for hex-, oct-, and bin-ary numeric literals,
with the standard prefixes.
To change this, one should use record field copies.
-}
plainNumeric :: NumericDesc
plainNumeric :: NumericDesc
plainNumeric = NumericDesc { literalBreakChar :: BreakCharDesc
literalBreakChar = BreakCharDesc
NoBreakChar
                           , leadingDotAllowed :: Bool
leadingDotAllowed = Bool
False
                           , trailingDotAllowed :: Bool
trailingDotAllowed = Bool
False
                           , leadingZerosAllowed :: Bool
leadingZerosAllowed = Bool
True
                           , positiveSign :: PlusSignPresence
positiveSign = PlusSignPresence
PlusOptional
                           -- generic number
                           , integerNumbersCanBeHexadecimal :: Bool
integerNumbersCanBeHexadecimal = Bool
True
                           , integerNumbersCanBeOctal :: Bool
integerNumbersCanBeOctal = Bool
True
                           , integerNumbersCanBeBinary :: Bool
integerNumbersCanBeBinary = Bool
False
                           , realNumbersCanBeHexadecimal :: Bool
realNumbersCanBeHexadecimal = Bool
False
                           , realNumbersCanBeOctal :: Bool
realNumbersCanBeOctal = Bool
False
                           , realNumbersCanBeBinary :: Bool
realNumbersCanBeBinary = Bool
False
                           -- special literals
                           , hexadecimalLeads :: Set Char
hexadecimalLeads = [Char
Item (Set Char)
'x', Char
Item (Set Char)
'X']
                           , octalLeads :: Set Char
octalLeads = [Char
Item (Set Char)
'o', Char
Item (Set Char)
'O']
                           , binaryLeads :: Set Char
binaryLeads = [Char
Item (Set Char)
'b', Char
Item (Set Char)
'B']
                           -- exponents
                           , decimalExponentDesc :: ExponentDesc
decimalExponentDesc = ExponentsSupported { compulsory :: Bool
compulsory = Bool
False
                                                                      , chars :: Set Char
chars = [Char
Item (Set Char)
'e', Char
Item (Set Char)
'E']
                                                                      , base :: Int
base = Int
10
                                                                      , expSign :: PlusSignPresence
expSign = PlusSignPresence
PlusOptional
                                                                      , expLeadingZerosAllowd :: Bool
expLeadingZerosAllowd = Bool
True
                                                                      }
                           , hexadecimalExponentDesc :: ExponentDesc
hexadecimalExponentDesc = ExponentsSupported { compulsory :: Bool
compulsory = Bool
True
                                                                          , chars :: Set Char
chars = [Char
Item (Set Char)
'p', Char
Item (Set Char)
'P']
                                                                          , base :: Int
base = Int
2
                                                                          , expSign :: PlusSignPresence
expSign = PlusSignPresence
PlusOptional
                                                                          , expLeadingZerosAllowd :: Bool
expLeadingZerosAllowd = Bool
True
                                                                          }
                           , octalExponentDesc :: ExponentDesc
octalExponentDesc = ExponentsSupported { compulsory :: Bool
compulsory = Bool
True
                                                                    , chars :: Set Char
chars = [Char
Item (Set Char)
'e', Char
Item (Set Char)
'E', Char
Item (Set Char)
'p', Char
Item (Set Char)
'P']
                                                                    , base :: Int
base = Int
2
                                                                    , expSign :: PlusSignPresence
expSign = PlusSignPresence
PlusOptional
                                                                    , expLeadingZerosAllowd :: Bool
expLeadingZerosAllowd = Bool
True
                                                                    }
                           , binaryExponentDesc :: ExponentDesc
binaryExponentDesc = ExponentsSupported { compulsory :: Bool
compulsory = Bool
True
                                                                     , chars :: Set Char
chars = [Char
Item (Set Char)
'e', Char
Item (Set Char)
'E', Char
Item (Set Char)
'p', Char
Item (Set Char)
'P']
                                                                     , base :: Int
base = Int
2
                                                                     , expSign :: PlusSignPresence
expSign = PlusSignPresence
PlusOptional
                                                                     , expLeadingZerosAllowd :: Bool
expLeadingZerosAllowd = Bool
True
                                                                     }
                           }

{-|
Describe how scientific exponent notation can be used within real literals.

A common notation would be @1.6e3@ for @1.6 × 10³@, which the following @ExponentDesc@ describes:

@
{-# LANGUAGE OverloadedLists #-} -- Lets us write @[a]@ to generate a singleton 'Data.Set' containing @a@.
usualNotation :: ExponentDesc
usualNotation = ExponentsSupported
  { compulsory = False
  , chars = [\'e\']  -- The letter \'e\' separates the significand from the exponent
  , base  = 10   -- The base of the exponent is 10, so that @2.3e5@ means @2.3 × 10⁵@
  , expSign = PlusOptional -- A positive exponent does not need a plus sign, but can have one.
  , expLeadingZerosAllowd = True -- We allow leading zeros on exponents; so @1.2e005@ is valid.
  }
@

-}
type ExponentDesc :: *
data ExponentDesc
  = NoExponents         -- ^ The language does not allow exponent notation.
  | ExponentsSupported  -- ^ The language does allow exponent notation, according to the following fields:
    { ExponentDesc -> Bool
compulsory :: !Bool            -- ^ Is exponent notation required for real literals?
    , ExponentDesc -> Set Char
chars :: !(Set Char)           -- ^ The characters that separate the significand from the exponent
    , ExponentDesc -> Int
base :: !Int                   -- ^ The base of the exponent; this is usually base ten.
    , ExponentDesc -> PlusSignPresence
expSign :: !PlusSignPresence   -- ^ Is a plus (@+@) sign required for positive exponents?
    , ExponentDesc -> Bool
expLeadingZerosAllowd :: !Bool -- ^ Can the exponent contain leading zeros; for example is @3.2e005@ valid?
    }

{-|
Prescribes whether or not numeric literals can be broken up by a specific symbol.

For example, can one write @300.2_3@?
-}
type BreakCharDesc :: *
data BreakCharDesc
  = NoBreakChar                             -- ^ Literals cannot be broken.
  | BreakCharSupported                      -- ^ Literals can be broken.
    { BreakCharDesc -> Char
breakChar :: !Char                    -- ^ the character allowed to break a literal (often _).
    , BreakCharDesc -> Bool
allowedAfterNonDecimalPrefix :: !Bool -- ^ can non-decimals be broken; e.g. can one write, 0x_300?
    }

{-|
Whether or not a plus sign (@+@) can prefix a numeric literal.
-}
type PlusSignPresence :: *
data PlusSignPresence
  = PlusRequired -- ^ (@+@) must always precede a positive numeric literal
  | PlusOptional -- ^ (@+@) may precede a positive numeric literal, but is not necessary
  | PlusIllegal  -- ^ (@+@) cannot precede a numeric literal as a prefix (this is separate to allowing an infix binary @+@ operator).

{-|
  This type describes how to parse string and character literals.
-}
type TextDesc :: *
data TextDesc = TextDesc
  { TextDesc -> EscapeDesc
escapeSequences :: {-# UNPACK #-} !EscapeDesc -- ^ the description of escape sequences in literals.
  , TextDesc -> Char
characterLiteralEnd :: !Char -- ^ the character that starts and ends a character literal.
  , TextDesc -> Set (String, String)
stringEnds :: !(Set (String, String)) -- ^ the sequences that may begin and end a string literal.
  , TextDesc -> Set (String, String)
multiStringEnds :: !(Set (String, String)) -- ^ the sequences that may begin and end a multi-line string literal.
  , TextDesc -> CharPredicate
graphicCharacter :: !CharPredicate -- ^ the characters that can be written verbatim into a character or string literal.
  }

{-|
This is a blank text description template, which should be extended to form a custom text description.

In its default state, 'plainText' parses characters as symbols between @\'@ and @\'@, and strings between @"@ and @"@.
To change this, one should use record field copies, for example:

@

{-# LANGUAGE OverloadedLists #-} -- This lets us write @[a,b]@ to get a 'Data.Set' containing @a@ and @b@
                                 -- If you don't want to use this, just use @'Data.Set.fromList' [a,b]@
myPlainText:: TextDesc
myPlainText= plainText
  { characterLiteralEnd = a
  , stringEnds = [(b, c)]
  }

@

@myPlainText@ with then parse characters as a single character between @a@ and @a@, and a string as characters between @b@ and @c@.
-}
plainText :: TextDesc
plainText :: TextDesc
plainText = TextDesc { escapeSequences :: EscapeDesc
escapeSequences = EscapeDesc
plainEscape
                     , characterLiteralEnd :: Char
characterLiteralEnd = Char
'\''
                     , stringEnds :: Set (String, String)
stringEnds = [(String
"\"", String
"\"")]
                     , multiStringEnds :: Set (String, String)
multiStringEnds = []
                     , graphicCharacter :: CharPredicate
graphicCharacter = (Char -> Bool) -> CharPredicate
forall a. a -> Maybe a
Just (Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
' ')
                     }

{-|
Defines the escape characters, and their meaning.

This includes character escapes (e.g. tabs, carriage returns), and numeric escapes, such as binary (usually \"0b\") and hexadecimal, \"0x\".
-}
type EscapeDesc :: *
data EscapeDesc = EscapeDesc
  { EscapeDesc -> Char
escBegin :: !Char                   -- ^ the character that begins an escape sequence: this is usually @\\@.
  , EscapeDesc -> Set Char
literals :: !(Set Char)             -- ^ the characters that can be directly escaped, but still represent themselves, for instance \'"\', or \'\\\'.
  , EscapeDesc -> Map String Char
mapping :: !(Map String Char)       -- ^ the possible escape sequences that map to a character other than themselves and the (full UTF-16) character they map to, for instance "n" -> 0xa
  , EscapeDesc -> NumericEscape
decimalEscape :: !NumericEscape     -- ^ if allowed, the description of how numeric escape sequences work for base 10.
  , EscapeDesc -> NumericEscape
hexadecimalEscape :: !NumericEscape -- ^ if allowed, the description of how numeric escape sequences work for base 16
  , EscapeDesc -> NumericEscape
octalEscape :: !NumericEscape       -- ^ if allowed, the description of how numeric escape sequences work for base 8
  , EscapeDesc -> NumericEscape
binaryEscape :: !NumericEscape      -- ^ if allowed, the description of how numeric escape sequences work for base 2
  , EscapeDesc -> Maybe Char
emptyEscape :: !(Maybe Char)        -- ^ if one should exist, the character which has no effect on
                                        -- the string but can be used to disambiguate other escape sequences: in Haskell this would be \&
  , EscapeDesc -> Bool
gapsSupported :: !Bool              -- ^ specifies whether or not string gaps are supported:
                                        -- this is where whitespace can be injected between two escBegin characters and this will all be ignored in the final string,
                                        -- such that @"hello \ \world"@ is "hello world"
  }

{-|
This is a blank escape description template, which should be extended to form a custom escape description.

In its default state, 'plainEscape' the only escape symbol is a backslash, \"\\\\".
To change this, one should use record field copies, for example:

@

{-# LANGUAGE OverloadedLists #-} -- This lets us write @[a,b]@ to get a 'Data.Set' containing @a@ and @b@,
                                 -- and [(a,b),(c,d)] for a 'Data.Map' which sends @a ↦ b@ and @c ↦ d@
myPlainEscape:: EscapeDesc
myPlainEscape= plainEscape
  { literals = a
  , stringEnds = [(b, c)]
  , mapping = [("t",0x0009), ("r",0x000D)]
  , hexadecimalEscape = NumericSupported TODO
  }

@

@myPlainText@ with then parse characters as a single character between @a@ and @a@, and a string as characters between @b@ and @c@.
-}
plainEscape :: EscapeDesc
plainEscape :: EscapeDesc
plainEscape = EscapeDesc { escBegin :: Char
escBegin = Char
'\\'
                         , literals :: Set Char
literals = [Char
Item (Set Char)
'\\']
                         , mapping :: Map String Char
mapping = []
                         , decimalEscape :: NumericEscape
decimalEscape = NumericEscape
NumericIllegal
                         , hexadecimalEscape :: NumericEscape
hexadecimalEscape = NumericEscape
NumericIllegal
                         , octalEscape :: NumericEscape
octalEscape = NumericEscape
NumericIllegal
                         , binaryEscape :: NumericEscape
binaryEscape = NumericEscape
NumericIllegal
                         , emptyEscape :: Maybe Char
emptyEscape = Maybe Char
forall a. Maybe a
Nothing
                         , gapsSupported :: Bool
gapsSupported = Bool
False
                         }

-- TODO: haskellEscape

{-|
Describes how numeric escape sequences should work for a given base.
-}
type NumericEscape :: *
data NumericEscape
  = NumericIllegal    -- ^ Numeric literals are disallowed for this specific base.
  | NumericSupported  -- ^ Numeric literals are supported for this specific base.
    { NumericEscape -> Maybe Char
prefix :: !(Maybe Char)      -- ^ the character, if any, that is required to start the literal (like x for hexadecimal escapes in some languages).
    , NumericEscape -> NumberOfDigits
numDigits :: !NumberOfDigits -- ^ the number of digits required for this literal: this may be unbounded, an exact number, or up to a specific number.
    , NumericEscape -> Char
maxValue :: !Char            -- ^ the largest character value that can be expressed by this numeric escape.
    }

{-|
Describes how many digits a numeric escape sequence is allowed.
-}
type NumberOfDigits :: *
data NumberOfDigits
  = Unbounded -- ^ there is no limit on the number of digits that may appear in this sequence.
  | Exactly !(NonEmpty Word) -- ^ the number of digits in the literal must be one of the given values.
  | AtMost -- ^ there must be at most @n@ digits in the numeric escape literal, up to and including the value given.
    !Word  -- ^ the maximum (inclusive) number of digits allowed in the literal..

{-|
This type describes how whitespace and comments should be handled lexically.
-}
type SpaceDesc :: *
data SpaceDesc = SpaceDesc
  { SpaceDesc -> String
lineCommentStart :: !String           -- ^ how to start single-line comments (empty for no single-line comments).
  , SpaceDesc -> Bool
lineCommentAllowsEOF :: !Bool         -- ^ can a single-line comment be terminated by the end-of-file (@True@), or must it end with a newline (@False@)?
  , SpaceDesc -> String
multiLineCommentStart :: !String      -- ^ how to start multi-line comments (empty for no multi-line comments).
  , SpaceDesc -> String
multiLineCommentEnd :: !String        -- ^ how to end multi-line comments (empty for no multi-line comments).
  , SpaceDesc -> Bool
multiLineNestedComments :: !Bool      -- ^ @True@ when multi-line comments can be nested, @False@ otherwise.
  , SpaceDesc -> CharPredicate
space :: !CharPredicate               -- ^ the characters to be treated as whitespace
  , SpaceDesc -> Bool
whitespaceIsContextDependent :: !Bool -- ^ does the context change the definition of whitespace (@True@), or not (@False@)?
                                          --  (e.g. in Python, newlines are valid whitespace within parentheses, but are significant outside of them)
  }
{-|
This is a blank whitespace description template, which should be extended to form the desired whitespace descriptions.

In its default state, 'plainName' makes no comments possible, and the only whitespace characters are those
defined by 'GHC.Unicode.isSpace'
-}
plainSpace :: SpaceDesc
plainSpace :: SpaceDesc
plainSpace = SpaceDesc { lineCommentStart :: String
lineCommentStart = String
""
                       , lineCommentAllowsEOF :: Bool
lineCommentAllowsEOF = Bool
True
                       , multiLineCommentStart :: String
multiLineCommentStart = String
""
                       , multiLineCommentEnd :: String
multiLineCommentEnd = String
""
                       , multiLineNestedComments :: Bool
multiLineNestedComments = Bool
False
                       , space :: CharPredicate
space = (Char -> Bool) -> CharPredicate
forall a. a -> Maybe a
Just Char -> Bool
isSpace
                       , whitespaceIsContextDependent :: Bool
whitespaceIsContextDependent = Bool
False
                       }

{-|
An optional predicate on characters:
if @pred :: CharPredicate@ and @pred x = Just True@, then the lexer should accept the character @x@.

==== __Examples__
- A predicate that only accepts alphabetical or numbers:

  @
    isAlphaNumPred = Just . isAlphaNum
  @

- A predicate that only accepts capital letters:

  @
    isCapital = Just . isAsciiUpper
  @

-}
type CharPredicate :: *
type CharPredicate = Maybe (Char -> Bool)