{-# LANGUAGE Safe #-} {-# LANGUAGE OverloadedLists #-} {-# OPTIONS_GHC -Wno-partial-fields #-} -- TODO: In next major, don't expose the constructors of the descriptions, -- we want them built up by record copy for forwards compatible evolution -- We can move this into an internal module to accommodate that if we want {-| Module : Text.Gigaparsec.Token.Descriptions Description : This module contains the descriptions of various lexical structures to configure the lexer. License : BSD-3-Clause Maintainer : Jamie Willis, Gigaparsec Maintainers Stability : experimental This module contains the descriptions of various lexical structures to configure the lexer. Many languages share common lexical tokens, such as numeric and string literals. Writing lexers turning these strings into tokens is effectively boilerplate. A __Description__ encodes how to lex one of these common tokens. Feeding a 'LexicalDesc' to a 'Text.Gigaparsec.Token.Lexer.Lexer' provides many combinators for dealing with these tokens. ==== Usage Rather than use the internal constructors, such as @NameDesc@, one should extend the \'@plain@\' definitions with record field updates. For example, @ myLexicalDesc = plain { nameDesc = myNameDesc , textDesc = myTextDesc } @ will produce a description that overrides the default name and text descriptions by those given. See 'plainName', 'plainSymbol', 'plainNumeric', 'plainText' and 'plainSpace' for further examples. @since 0.2.2.0 -} module Text.Gigaparsec.Token.Descriptions ( -- * Lexical Descriptions {-| A lexer is configured by extending the default 'plain' template, producing a 'LexicalDesc'. * 'LexicalDesc' * 'plain' -} -- ** Name Descriptions {-| A 'NameDesc' configures the lexing of name-like tokens, such as variable and function names. To create a 'NameDesc', use 'plainName', and configure it to your liking with record updates. * 'NameDesc' * 'identifierStart' * 'identifierLetter' * 'operatorStart' * 'operatorLetter' * 'plainName' -} -- ** Symbol Descriptions {-| A 'SymbolDesc' configures the lexing of \'symbols\' (textual literals), such as keywords and operators. To create a 'SymbolDesc', use 'plainSymbol' and configure it to your liking with record updates. * 'SymbolDesc' * 'hardKeywords' * 'hardOperators' * 'caseSensitive' * 'plainSymbol' -} -- ** Numeric Descriptions {-| A 'NumericDesc' configures the lexing of numeric literals, such as integer and floating point literals. To create a 'NumericDesc', use 'plainNumeric' and configure it to your liking with record updates. Also see 'ExponentDesc', 'BreakCharDesc', and 'PlusSignPresence', for further configuration options. * 'NumericDesc' * 'literalBreakChar' * 'leadingDotAllowed' * 'trailingDotAllowed' * 'leadingZerosAllowed' * 'positiveSign' * 'integerNumbersCanBeHexadecimal' * 'integerNumbersCanBeOctal' * 'integerNumbersCanBeBinary' * 'realNumbersCanBeHexadecimal' * 'realNumbersCanBeOctal' * 'realNumbersCanBeBinary' * 'hexadecimalLeads' * 'octalLeads' * 'binaryLeads' * 'decimalExponentDesc' * 'hexadecimalExponentDesc' * 'octalExponentDesc' * 'binaryExponentDesc' * 'plainNumeric' -} -- *** Exponent Descriptions {-| An 'ExponentDesc' configures scientific exponent notation. * 'ExponentDesc' * 'NoExponents' * 'ExponentsSupported' * 'compulsory' * 'chars' * 'base' * 'expSign' * 'expLeadingZerosAllowd' -} -- *** Break-Characters in Numeric Literals {-| Some languages allow a single numeric literal to be separated by a \'break\' symbol. * 'BreakCharDesc' * 'NoBreakChar' * 'BreakCharSupported' * 'breakChar' * 'allowedAfterNonDecimalPrefix' -} -- *** Numeric Literal Prefix Configuration {-| * 'PlusSignPresence' * 'PlusRequired' * 'PlusOptional' * 'PlusIllegal' -} -- ** Text Descriptions {-| A 'TextDesc' configures the lexing of string and character literals, as well as escaped numeric literals. To create a 'TextDesc', use 'plainText' and configure it to your liking with record updates. See 'EscapeDesc', 'NumericEscape' and 'NumberOfDigits' for further configuration of escape sequences and escaped numeric literals. * 'TextDesc' * 'escapeSequences' * 'characterLiteralEnd' * 'stringEnds' * 'multiStringEnds' * 'graphicCharacter' * 'plainText' -} -- *** Escape Character Descriptions {-| Configuration of escape sequences, such as tabs @\t@ and newlines @\n@, and escaped numbers, such as hexadecimals @0x...@ and binary @0b...@. * 'EscapeDesc' * 'escBegin' * 'literals' * 'mapping' * 'decimalEscape' * 'hexadecimalEscape' * 'octalEscape' * 'binaryEscape' * 'emptyEscape' * 'gapsSupported' * 'plainEscape' -} -- *** Numeric Escape Sequences {-| Configuration of escaped numeric literals. For example, hexadecimals, @0x...@. * 'NumericEscape' * 'NumericIllegal' * 'NumericSupported' * 'prefix' * 'numDigits' * 'maxValue' * 'NumberOfDigits' * 'Unbounded' * 'Exactly' * 'AtMost' -} -- ** Whitespace and Comment Descriptions {-| A 'SpaceDesc' configures the lexing whitespace and comments. To create a 'SpaceDesc', use 'plainSpace' and configure it to your liking with record updates. * 'SpaceDesc' * 'lineCommentStart' * 'lineCommentAllowsEOF' * 'multiLineCommentStart' * 'multiLineCommentEnd' * 'multiLineNestedComments' * 'space' * 'whitespaceIsContextDependent' * 'plainSpace' * 'CharPredicate' -} module Text.Gigaparsec.Token.Descriptions ) where import Data.Char (isSpace) import Data.Set (Set) import Data.Map (Map) import Data.List.NonEmpty (NonEmpty) {-| This type describes the aggregation of a bunch of different sub-configurations for lexing a specific language. See the 'plain' smart constructor to define a @LexicalDesc@. -} type LexicalDesc :: * data LexicalDesc = LexicalDesc { -- | the description of name-like lexemes LexicalDesc -> NameDesc nameDesc :: {-# UNPACK #-} !NameDesc -- | the description of specific symbolic lexemes , LexicalDesc -> SymbolDesc symbolDesc :: {-# UNPACK #-} !SymbolDesc -- | the description of numeric literals , LexicalDesc -> NumericDesc numericDesc :: {-# UNPACK #-} !NumericDesc -- | the description of text literals , LexicalDesc -> TextDesc textDesc :: {-# UNPACK #-} !TextDesc -- | the description of whitespace , LexicalDesc -> SpaceDesc spaceDesc :: {-# UNPACK #-} !SpaceDesc } {-| This lexical description contains the template @plain\<...\>@ descriptions defined in this module. See 'plainName', 'plainSymbol', 'plainNumeric', 'plainText' and 'plainSpace' for how this description configures the lexer. -} plain :: LexicalDesc plain :: LexicalDesc plain = LexicalDesc { nameDesc :: NameDesc nameDesc = NameDesc plainName , symbolDesc :: SymbolDesc symbolDesc = SymbolDesc plainSymbol , numericDesc :: NumericDesc numericDesc = NumericDesc plainNumeric , textDesc :: TextDesc textDesc = TextDesc plainText , spaceDesc :: SpaceDesc spaceDesc = SpaceDesc plainSpace } {-| This type describes how name-like things are described lexically. In particular, this defines which characters will constitute identifiers and operators. See the 'plainName' smart constructor for how to implement a custom name description. -} type NameDesc :: * data NameDesc = NameDesc { -- | the characters that start an identifier NameDesc -> CharPredicate identifierStart :: !CharPredicate -- | the characters that continue an identifier , NameDesc -> CharPredicate identifierLetter :: !CharPredicate -- | the characters that start a user-defined operator , NameDesc -> CharPredicate operatorStart :: !CharPredicate -- | the characters that continue a user-defined operator , NameDesc -> CharPredicate operatorLetter :: !CharPredicate } {-| This is a blank name description template, which should be extended to form a custom name description. In its default state, 'plainName' makes no characters able to be part of an identifier or operator. To change this, one should use record field copies, for example: @ myNameDesc :: NameDesc myNameDesc = plainName { identifierStart = myIdentifierStartPredicate , identifierLetter = myIdentifierLetterPredicate } @ @myNameDesc@ with then lex identifiers according to the given predicates. -} plainName :: NameDesc plainName :: NameDesc plainName = NameDesc { identifierStart :: CharPredicate identifierStart = CharPredicate forall a. Maybe a Nothing , identifierLetter :: CharPredicate identifierLetter = CharPredicate forall a. Maybe a Nothing , operatorStart :: CharPredicate operatorStart = CharPredicate forall a. Maybe a Nothing , operatorLetter :: CharPredicate operatorLetter = CharPredicate forall a. Maybe a Nothing } {-| This type describes how symbols (textual literals in a BNF) should be processed lexically, including keywords and operators. This includes keywords and (hard) operators that are reserved by the language. For example, in Haskell, "data" is a keyword, and "->" is a hard operator. See the 'plainSymbol' smart constructor for how to implement a custom name description. -} type SymbolDesc :: * data SymbolDesc = SymbolDesc { -- | what keywords are always treated as keywords within the language. SymbolDesc -> Set String hardKeywords :: !(Set String) -- | what operators are always treated as reserved operators within the language. , SymbolDesc -> Set String hardOperators :: !(Set String) -- | @True@ if the keywords are case sensitive, @False@ if not (so that e.g. @IF = if@). , SymbolDesc -> Bool caseSensitive :: !Bool } {-| This is a blank symbol description template, which should be extended to form a custom symbol description. In its default state, 'plainSymbol' has no keywords or reserved/hard operators. To change this, one should use record field copies, for example: @ {-# LANGUAGE OverloadedLists #-} -- This lets us write @[a,b]@ to get a 'Data.Set' containing @a@ and @b@ -- If you don't want to use this, just use @'Data.Set.fromList' [a,b]@ mySymbolDesc :: SymbolDesc mySymbolDesc = plainSymbol { hardKeywords = ["data", "where"] , hardOperators = ["->"] , caseSensitive = True } @ @mySymbolDesc@ with then treat @data@ and @where@ as keywords, and @->@ as a reserved operator. -} plainSymbol :: SymbolDesc plainSymbol :: SymbolDesc plainSymbol = SymbolDesc { hardKeywords :: Set String hardKeywords = [] , hardOperators :: Set String hardOperators = [] , caseSensitive :: Bool caseSensitive = Bool True } {-| This type describes how numeric literals (integers, decimals, hexadecimals, etc...), should be lexically processed. -} type NumericDesc :: * data NumericDesc = NumericDesc { -- | can breaks be found within numeric literals? (see 'BreakCharDesc') NumericDesc -> BreakCharDesc literalBreakChar :: !BreakCharDesc -- | can a real number omit a leading 0 before the point? , NumericDesc -> Bool leadingDotAllowed :: !Bool -- | can a real number omit a trailing 0 after the point? , NumericDesc -> Bool trailingDotAllowed :: !Bool -- | are extraneous zeros allowed at the start of decimal numbers? , NumericDesc -> Bool leadingZerosAllowed :: !Bool -- | describes if positive (+) signs are allowed, compulsory, or illegal. , NumericDesc -> PlusSignPresence positiveSign :: !PlusSignPresence -- generic number -- | can generic "integer numbers" to be hexadecimal? , NumericDesc -> Bool integerNumbersCanBeHexadecimal :: !Bool -- | can generic "integer numbers" to be octal? , NumericDesc -> Bool integerNumbersCanBeOctal :: !Bool -- | can generic "integer numbers" to be binary? , NumericDesc -> Bool integerNumbersCanBeBinary :: !Bool -- | can generic "real numbers" to be hexadecimal? , NumericDesc -> Bool realNumbersCanBeHexadecimal :: !Bool -- | can generic "real numbers" to be octal? , NumericDesc -> Bool realNumbersCanBeOctal :: !Bool -- | can generic "real numbers" to be binary? , NumericDesc -> Bool realNumbersCanBeBinary :: !Bool -- special literals -- | the characters that begin a hexadecimal literal following a 0 (may be empty). , NumericDesc -> Set Char hexadecimalLeads :: !(Set Char) -- | the characters that begin an octal literal following a 0 (may be empty). , NumericDesc -> Set Char octalLeads :: !(Set Char) -- | the characters that begin a binary literal following a 0 (may be empty). , NumericDesc -> Set Char binaryLeads :: !(Set Char) -- exponents -- | describes how scientific exponent notation should work for decimal literals. , NumericDesc -> ExponentDesc decimalExponentDesc :: !ExponentDesc -- | describes how scientific exponent notation should work for hexadecimal literals. , NumericDesc -> ExponentDesc hexadecimalExponentDesc :: !ExponentDesc -- | describes how scientific exponent notation should work for octal literals. , NumericDesc -> ExponentDesc octalExponentDesc :: !ExponentDesc -- | describes how scientific exponent notation should work for binary literals. , NumericDesc -> ExponentDesc binaryExponentDesc :: !ExponentDesc } {-| This is a blank numeric description template, which should be extended to form a custom numeric description. In its default state, 'plainNumeric' allows for hex-, oct-, and bin-ary numeric literals, with the standard prefixes. To change this, one should use record field copies. -} plainNumeric :: NumericDesc plainNumeric :: NumericDesc plainNumeric = NumericDesc { literalBreakChar :: BreakCharDesc literalBreakChar = BreakCharDesc NoBreakChar , leadingDotAllowed :: Bool leadingDotAllowed = Bool False , trailingDotAllowed :: Bool trailingDotAllowed = Bool False , leadingZerosAllowed :: Bool leadingZerosAllowed = Bool True , positiveSign :: PlusSignPresence positiveSign = PlusSignPresence PlusOptional -- generic number , integerNumbersCanBeHexadecimal :: Bool integerNumbersCanBeHexadecimal = Bool True , integerNumbersCanBeOctal :: Bool integerNumbersCanBeOctal = Bool True , integerNumbersCanBeBinary :: Bool integerNumbersCanBeBinary = Bool False , realNumbersCanBeHexadecimal :: Bool realNumbersCanBeHexadecimal = Bool False , realNumbersCanBeOctal :: Bool realNumbersCanBeOctal = Bool False , realNumbersCanBeBinary :: Bool realNumbersCanBeBinary = Bool False -- special literals , hexadecimalLeads :: Set Char hexadecimalLeads = [Char Item (Set Char) 'x', Char Item (Set Char) 'X'] , octalLeads :: Set Char octalLeads = [Char Item (Set Char) 'o', Char Item (Set Char) 'O'] , binaryLeads :: Set Char binaryLeads = [Char Item (Set Char) 'b', Char Item (Set Char) 'B'] -- exponents , decimalExponentDesc :: ExponentDesc decimalExponentDesc = ExponentsSupported { compulsory :: Bool compulsory = Bool False , chars :: Set Char chars = [Char Item (Set Char) 'e', Char Item (Set Char) 'E'] , base :: Int base = Int 10 , expSign :: PlusSignPresence expSign = PlusSignPresence PlusOptional , expLeadingZerosAllowd :: Bool expLeadingZerosAllowd = Bool True } , hexadecimalExponentDesc :: ExponentDesc hexadecimalExponentDesc = ExponentsSupported { compulsory :: Bool compulsory = Bool True , chars :: Set Char chars = [Char Item (Set Char) 'p', Char Item (Set Char) 'P'] , base :: Int base = Int 2 , expSign :: PlusSignPresence expSign = PlusSignPresence PlusOptional , expLeadingZerosAllowd :: Bool expLeadingZerosAllowd = Bool True } , octalExponentDesc :: ExponentDesc octalExponentDesc = ExponentsSupported { compulsory :: Bool compulsory = Bool True , chars :: Set Char chars = [Char Item (Set Char) 'e', Char Item (Set Char) 'E', Char Item (Set Char) 'p', Char Item (Set Char) 'P'] , base :: Int base = Int 2 , expSign :: PlusSignPresence expSign = PlusSignPresence PlusOptional , expLeadingZerosAllowd :: Bool expLeadingZerosAllowd = Bool True } , binaryExponentDesc :: ExponentDesc binaryExponentDesc = ExponentsSupported { compulsory :: Bool compulsory = Bool True , chars :: Set Char chars = [Char Item (Set Char) 'e', Char Item (Set Char) 'E', Char Item (Set Char) 'p', Char Item (Set Char) 'P'] , base :: Int base = Int 2 , expSign :: PlusSignPresence expSign = PlusSignPresence PlusOptional , expLeadingZerosAllowd :: Bool expLeadingZerosAllowd = Bool True } } {-| Describe how scientific exponent notation can be used within real literals. A common notation would be @1.6e3@ for @1.6 × 10³@, which the following @ExponentDesc@ describes: @ {-# LANGUAGE OverloadedLists #-} -- Lets us write @[a]@ to generate a singleton 'Data.Set' containing @a@. usualNotation :: ExponentDesc usualNotation = ExponentsSupported { compulsory = False , chars = [\'e\'] -- The letter \'e\' separates the significand from the exponent , base = 10 -- The base of the exponent is 10, so that @2.3e5@ means @2.3 × 10⁵@ , expSign = PlusOptional -- A positive exponent does not need a plus sign, but can have one. , expLeadingZerosAllowd = True -- We allow leading zeros on exponents; so @1.2e005@ is valid. } @ -} type ExponentDesc :: * data ExponentDesc = NoExponents -- ^ The language does not allow exponent notation. | ExponentsSupported -- ^ The language does allow exponent notation, according to the following fields: { ExponentDesc -> Bool compulsory :: !Bool -- ^ Is exponent notation required for real literals? , ExponentDesc -> Set Char chars :: !(Set Char) -- ^ The characters that separate the significand from the exponent , ExponentDesc -> Int base :: !Int -- ^ The base of the exponent; this is usually base ten. , ExponentDesc -> PlusSignPresence expSign :: !PlusSignPresence -- ^ Is a plus (@+@) sign required for positive exponents? , ExponentDesc -> Bool expLeadingZerosAllowd :: !Bool -- ^ Can the exponent contain leading zeros; for example is @3.2e005@ valid? } {-| Prescribes whether or not numeric literals can be broken up by a specific symbol. For example, can one write @300.2_3@? -} type BreakCharDesc :: * data BreakCharDesc = NoBreakChar -- ^ Literals cannot be broken. | BreakCharSupported -- ^ Literals can be broken. { BreakCharDesc -> Char breakChar :: !Char -- ^ the character allowed to break a literal (often _). , BreakCharDesc -> Bool allowedAfterNonDecimalPrefix :: !Bool -- ^ can non-decimals be broken; e.g. can one write, 0x_300? } {-| Whether or not a plus sign (@+@) can prefix a numeric literal. -} type PlusSignPresence :: * data PlusSignPresence = PlusRequired -- ^ (@+@) must always precede a positive numeric literal | PlusOptional -- ^ (@+@) may precede a positive numeric literal, but is not necessary | PlusIllegal -- ^ (@+@) cannot precede a numeric literal as a prefix (this is separate to allowing an infix binary @+@ operator). {-| This type describes how to parse string and character literals. -} type TextDesc :: * data TextDesc = TextDesc { TextDesc -> EscapeDesc escapeSequences :: {-# UNPACK #-} !EscapeDesc -- ^ the description of escape sequences in literals. , TextDesc -> Char characterLiteralEnd :: !Char -- ^ the character that starts and ends a character literal. , TextDesc -> Set (String, String) stringEnds :: !(Set (String, String)) -- ^ the sequences that may begin and end a string literal. , TextDesc -> Set (String, String) multiStringEnds :: !(Set (String, String)) -- ^ the sequences that may begin and end a multi-line string literal. , TextDesc -> CharPredicate graphicCharacter :: !CharPredicate -- ^ the characters that can be written verbatim into a character or string literal. } {-| This is a blank text description template, which should be extended to form a custom text description. In its default state, 'plainText' parses characters as symbols between @\'@ and @\'@, and strings between @"@ and @"@. To change this, one should use record field copies, for example: @ {-# LANGUAGE OverloadedLists #-} -- This lets us write @[a,b]@ to get a 'Data.Set' containing @a@ and @b@ -- If you don't want to use this, just use @'Data.Set.fromList' [a,b]@ myPlainText:: TextDesc myPlainText= plainText { characterLiteralEnd = a , stringEnds = [(b, c)] } @ @myPlainText@ with then parse characters as a single character between @a@ and @a@, and a string as characters between @b@ and @c@. -} plainText :: TextDesc plainText :: TextDesc plainText = TextDesc { escapeSequences :: EscapeDesc escapeSequences = EscapeDesc plainEscape , characterLiteralEnd :: Char characterLiteralEnd = Char '\'' , stringEnds :: Set (String, String) stringEnds = [(String "\"", String "\"")] , multiStringEnds :: Set (String, String) multiStringEnds = [] , graphicCharacter :: CharPredicate graphicCharacter = (Char -> Bool) -> CharPredicate forall a. a -> Maybe a Just (Char -> Char -> Bool forall a. Ord a => a -> a -> Bool >= Char ' ') } {-| Defines the escape characters, and their meaning. This includes character escapes (e.g. tabs, carriage returns), and numeric escapes, such as binary (usually \"0b\") and hexadecimal, \"0x\". -} type EscapeDesc :: * data EscapeDesc = EscapeDesc { EscapeDesc -> Char escBegin :: !Char -- ^ the character that begins an escape sequence: this is usually @\\@. , EscapeDesc -> Set Char literals :: !(Set Char) -- ^ the characters that can be directly escaped, but still represent themselves, for instance \'"\', or \'\\\'. , EscapeDesc -> Map String Char mapping :: !(Map String Char) -- ^ the possible escape sequences that map to a character other than themselves and the (full UTF-16) character they map to, for instance "n" -> 0xa , EscapeDesc -> NumericEscape decimalEscape :: !NumericEscape -- ^ if allowed, the description of how numeric escape sequences work for base 10. , EscapeDesc -> NumericEscape hexadecimalEscape :: !NumericEscape -- ^ if allowed, the description of how numeric escape sequences work for base 16 , EscapeDesc -> NumericEscape octalEscape :: !NumericEscape -- ^ if allowed, the description of how numeric escape sequences work for base 8 , EscapeDesc -> NumericEscape binaryEscape :: !NumericEscape -- ^ if allowed, the description of how numeric escape sequences work for base 2 , EscapeDesc -> Maybe Char emptyEscape :: !(Maybe Char) -- ^ if one should exist, the character which has no effect on -- the string but can be used to disambiguate other escape sequences: in Haskell this would be \& , EscapeDesc -> Bool gapsSupported :: !Bool -- ^ specifies whether or not string gaps are supported: -- this is where whitespace can be injected between two escBegin characters and this will all be ignored in the final string, -- such that @"hello \ \world"@ is "hello world" } {-| This is a blank escape description template, which should be extended to form a custom escape description. In its default state, 'plainEscape' the only escape symbol is a backslash, \"\\\\". To change this, one should use record field copies, for example: @ {-# LANGUAGE OverloadedLists #-} -- This lets us write @[a,b]@ to get a 'Data.Set' containing @a@ and @b@, -- and [(a,b),(c,d)] for a 'Data.Map' which sends @a ↦ b@ and @c ↦ d@ myPlainEscape:: EscapeDesc myPlainEscape= plainEscape { literals = a , stringEnds = [(b, c)] , mapping = [("t",0x0009), ("r",0x000D)] , hexadecimalEscape = NumericSupported TODO } @ @myPlainText@ with then parse characters as a single character between @a@ and @a@, and a string as characters between @b@ and @c@. -} plainEscape :: EscapeDesc plainEscape :: EscapeDesc plainEscape = EscapeDesc { escBegin :: Char escBegin = Char '\\' , literals :: Set Char literals = [Char Item (Set Char) '\\'] , mapping :: Map String Char mapping = [] , decimalEscape :: NumericEscape decimalEscape = NumericEscape NumericIllegal , hexadecimalEscape :: NumericEscape hexadecimalEscape = NumericEscape NumericIllegal , octalEscape :: NumericEscape octalEscape = NumericEscape NumericIllegal , binaryEscape :: NumericEscape binaryEscape = NumericEscape NumericIllegal , emptyEscape :: Maybe Char emptyEscape = Maybe Char forall a. Maybe a Nothing , gapsSupported :: Bool gapsSupported = Bool False } -- TODO: haskellEscape {-| Describes how numeric escape sequences should work for a given base. -} type NumericEscape :: * data NumericEscape = NumericIllegal -- ^ Numeric literals are disallowed for this specific base. | NumericSupported -- ^ Numeric literals are supported for this specific base. { NumericEscape -> Maybe Char prefix :: !(Maybe Char) -- ^ the character, if any, that is required to start the literal (like x for hexadecimal escapes in some languages). , NumericEscape -> NumberOfDigits numDigits :: !NumberOfDigits -- ^ the number of digits required for this literal: this may be unbounded, an exact number, or up to a specific number. , NumericEscape -> Char maxValue :: !Char -- ^ the largest character value that can be expressed by this numeric escape. } {-| Describes how many digits a numeric escape sequence is allowed. -} type NumberOfDigits :: * data NumberOfDigits = Unbounded -- ^ there is no limit on the number of digits that may appear in this sequence. | Exactly !(NonEmpty Word) -- ^ the number of digits in the literal must be one of the given values. | AtMost -- ^ there must be at most @n@ digits in the numeric escape literal, up to and including the value given. !Word -- ^ the maximum (inclusive) number of digits allowed in the literal.. {-| This type describes how whitespace and comments should be handled lexically. -} type SpaceDesc :: * data SpaceDesc = SpaceDesc { SpaceDesc -> String lineCommentStart :: !String -- ^ how to start single-line comments (empty for no single-line comments). , SpaceDesc -> Bool lineCommentAllowsEOF :: !Bool -- ^ can a single-line comment be terminated by the end-of-file (@True@), or must it end with a newline (@False@)? , SpaceDesc -> String multiLineCommentStart :: !String -- ^ how to start multi-line comments (empty for no multi-line comments). , SpaceDesc -> String multiLineCommentEnd :: !String -- ^ how to end multi-line comments (empty for no multi-line comments). , SpaceDesc -> Bool multiLineNestedComments :: !Bool -- ^ @True@ when multi-line comments can be nested, @False@ otherwise. , SpaceDesc -> CharPredicate space :: !CharPredicate -- ^ the characters to be treated as whitespace , SpaceDesc -> Bool whitespaceIsContextDependent :: !Bool -- ^ does the context change the definition of whitespace (@True@), or not (@False@)? -- (e.g. in Python, newlines are valid whitespace within parentheses, but are significant outside of them) } {-| This is a blank whitespace description template, which should be extended to form the desired whitespace descriptions. In its default state, 'plainName' makes no comments possible, and the only whitespace characters are those defined by 'GHC.Unicode.isSpace' -} plainSpace :: SpaceDesc plainSpace :: SpaceDesc plainSpace = SpaceDesc { lineCommentStart :: String lineCommentStart = String "" , lineCommentAllowsEOF :: Bool lineCommentAllowsEOF = Bool True , multiLineCommentStart :: String multiLineCommentStart = String "" , multiLineCommentEnd :: String multiLineCommentEnd = String "" , multiLineNestedComments :: Bool multiLineNestedComments = Bool False , space :: CharPredicate space = (Char -> Bool) -> CharPredicate forall a. a -> Maybe a Just Char -> Bool isSpace , whitespaceIsContextDependent :: Bool whitespaceIsContextDependent = Bool False } {-| An optional predicate on characters: if @pred :: CharPredicate@ and @pred x = Just True@, then the lexer should accept the character @x@. ==== __Examples__ - A predicate that only accepts alphabetical or numbers: @ isAlphaNumPred = Just . isAlphaNum @ - A predicate that only accepts capital letters: @ isCapital = Just . isAsciiUpper @ -} type CharPredicate :: * type CharPredicate = Maybe (Char -> Bool)