From 67edc77677d366d4dc0ff42154c79a1d87c2dd29 Mon Sep 17 00:00:00 2001 From: Jed Barber Date: Thu, 10 Jan 2019 15:27:56 +1100 Subject: Packrat.Lexer.Combinators specification --- packrat_parser_lib_notes.txt | 51 +++++++++- src/packrat-lexer-combinators.adb | 190 ++++++++++++++++++++++++++++++++++++++ src/packrat-lexer-combinators.ads | 187 +++++++++++++++++++++++++++++++++++++ src/packrat-lexer.ads | 30 ++++++ src/packrat.ads | 7 +- 5 files changed, 460 insertions(+), 5 deletions(-) create mode 100644 src/packrat-lexer-combinators.adb create mode 100644 src/packrat-lexer-combinators.ads create mode 100644 src/packrat-lexer.ads diff --git a/packrat_parser_lib_notes.txt b/packrat_parser_lib_notes.txt index d9aca31..40b2820 100644 --- a/packrat_parser_lib_notes.txt +++ b/packrat_parser_lib_notes.txt @@ -8,9 +8,13 @@ Packrat.Parser.Combinators Packrat.Lexer (generic over stamp enum, input item type, array of input items, array of output items wrapped as tokens) Packrat.Lexer.Combinators Packrat.Util -Packrat.Error (nested) +Packrat.Errors (nested) Packrat.Graphs (nested, generic over leaf array type) Packrat.Tokens (nested, generic over contained array) +Packrat.Instant +Packrat.Instant.Standard (nested, generic over parser/lexer label enums) +Packrat.Instant.Wide (nested, generic over parser/lexer label enums) +Packrat.Instant.Wide_Wide (nested, generic over parser/lexer label enums) Ratnest.Tests Ratnest.Examples @@ -27,14 +31,17 @@ planned order of writing: (Ratnest not mentioned since that's where all the testing functions will go) Packrat.Util -Packrat.Error +Packrat.Errors Packrat.Tokens Packrat.Lexer Packrat.Lexer.Combinators Packrat.Graphs Packrat.Parser Packrat.Parser.Combinators -Packrat (any remaining) +Packrat.Instant +Packrat.Instant.Standard +Packrat.Instant.Wide +Packrat.Instant.Wide_Wide Calculator Tomita Off_Side @@ -152,16 +159,53 @@ Packrat.Lexer sized output array of tokens (and possibly a statically sized input array) List of funcs: +(each of 
these is generic over an array of lexer_component functions, either Stamp or Ignore as below) Scan + - function that returns an array of lexed tokens + - uses first applicable lexer component to lex each token + - if all lexer components return "partial" then also returns with a partial status + - if all lexer components fail then raises a lexer_error + - lexer status can be fed back into any of these Scan functions to resume with further input + - if end of input is reached without any lexer components returning "partial" then returns + a complete status and feeding the lexer status back into these functions will be the same + as starting with a blank status +Scan_Set + - as above, except is a procedure that uses a fixed size array as output with a padding token Scan_Only + - function that returns an array of lexed tokens + - takes a lexer status as input to resume a lex, but will treat it as a constant unlike the others + - if all lexer components return "partial" or fail then raises a lexer_error +Scan_Set_Only + - as above, except is a procedure that uses a fixed size array as output with a padding token Scan_With + - function that returns an array of lexed tokens + - when it runs out of input it uses the supplied function to get more input until that function + returns an empty array + - may also return with a partial status as with Scan +Scan_Set_With + - as above, except is a procedure that uses a fixed size array as output with a padding token + +(type signature of these are: + input of an opaque lex_component_input type + output of an opaque lex_component_output type + return of a Fail/Partial/Success result enum) Stamp + - one of two available lexer component functions + - generic over a value of the enum used for labelling lexemes and a lexer combinator +Ignore + - the other available lexer component function + - generic over a lexer combinator, as this function will scan for the given token and then ignore it Packrat.Lexer.Combinators +type signature of 
these are: + inputs of input_array and a starting position index + outputs of the number of elements consumed and the value lexed + return of a Fail/Partial/Success result enum + List of funcs: Sequence Count @@ -173,7 +217,6 @@ Satisfy_With Match Match_With Multimatch -Multimatch_With Take Take_While Take_Until diff --git a/src/packrat-lexer-combinators.adb b/src/packrat-lexer-combinators.adb new file mode 100644 index 0000000..ef53bb1 --- /dev/null +++ b/src/packrat-lexer-combinators.adb @@ -0,0 +1,190 @@ + + +package body Packrat.Lexer.Combinators is + + + function Sequence + (Input : in Element_Array; + Start : in Positive; + Length : out Natural; + Value : out Element_Array) + return Result is + begin + return Failure; + end Sequence; + + + function Count + (Input : in Element_Array; + Start : in Positive; + Length : out Natural; + Value : out Element_Array) + return Result is + begin + return Failure; + end Count; + + + function Many + (Input : in Element_Array; + Start : in Positive; + Length : out Natural; + Value : out Element_Array) + return Result is + begin + return Failure; + end Many; + + + function Many_Until + (Input : in Element_Array; + Start : in Positive; + Length : out Natural; + Value : out Element_Array) + return Result is + begin + return Failure; + end Many_Until; + + + + + + function Satisfy + (Input : in Element_Array; + Start : in Positive; + Length : out Natural; + Value : out Element_Array) + return Result is + begin + return Failure; + end Satisfy; + + + function Satisfy_With + (Input : in Element_Array; + Start : in Positive; + Length : out Natural; + Value : out Element_Array) + return Result is + begin + return Failure; + end Satisfy_With; + + + function Match + (Input : in Element_Array; + Start : in Positive; + Length : out Natural; + Value : out Element_Array) + return Result is + begin + return Failure; + end Match; + + + function Match_With + (Input : in Element_Array; + Start : in Positive; + Length : out Natural; + Value : 
out Element_Array) + return Result is + begin + return Failure; + end Match_With; + + + function Multimatch + (Input : in Element_Array; + Start : in Positive; + Length : out Natural; + Value : out Element_Array) + return Result is + begin + return Failure; + end Multimatch; + + + function Take + (Input : in Element_Array; + Start : in Positive; + Length : out Natural; + Value : out Element_Array) + return Result is + begin + return Failure; + end Take; + + + function Take_While + (Input : in Element_Array; + Start : in Positive; + Length : out Natural; + Value : out Element_Array) + return Result is + begin + return Failure; + end Take_While; + + + function Take_Until + (Input : in Element_Array; + Start : in Positive; + Length : out Natural; + Value : out Element_Array) + return Result is + begin + return Failure; + end Take_Until; + + + + + + function Start_Of_Line + (Input : in Element_Array; + Start : in Positive; + Length : out Natural; + Value : out Element_Array) + return Result is + begin + return Failure; + end Start_Of_Line; + + + function End_Of_Line + (Input : in Element_Array; + Start : in Positive; + Length : out Natural; + Value : out Element_Array) + return Result is + begin + return Failure; + end End_Of_Line; + + + function Start_Of_Input + (Input : in Element_Array; + Start : in Positive; + Length : out Natural; + Value : out Element_Array) + return Result is + begin + return Failure; + end Start_Of_Input; + + + function End_Of_Input + (Input : in Element_Array; + Start : in Positive; + Length : out Natural; + Value : out Element_Array) + return Result is + begin + return Failure; + end End_Of_Input; + + +end Packrat.Lexer.Combinators; + + diff --git a/src/packrat-lexer-combinators.ads b/src/packrat-lexer-combinators.ads new file mode 100644 index 0000000..764887c --- /dev/null +++ b/src/packrat-lexer-combinators.ads @@ -0,0 +1,187 @@ + + +generic +package Packrat.Lexer.Combinators is + + + generic + Params : in Combinator_Array; + function 
Sequence + (Input : in Element_Array; + Start : in Positive; + Length : out Natural; + Value : out Element_Array) + return Result; + + generic + with function Param + (Input : in Element_Array; + Start : in Positive; + Length : out Natural; + Value : out Element_Array) + return Result; + Number : in Positive; + function Count + (Input : in Element_Array; + Start : in Positive; + Length : out Natural; + Value : out Element_Array) + return Result; + + generic + with function Param + (Input : in Element_Array; + Start : in Positive; + Length : out Natural; + Value : out Element_Array) + return Result; + Minimum : in Natural := 0; + function Many + (Input : in Element_Array; + Start : in Positive; + Length : out Natural; + Value : out Element_Array) + return Result; + + generic + with function Param + (Input : in Element_Array; + Start : in Positive; + Length : out Natural; + Value : out Element_Array) + return Result; + with function Test + (Item : in Element) + return Boolean; + Minimum : in Natural := 0; + function Many_Until + (Input : in Element_Array; + Start : in Positive; + Length : out Natural; + Value : out Element_Array) + return Result; + + + + + generic + with function Test + (Item : in Element) + return Boolean; + function Satisfy + (Input : in Element_Array; + Start : in Positive; + Length : out Natural; + Value : out Element_Array) + return Result; + + generic + with function Test + (Item : in Element) + return Boolean; + with function Change + (From : in Element) + return Element; + function Satisfy_With + (Input : in Element_Array; + Start : in Positive; + Length : out Natural; + Value : out Element_Array) + return Result; + + generic + Item : in Element; + function Match + (Input : in Element_Array; + Start : in Positive; + Length : out Natural; + Value : out Element_Array) + return Result; + + generic + Item : in Element; + with function Change + (From : in Element) + return Element; + function Match_With + (Input : in Element_Array; + Start : in 
Positive; + Length : out Natural; + Value : out Element_Array) + return Result; + + generic + Items : in Element_Array; + function Multimatch + (Input : in Element_Array; + Start : in Positive; + Length : out Natural; + Value : out Element_Array) + return Result; + + generic + Number : in Positive := 1; + function Take + (Input : in Element_Array; + Start : in Positive; + Length : out Natural; + Value : out Element_Array) + return Result; + + generic + with function Test + (Item : in Element) + return Boolean; + function Take_While + (Input : in Element_Array; + Start : in Positive; + Length : out Natural; + Value : out Element_Array) + return Result; + + generic + with function Test + (Item : in Element) + return Boolean; + function Take_Until + (Input : in Element_Array; + Start : in Positive; + Length : out Natural; + Value : out Element_Array) + return Result; + + + + + function Start_Of_Line + (Input : in Element_Array; + Start : in Positive; + Length : out Natural; + Value : out Element_Array) + return Result; + + function End_Of_Line + (Input : in Element_Array; + Start : in Positive; + Length : out Natural; + Value : out Element_Array) + return Result; + + function Start_Of_Input + (Input : in Element_Array; + Start : in Positive; + Length : out Natural; + Value : out Element_Array) + return Result; + + function End_Of_Input + (Input : in Element_Array; + Start : in Positive; + Length : out Natural; + Value : out Element_Array) + return Result; + + +end Packrat.Lexer.Combinators; + + diff --git a/src/packrat-lexer.ads b/src/packrat-lexer.ads new file mode 100644 index 0000000..611a407 --- /dev/null +++ b/src/packrat-lexer.ads @@ -0,0 +1,30 @@ + + +generic + + type Label_Enum is (<>); + type Element is private; + type Element_Array is array (Positive range <>) of Element; + + with package Gen_Tokens is new Tokens (Label_Enum, Element, Element_Array); + +package Packrat.Lexer is + + + type Combinator is access function + (Input : in Element_Array; + Start : 
in Positive; + Length : out Natural; + Value : out Element_Array) + return Result; + + + type Combinator_Array is array (Positive range <>) of Combinator; + + +private + + +end Packrat.Lexer; + + diff --git a/src/packrat.ads b/src/packrat.ads index a17e499..467c463 100644 --- a/src/packrat.ads +++ b/src/packrat.ads @@ -9,7 +9,11 @@ with package Packrat is - Parse_Error : exception; + type Result is (Failure, Partial, Success); + + + Parser_Error : exception; + Lexer_Error : exception; @@ -103,6 +107,7 @@ package Packrat is type Token is new Ada.Finalization.Controlled with private; + type Token_Array is array (Positive range <>) of Token; function Create -- cgit