summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJed Barber <jjbarber@y7mail.com>2019-01-10 15:27:56 +1100
committerJed Barber <jjbarber@y7mail.com>2019-01-10 15:27:56 +1100
commit67edc77677d366d4dc0ff42154c79a1d87c2dd29 (patch)
treed708a160d257fe26a910930df618e41acc6928d4
parentd13d3d981de2d178b7a27f2b6db78146c9d560ff (diff)
Packrat.Lexer.Combinators specification
-rw-r--r--packrat_parser_lib_notes.txt51
-rw-r--r--src/packrat-lexer-combinators.adb190
-rw-r--r--src/packrat-lexer-combinators.ads187
-rw-r--r--src/packrat-lexer.ads30
-rw-r--r--src/packrat.ads7
5 files changed, 460 insertions, 5 deletions
diff --git a/packrat_parser_lib_notes.txt b/packrat_parser_lib_notes.txt
index d9aca31..40b2820 100644
--- a/packrat_parser_lib_notes.txt
+++ b/packrat_parser_lib_notes.txt
@@ -8,9 +8,13 @@ Packrat.Parser.Combinators
Packrat.Lexer (generic over stamp enum, input item type, array of input items, array of output items wrapped as tokens)
Packrat.Lexer.Combinators
Packrat.Util
-Packrat.Error (nested)
+Packrat.Errors (nested)
Packrat.Graphs (nested, generic over leaf array type)
Packrat.Tokens (nested, generic over contained array)
+Packrat.Instant
+Packrat.Instant.Standard (nested, generic over parser/lexer label enums)
+Packrat.Instant.Wide (nested, generic over parser/lexer label enums)
+Packrat.Instant.Wide_Wide (nested, generic over parser/lexer label enums)
Ratnest.Tests
Ratnest.Examples
@@ -27,14 +31,17 @@ planned order of writing:
(Ratnest not mentioned since that's where all the testing functions will go)
Packrat.Util
-Packrat.Error
+Packrat.Errors
Packrat.Tokens
Packrat.Lexer
Packrat.Lexer.Combinators
Packrat.Graphs
Packrat.Parser
Packrat.Parser.Combinators
-Packrat (any remaining)
+Packrat.Instant
+Packrat.Instant.Standard
+Packrat.Instant.Wide
+Packrat.Instant.Wide_Wide
Calculator
Tomita
Off_Side
@@ -152,16 +159,53 @@ Packrat.Lexer
sized output array of tokens (and possibly a statically sized input array)
List of funcs:
+(each of these is generic over an array of lexer_component functions, either Stamp or Ignore as below)
Scan
+ - function that returns an array of lexed tokens
+ - uses first applicable lexer component to lex each token
+ - if all lexer components return "partial" then also returns with a partial status
+ - if all lexer components fail then raises a lexer_error
+ - lexer status can be fed back into any of these Scan functions to resume with further input
+ - if end of input is reached without any lexer components returning "partial" then returns
+ a complete status and feeding the lexer status back into these functions will be the same
+ as starting with a blank status
+Scan_Set
+ - as above, except it is a procedure that uses a fixed size array as output with a padding token
Scan_Only
+ - function that returns an array of lexed tokens
+ - takes a lexer status as input to resume a lex, but will treat it as a constant unlike the others
+ - if all lexer components return "partial" or fail then raises a lexer_error
+Scan_Set_Only
+ - as above, except it is a procedure that uses a fixed size array as output with a padding token
Scan_With
+ - function that returns an array of lexed tokens
+ - when it runs out of input it uses the supplied function to get more input until that function
+ returns an empty array
+ - may also return with a partial status as with Scan
+Scan_Set_With
+ - as above, except it is a procedure that uses a fixed size array as output with a padding token
+
+(the type signature of each of these is:
+ input of an opaque lex_component_input type
+ output of an opaque lex_component_output type
+ return of a Fail/Partial/Success result enum)
Stamp
+ - one of two available lexer component functions
+ - generic over a value of the enum used for labelling lexemes and a lexer combinator
+Ignore
+ - the other available lexer component function
+ - generic over a lexer combinator, as this function will scan for the given token and then ignore it
Packrat.Lexer.Combinators
+the type signature of each of these is:
+ inputs of input_array and a starting position index
+ outputs of the number of elements consumed and the value lexed
+ return of a Fail/Partial/Success result enum
+
List of funcs:
Sequence
Count
@@ -173,7 +217,6 @@ Satisfy_With
Match
Match_With
Multimatch
-Multimatch_With
Take
Take_While
Take_Until
diff --git a/src/packrat-lexer-combinators.adb b/src/packrat-lexer-combinators.adb
new file mode 100644
index 0000000..ef53bb1
--- /dev/null
+++ b/src/packrat-lexer-combinators.adb
@@ -0,0 +1,190 @@
+-- Placeholder body: every combinator below is a stub that returns Failure and ignores its generic formals.
+-- NOTE(review): no stub assigns its out parameters (Length, Value); confirm callers never read them on Failure.
+package body Packrat.Lexer.Combinators is
+
+ -- Sequence (stub): returns Failure unconditionally; Length and Value are not assigned.
+ function Sequence
+ (Input : in Element_Array;
+ Start : in Positive;
+ Length : out Natural;
+ Value : out Element_Array)
+ return Result is
+ begin
+ return Failure;
+ end Sequence;
+
+ -- Count (stub): returns Failure unconditionally; Length and Value are not assigned.
+ function Count
+ (Input : in Element_Array;
+ Start : in Positive;
+ Length : out Natural;
+ Value : out Element_Array)
+ return Result is
+ begin
+ return Failure;
+ end Count;
+
+ -- Many (stub): returns Failure unconditionally; Length and Value are not assigned.
+ function Many
+ (Input : in Element_Array;
+ Start : in Positive;
+ Length : out Natural;
+ Value : out Element_Array)
+ return Result is
+ begin
+ return Failure;
+ end Many;
+
+ -- Many_Until (stub): returns Failure unconditionally; Length and Value are not assigned.
+ function Many_Until
+ (Input : in Element_Array;
+ Start : in Positive;
+ Length : out Natural;
+ Value : out Element_Array)
+ return Result is
+ begin
+ return Failure;
+ end Many_Until;
+
+
+
+
+ -- Satisfy (stub): returns Failure unconditionally; Length and Value are not assigned.
+ function Satisfy
+ (Input : in Element_Array;
+ Start : in Positive;
+ Length : out Natural;
+ Value : out Element_Array)
+ return Result is
+ begin
+ return Failure;
+ end Satisfy;
+
+ -- Satisfy_With (stub): returns Failure unconditionally; Length and Value are not assigned.
+ function Satisfy_With
+ (Input : in Element_Array;
+ Start : in Positive;
+ Length : out Natural;
+ Value : out Element_Array)
+ return Result is
+ begin
+ return Failure;
+ end Satisfy_With;
+
+ -- Match (stub): returns Failure unconditionally; Length and Value are not assigned.
+ function Match
+ (Input : in Element_Array;
+ Start : in Positive;
+ Length : out Natural;
+ Value : out Element_Array)
+ return Result is
+ begin
+ return Failure;
+ end Match;
+
+ -- Match_With (stub): returns Failure unconditionally; Length and Value are not assigned.
+ function Match_With
+ (Input : in Element_Array;
+ Start : in Positive;
+ Length : out Natural;
+ Value : out Element_Array)
+ return Result is
+ begin
+ return Failure;
+ end Match_With;
+
+ -- Multimatch (stub): returns Failure unconditionally; Length and Value are not assigned.
+ function Multimatch
+ (Input : in Element_Array;
+ Start : in Positive;
+ Length : out Natural;
+ Value : out Element_Array)
+ return Result is
+ begin
+ return Failure;
+ end Multimatch;
+
+ -- Take (stub): returns Failure unconditionally; Length and Value are not assigned.
+ function Take
+ (Input : in Element_Array;
+ Start : in Positive;
+ Length : out Natural;
+ Value : out Element_Array)
+ return Result is
+ begin
+ return Failure;
+ end Take;
+
+ -- Take_While (stub): returns Failure unconditionally; Length and Value are not assigned.
+ function Take_While
+ (Input : in Element_Array;
+ Start : in Positive;
+ Length : out Natural;
+ Value : out Element_Array)
+ return Result is
+ begin
+ return Failure;
+ end Take_While;
+
+ -- Take_Until (stub): returns Failure unconditionally; Length and Value are not assigned.
+ function Take_Until
+ (Input : in Element_Array;
+ Start : in Positive;
+ Length : out Natural;
+ Value : out Element_Array)
+ return Result is
+ begin
+ return Failure;
+ end Take_Until;
+
+
+
+
+ -- Start_Of_Line (stub): returns Failure unconditionally; Length and Value are not assigned.
+ function Start_Of_Line
+ (Input : in Element_Array;
+ Start : in Positive;
+ Length : out Natural;
+ Value : out Element_Array)
+ return Result is
+ begin
+ return Failure;
+ end Start_Of_Line;
+
+ -- End_Of_Line (stub): returns Failure unconditionally; Length and Value are not assigned.
+ function End_Of_Line
+ (Input : in Element_Array;
+ Start : in Positive;
+ Length : out Natural;
+ Value : out Element_Array)
+ return Result is
+ begin
+ return Failure;
+ end End_Of_Line;
+
+ -- Start_Of_Input (stub): returns Failure unconditionally; Length and Value are not assigned.
+ function Start_Of_Input
+ (Input : in Element_Array;
+ Start : in Positive;
+ Length : out Natural;
+ Value : out Element_Array)
+ return Result is
+ begin
+ return Failure;
+ end Start_Of_Input;
+
+ -- End_Of_Input (stub): returns Failure unconditionally; Length and Value are not assigned.
+ function End_Of_Input
+ (Input : in Element_Array;
+ Start : in Positive;
+ Length : out Natural;
+ Value : out Element_Array)
+ return Result is
+ begin
+ return Failure;
+ end End_Of_Input;
+
+
+end Packrat.Lexer.Combinators;
+
+
diff --git a/src/packrat-lexer-combinators.ads b/src/packrat-lexer-combinators.ads
new file mode 100644
index 0000000..764887c
--- /dev/null
+++ b/src/packrat-lexer-combinators.ads
@@ -0,0 +1,187 @@
+-- Lexer combinators. All share one profile: Input array and Start index in; Length (elements consumed)
+-- and Value (the value lexed) out; returns a Result. Semantics noted as "presumably" are unconfirmed.
+generic
+package Packrat.Lexer.Combinators is
+
+ -- Sequence: generic over Params, an array of combinators; presumably applies them in order - TODO confirm.
+ generic
+ Params : in Combinator_Array;
+ function Sequence
+ (Input : in Element_Array;
+ Start : in Positive;
+ Length : out Natural;
+ Value : out Element_Array)
+ return Result;
+ -- Count: generic over a combinator Param and a Positive Number; presumably applies Param Number times.
+ generic
+ with function Param
+ (Input : in Element_Array;
+ Start : in Positive;
+ Length : out Natural;
+ Value : out Element_Array)
+ return Result;
+ Number : in Positive;
+ function Count
+ (Input : in Element_Array;
+ Start : in Positive;
+ Length : out Natural;
+ Value : out Element_Array)
+ return Result;
+ -- Many: generic over a combinator Param and Minimum (default 0); presumably a repeat-count floor.
+ generic
+ with function Param
+ (Input : in Element_Array;
+ Start : in Positive;
+ Length : out Natural;
+ Value : out Element_Array)
+ return Result;
+ Minimum : in Natural := 0;
+ function Many
+ (Input : in Element_Array;
+ Start : in Positive;
+ Length : out Natural;
+ Value : out Element_Array)
+ return Result;
+ -- Many_Until: as Many plus a per-Element Test; presumably Test is the stop condition - TODO confirm.
+ generic
+ with function Param
+ (Input : in Element_Array;
+ Start : in Positive;
+ Length : out Natural;
+ Value : out Element_Array)
+ return Result;
+ with function Test
+ (Item : in Element)
+ return Boolean;
+ Minimum : in Natural := 0;
+ function Many_Until
+ (Input : in Element_Array;
+ Start : in Positive;
+ Length : out Natural;
+ Value : out Element_Array)
+ return Result;
+
+
+
+ -- Satisfy: generic over a Boolean Test on a single Element; presumably accepts an element passing Test.
+ generic
+ with function Test
+ (Item : in Element)
+ return Boolean;
+ function Satisfy
+ (Input : in Element_Array;
+ Start : in Positive;
+ Length : out Natural;
+ Value : out Element_Array)
+ return Result;
+ -- Satisfy_With: Test plus an Element-to-Element Change; how the two combine is not specified here.
+ generic
+ with function Test
+ (Item : in Element)
+ return Boolean;
+ with function Change
+ (From : in Element)
+ return Element;
+ function Satisfy_With
+ (Input : in Element_Array;
+ Start : in Positive;
+ Length : out Natural;
+ Value : out Element_Array)
+ return Result;
+ -- Match: generic over a single Element Item; presumably accepts an element equal to Item - TODO confirm.
+ generic
+ Item : in Element;
+ function Match
+ (Input : in Element_Array;
+ Start : in Positive;
+ Length : out Natural;
+ Value : out Element_Array)
+ return Result;
+ -- Match_With: Item plus an Element-to-Element Change; how Change is applied is not specified here.
+ generic
+ Item : in Element;
+ with function Change
+ (From : in Element)
+ return Element;
+ function Match_With
+ (Input : in Element_Array;
+ Start : in Positive;
+ Length : out Natural;
+ Value : out Element_Array)
+ return Result;
+ -- Multimatch: generic over Items, an Element_Array; presumably matches that run of elements - TODO confirm.
+ generic
+ Items : in Element_Array;
+ function Multimatch
+ (Input : in Element_Array;
+ Start : in Positive;
+ Length : out Natural;
+ Value : out Element_Array)
+ return Result;
+ -- Take: generic over a Positive Number (default 1); presumably the count of elements to consume.
+ generic
+ Number : in Positive := 1;
+ function Take
+ (Input : in Element_Array;
+ Start : in Positive;
+ Length : out Natural;
+ Value : out Element_Array)
+ return Result;
+ -- Take_While: generic over a Boolean Test on Element; presumably consumes while Test holds - TODO confirm.
+ generic
+ with function Test
+ (Item : in Element)
+ return Boolean;
+ function Take_While
+ (Input : in Element_Array;
+ Start : in Positive;
+ Length : out Natural;
+ Value : out Element_Array)
+ return Result;
+ -- Take_Until: generic over a Boolean Test on Element; presumably consumes until Test holds - TODO confirm.
+ generic
+ with function Test
+ (Item : in Element)
+ return Boolean;
+ function Take_Until
+ (Input : in Element_Array;
+ Start : in Positive;
+ Length : out Natural;
+ Value : out Element_Array)
+ return Result;
+
+
+
+ -- Start_Of_Line: non-generic combinator with the shared profile; line-detection rule not specified here.
+ function Start_Of_Line
+ (Input : in Element_Array;
+ Start : in Positive;
+ Length : out Natural;
+ Value : out Element_Array)
+ return Result;
+ -- End_Of_Line: non-generic combinator with the shared profile; line-detection rule not specified here.
+ function End_Of_Line
+ (Input : in Element_Array;
+ Start : in Positive;
+ Length : out Natural;
+ Value : out Element_Array)
+ return Result;
+ -- Start_Of_Input: non-generic combinator with the shared profile; takes no generic parameters.
+ function Start_Of_Input
+ (Input : in Element_Array;
+ Start : in Positive;
+ Length : out Natural;
+ Value : out Element_Array)
+ return Result;
+ -- End_Of_Input: non-generic combinator with the shared profile; takes no generic parameters.
+ function End_Of_Input
+ (Input : in Element_Array;
+ Start : in Positive;
+ Length : out Natural;
+ Value : out Element_Array)
+ return Result;
+
+
+end Packrat.Lexer.Combinators;
+
+
diff --git a/src/packrat-lexer.ads b/src/packrat-lexer.ads
new file mode 100644
index 0000000..611a407
--- /dev/null
+++ b/src/packrat-lexer.ads
@@ -0,0 +1,30 @@
+-- Generic lexer package. At this stage it only declares the combinator access type and an array of it.
+-- Result is the (Failure, Partial, Success) enumeration declared in the parent package Packrat.
+generic
+ -- Label_Enum is any discrete type, Element any private type, Element_Array a Positive-indexed array of Element.
+ type Label_Enum is (<>);
+ type Element is private;
+ type Element_Array is array (Positive range <>) of Element;
+ -- Gen_Tokens must be an instance of Tokens made with exactly the three formal types above.
+ with package Gen_Tokens is new Tokens (Label_Enum, Element, Element_Array);
+
+package Packrat.Lexer is
+
+ -- Access-to-function type with the same profile as the combinators in Packrat.Lexer.Combinators.
+ type Combinator is access function
+ (Input : in Element_Array;
+ Start : in Positive;
+ Length : out Natural;
+ Value : out Element_Array)
+ return Result;
+
+ -- Unconstrained list of combinators; used as the Params formal of Combinators.Sequence.
+ type Combinator_Array is array (Positive range <>) of Combinator;
+
+
+private
+
+ -- No private declarations yet.
+end Packrat.Lexer;
+
+
diff --git a/src/packrat.ads b/src/packrat.ads
index a17e499..467c463 100644
--- a/src/packrat.ads
+++ b/src/packrat.ads
@@ -9,7 +9,11 @@ with
package Packrat is
- Parse_Error : exception;
+ type Result is (Failure, Partial, Success);
+
+
+ Parser_Error : exception;
+ Lexer_Error : exception;
@@ -103,6 +107,7 @@ package Packrat is
type Token is new Ada.Finalization.Controlled with private;
+ type Token_Array is array (Positive range <>) of Token;
function Create