From 4e42761354c2de557c0166d46aabe9dcd9b77073 Mon Sep 17 00:00:00 2001 From: Jed Barber Date: Tue, 15 Jan 2019 19:34:37 +1100 Subject: Redesigned Lexer Scan functions slightly, added tests for them --- packrat_parser_lib_notes.txt | 5 +- src/packrat-lexer.adb | 31 ++-- src/packrat-lexer.ads | 41 +++--- test/ratnest-tests.adb | 327 +++++++++++++++++++++++++++++++++++++++++-- test/ratnest-tests.ads | 19 ++- 5 files changed, 361 insertions(+), 62 deletions(-) diff --git a/packrat_parser_lib_notes.txt b/packrat_parser_lib_notes.txt index e48ebe8..1ae9707 100644 --- a/packrat_parser_lib_notes.txt +++ b/packrat_parser_lib_notes.txt @@ -182,8 +182,6 @@ Scan_Only - function that returns an array of lexed tokens - takes a lexer status as input to resuem a lex, but will treat it as a constant unlike the others - if all lexer components return "partial" or fail then raises a lexer_error -Scan_Set_Only - - as above, except is a procedure that uses a fixed size array as output with a padding token Scan_With - function that returns an array of lexed tokens - when it runs out of input it uses the supplied function to get more input until that function @@ -192,6 +190,9 @@ Scan_With Scan_Set_With - as above, except is a procedure that uses a fixed size array as output with a padding token +(determined to be redundant) +Scan_Set_Only + (type signature of these are: input of an opaque lex_component_input type output of an opaque lex_component_output type diff --git a/src/packrat-lexer.adb b/src/packrat-lexer.adb index a563f69..7cbd527 100644 --- a/src/packrat-lexer.adb +++ b/src/packrat-lexer.adb @@ -164,15 +164,6 @@ package body Packrat.Lexer is end Scan; - procedure Scan_Set - (Input : in Element_Array; - Context : in out Lexer_Context; - Output : out Gen_Tokens.Token_Array) is - begin - null; - end Scan_Set; - - function Scan_Only (Input : in Element_Array; Context : in out Lexer_Context) @@ -184,17 +175,8 @@ package body Packrat.Lexer is end Scan_Only; - procedure Scan_Set_Only - (Input : in Element_Array; - Context : in out Lexer_Context; - Output : out Gen_Tokens.Token_Array) is - begin - null; - end Scan_Set_Only; - - function Scan_With - (Input : in Element_Array; + (Input : in Lexer_With_Input; Context : in out Lexer_Context) return Gen_Tokens.Token_Array is @@ -204,12 +186,21 @@ package body Packrat.Lexer is end Scan_With; - procedure Scan_Set_With + procedure Scan_Set (Input : in Element_Array; Context : in out Lexer_Context; Output : out Gen_Tokens.Token_Array) is begin null; + end Scan_Set; + + + procedure Scan_Set_With + (Input : in Lexer_With_Input; + Context : in out Lexer_Context; + Output : out Gen_Tokens.Token_Array) is + begin + null; end Scan_Set_With; diff --git a/src/packrat-lexer.ads b/src/packrat-lexer.ads index 1da9662..2d152bf 100644 --- a/src/packrat-lexer.ads +++ b/src/packrat-lexer.ads @@ -48,6 +48,12 @@ package Packrat.Lexer is + type Lexer_With_Input is access function + return Element_Array; + + + + generic Label : in Label_Enum; with function Combo @@ -59,6 +65,7 @@ package Packrat.Lexer is Context : in out Lexer_Context); generic + Label : in Label_Enum; with function Combo (Input : in Element_Array; Start : in Positive) @@ -79,43 +86,33 @@ package Packrat.Lexer is generic Components : in Component_Array; - Padding : in Gen_Tokens.Token; - procedure Scan_Set + function Scan_Only (Input : in Element_Array; - Context : in out Lexer_Context; - Output : out Gen_Tokens.Token_Array); + Context : in out Lexer_Context) + return Gen_Tokens.Token_Array; generic Components : in Component_Array; - function Scan_Only - (Input : in Element_Array; + function Scan_With + (Input : in Lexer_With_Input; Context : in out Lexer_Context) return Gen_Tokens.Token_Array; generic - Gomponents : in Component_Array; - Padding : in Gen_Tokens.Token; - procedure Scan_Set_Only + Components : in Component_Array; + Pad_In : in Element; + Pad_Out : in Gen_Tokens.Token; + procedure Scan_Set (Input : in Element_Array; Context : in out Lexer_Context; Output : out Gen_Tokens.Token_Array); generic Components : in Component_Array; - with function More - return Element_Array; - function Scan_With - (Input : in Element_Array; - Context : in out Lexer_Context) - return Gen_Tokens.Token_Array; - - generic - Components : in Component_Array; - Padding : in Gen_Tokens.Token; - with function More - return Element_Array; + Pad_In : in Element; + Pad_Out : in Gen_Tokens.Token; procedure Scan_Set_With - (Input : in Element_Array; + (Input : in Lexer_With_Input; Context : in out Lexer_Context; Output : out Gen_Tokens.Token_Array); diff --git a/test/ratnest-tests.adb b/test/ratnest-tests.adb index f9353f3..9a493da 100644 --- a/test/ratnest-tests.adb +++ b/test/ratnest-tests.adb @@ -4,6 +4,7 @@ with Ada.Characters.Latin_1, Ada.Strings.Maps, + Ada.Exceptions, Packrat.Lexer.Debug, Packrat.Util; @@ -13,6 +14,7 @@ package body Ratnest.Tests is package Latin renames Ada.Characters.Latin_1; package Strmaps renames Ada.Strings.Maps; + package Except renames Ada.Exceptions; package PE renames Packrat.Errors; package PU renames Packrat.Util; @@ -823,7 +825,7 @@ package body Ratnest.Tests is use type Packrat.Result_Status; function Match_Abc is new Slexy.Multimatch ("abc"); - procedure My_Ignore is new Slexy.Ignore (Match_Abc); + procedure My_Ignore is new Slexy.Ignore (Two, Match_Abc); Test_Str1 : String := "abcdefghi"; Test_Str2 : String := "ab"; @@ -859,67 +861,366 @@ package body Ratnest.Tests is - type Word_Enum is (Word); + type Word_Enum is (Blank, Word, Whitespace); package Word_Tokens is new Packrat.Tokens (Word_Enum, Character, String); package Swordy is new Packrat.Lexer (Word_Enum, Character, String, Word_Tokens); package Swolbug is new Swordy.Debug; + use type Word_Tokens.Token; + use type Word_Tokens.Token_Array; + function Satisfy_Letter is new Swordy.Satisfy (PU.Is_Letter); function Many_Letter is new Swordy.Many (Satisfy_Letter, 1); function Satisfy_Whitespace is new Swordy.Satisfy (PU.Is_Whitespace); function Many_Whitespace is new Swordy.Many (Satisfy_Whitespace, 1); procedure Stamp_Word is new Swordy.Stamp (Word, Many_Letter); - procedure Ignore_Whitespace is new Swordy.Ignore (Many_Whitespace); + procedure Ignore_Whitespace is new Swordy.Ignore (Whitespace, Many_Whitespace); function Scan_Check return Test_Result is + function My_Scan is new Swordy.Scan + ((Stamp_Word'Access, Ignore_Whitespace'Access)); + + Test_Str : String := "one fine day"; + Test_Context : Swordy.Lexer_Context := Swordy.Empty_Context; + + Intended_Result1 : Word_Tokens.Token_Array := + (1 => Word_Tokens.Create (Word, 1, 3, "one"), + 2 => Word_Tokens.Create (Word, 5, 8, "fine")); + Intended_Result2 : Word_Tokens.Token_Array := + (1 => Word_Tokens.Create (Word, 10, 12, "day")); + + Actual_Result1 : Word_Tokens.Token_Array := + My_Scan (Test_Str, Test_Context); + Actual_Result2 : Word_Tokens.Token_Array := + My_Scan ("", Test_Context); begin - return Fail; + if Actual_Result1 /= Intended_Result1 or Actual_Result2 /= Intended_Result2 then + return Fail; + end if; + return Pass; end Scan_Check; + function Scan_Only_Check + return Test_Result + is + function My_Scan is new Swordy.Scan_Only + ((Stamp_Word'Access, Ignore_Whitespace'Access)); + + Test_Str : String := "one fine day"; + Test_Context : Swordy.Lexer_Context := Swordy.Empty_Context; + + Intended_Result : Word_Tokens.Token_Array := + (1 => Word_Tokens.Create (Word, 1, 3, "one"), + 2 => Word_Tokens.Create (Word, 5, 8, "fine"), + 3 => Word_Tokens.Create (Word, 10, 12, "day")); + + Actual_Result : Word_Tokens.Token_Array := + My_Scan (Test_Str, Test_Context); + begin + if Actual_Result /= Intended_Result then + return Fail; + end if; + return Pass; + end Scan_Only_Check; + + + function Scan_With_Check + return Test_Result + is + Sentinel : Natural := 2; + function More_Input + return String is + begin + if Sentinel > 1 then + Sentinel := 1; + return "it will happen again "; + elsif Sentinel > 0 then + Sentinel := 0; + return " and again and again"; + else + return ""; + end if; + end More_Input; + + function My_Scan is new Swordy.Scan_With + ((Stamp_Word'Access, Ignore_Whitespace'Access)); + + Test_Context : Swordy.Lexer_Context := Swordy.Empty_Context; + + Intended_Result : Word_Tokens.Token_Array := + (1 => Word_Tokens.Create (Word, 1, 2, "it"), + 2 => Word_Tokens.Create (Word, 4, 7, "will"), + 3 => Word_Tokens.Create (Word, 9, 14, "happen"), + 4 => Word_Tokens.Create (Word, 16, 20, "again"), + 5 => Word_Tokens.Create (Word, 22, 24, "and"), + 6 => Word_Tokens.Create (Word, 26, 30, "again"), + 7 => Word_Tokens.Create (Word, 32, 34, "and"), + 8 => Word_Tokens.Create (Word, 36, 40, "again")); + + Actual_Result : Word_Tokens.Token_Array := + My_Scan (More_Input'Unrestricted_Access, Test_Context); + begin + if Actual_Result /= Intended_Result then + return Fail; + end if; + return Pass; + end Scan_With_Check; + + function Scan_Set_Check return Test_Result is + procedure My_Scan is new Swordy.Scan_Set + ((Stamp_Word'Access, Ignore_Whitespace'Access), + Latin.EOT, Word_Tokens.Create (Blank, 1, 0, "")); + + Test_Str1 : String (1 .. 10) := "one tw"; + Test_Str2 : String (1 .. 10) := "o three"; + Test_Str3 : String (1 .. 10) := Latin.EOT & " "; + Test_Context : Swordy.Lexer_Context := Swordy.Empty_Context; + + Intended_Result1 : Word_Tokens.Token_Array := + (1 => Word_Tokens.Create (Word, 1, 3, "one"), + 2 => Word_Tokens.Create (Blank, 1, 0, "")); + Intended_Result2 : Word_Tokens.Token_Array := + (1 => Word_Tokens.Create (Word, 9, 11, "two"), + 2 => Word_Tokens.Create (Blank, 1, 0, "")); + Intended_Result3 : Word_Tokens.Token_Array := + (1 => Word_Tokens.Create (Word, 16, 20, "three"), + 2 => Word_Tokens.Create (Blank, 1, 0, "")); + + Actual_Result : Word_Tokens.Token_Array (1 .. 2); begin - return Fail; + My_Scan (Test_Str1, Test_Context, Actual_Result); + if Actual_Result /= Intended_Result1 then + return Fail; + end if; + My_Scan (Test_Str2, Test_Context, Actual_Result); + if Actual_Result /= Intended_Result2 then + return Fail; + end if; + My_Scan (Test_Str3, Test_Context, Actual_Result); + if Actual_Result /= Intended_Result3 then + return Fail; + end if; + return Pass; end Scan_Set_Check; - function Scan_Only_Check + function Scan_Set_With_Check + return Test_Result + is + Sentinel : Natural := 2; + function More_Input + return String is + begin + if Sentinel > 1 then + Sentinel := 1; + return "it will happen again"; + elsif Sentinel > 0 then + Sentinel := 0; + return " and again and again"; + else + return ""; + end if; + end More_Input; + + procedure My_Scan is new Swordy.Scan_Set_With + ((Stamp_Word'Access, Ignore_Whitespace'Access), + Latin.EOT, Word_Tokens.Create (Blank, 1, 0, "")); + + Test_Context : Swordy.Lexer_Context := Swordy.Empty_Context; + + Intended_Result1 : Word_Tokens.Token_Array := + (1 => Word_Tokens.Create (Word, 1, 2, "it"), + 2 => Word_Tokens.Create (Word, 4, 7, "will"), + 3 => Word_Tokens.Create (Word, 9, 14, "happen"), + 4 => Word_Tokens.Create (Word, 16, 20, "again"), + 5 => Word_Tokens.Create (Word, 22, 24, "and")); + Intended_Result2 : Word_Tokens.Token_Array := + (1 => Word_Tokens.Create (Word, 26, 30, "again"), + 2 => Word_Tokens.Create (Word, 32, 34, "and"), + 3 => Word_Tokens.Create (Word, 36, 40, "again"), + 4 => Word_Tokens.Create (Blank, 1, 0, ""), + 5 => Word_Tokens.Create (Blank, 1, 0, "")); + + Actual_Result : Word_Tokens.Token_Array (1 .. 5); + begin + My_Scan (More_Input'Unrestricted_Access, Test_Context, Actual_Result); + if Actual_Result /= Intended_Result1 then + return Fail; + end if; + My_Scan (More_Input'Unrestricted_Access, Test_Context, Actual_Result); + if Actual_Result /= Intended_Result2 then + return Fail; + end if; + return Pass; + end Scan_Set_With_Check; + + + function Scan_Error_Check return Test_Result is + use type Packrat.Errors.Error_Info_Array; + + function My_Scan is new Swordy.Scan + ((Stamp_Word'Access, Ignore_Whitespace'Access)); + + Test_Str : String := "()()"; + Test_Context : Swordy.Lexer_Context := Swordy.Empty_Context; + + Result : Word_Tokens.Token_Array := + My_Scan (Test_Str, Test_Context); + + Expected_Errors : Packrat.Errors.Error_Info_Array := + ((+"WORD", 1), (+"WHITESPACE", 1)); begin return Fail; - end Scan_Only_Check; + exception + when Msg : Packrat.Lexer_Error => + if Packrat.Errors.Decode (Except.Exception_Message (Msg)) /= Expected_Errors then + return Fail; + end if; + return Pass; + end Scan_Error_Check; - function Scan_Set_Only_Check + function Scan_Only_Error_Check return Test_Result is + use type Packrat.Errors.Error_Info_Array; + + function My_Scan is new Swordy.Scan_Only + ((Stamp_Word'Access, Ignore_Whitespace'Access)); + + Test_Str : String := "()()"; + Test_Context : Swordy.Lexer_Context := Swordy.Empty_Context; + + Result : Word_Tokens.Token_Array := + My_Scan (Test_Str, Test_Context); + + Expected_Errors : Packrat.Errors.Error_Info_Array := + ((+"WORD", 1), (+"WHITESPACE", 1)); begin return Fail; - end Scan_Set_Only_Check; + exception + when Msg : Packrat.Lexer_Error => + if Packrat.Errors.Decode (Except.Exception_Message (Msg)) /= Expected_Errors then + return Fail; + end if; + return Pass; + end Scan_Only_Error_Check; - function Scan_With_Check + function Scan_With_Error_Check return Test_Result is + use type Packrat.Errors.Error_Info_Array; + + Sentinel : Integer := 1; + function Get_Input + return String is + begin + if Sentinel > 0 then + Sentinel := 0; + return "()()"; + else + return ""; + end if; + end Get_Input; + + function My_Scan is new Swordy.Scan_With + ((Stamp_Word'Access, Ignore_Whitespace'Access)); + + Test_Context : Swordy.Lexer_Context := Swordy.Empty_Context; + + Result : Word_Tokens.Token_Array := + My_Scan (Get_Input'Unrestricted_Access, Test_Context); + + Expected_Errors : Packrat.Errors.Error_Info_Array := + ((+"WORD", 1), (+"WHITESPACE", 1)); begin return Fail; - end Scan_With_Check; + exception + when Msg : Packrat.Lexer_Error => + if Packrat.Errors.Decode (Except.Exception_Message (Msg)) /= Expected_Errors then + return Fail; + end if; + return Pass; + end Scan_With_Error_Check; - function Scan_Set_With_Check + function Scan_Set_Error_Check return Test_Result is + use type Packrat.Errors.Error_Info_Array; + + procedure My_Scan is new Swordy.Scan_Set + ((Stamp_Word'Access, Ignore_Whitespace'Access), + Latin.EOT, Word_Tokens.Create (Blank, 1, 0, "")); + + Test_Str : String := "()()"; + Test_Context : Swordy.Lexer_Context := Swordy.Empty_Context; + + Result : Word_Tokens.Token_Array (1 .. 5); + + Expected_Errors : Packrat.Errors.Error_Info_Array := + ((+"WORD", 1), (+"WHITESPACE", 1)); begin + My_Scan (Test_Str, Test_Context, Result); return Fail; - end Scan_Set_With_Check; + exception + when Msg : Packrat.Lexer_Error => + if Packrat.Errors.Decode (Except.Exception_Message (Msg)) /= Expected_Errors then + return Fail; + end if; + return Pass; + end Scan_Set_Error_Check; + + + function Scan_Set_With_Error_Check + return Test_Result + is + use type Packrat.Errors.Error_Info_Array; + + Sentinel : Integer := 1; + function Get_Input + return String is + begin + if Sentinel > 0 then + Sentinel := 0; + return "()()"; + else + return ""; + end if; + end Get_Input; + + procedure My_Scan is new Swordy.Scan_Set_With + ((Stamp_Word'Access, Ignore_Whitespace'Access), + Latin.EOT, Word_Tokens.Create (Blank, 1, 0, "")); + + Test_Context : Swordy.Lexer_Context := Swordy.Empty_Context; + + Result : Word_Tokens.Token_Array (1 .. 5); + + Expected_Errors : Packrat.Errors.Error_Info_Array := + ((+"WORD", 1), (+"WHITESPACE", 1)); + begin + My_Scan (Get_Input'Unrestricted_Access, Test_Context, Result); + return Fail; + exception + when Msg : Packrat.Lexer_Error => + if Packrat.Errors.Decode (Except.Exception_Message (Msg)) /= Expected_Errors then + return Fail; + end if; + return Pass; + end Scan_Set_With_Error_Check; end Lexer; diff --git a/test/ratnest-tests.ads b/test/ratnest-tests.ads index f795340..e72bfe7 100644 --- a/test/ratnest-tests.ads +++ b/test/ratnest-tests.ads @@ -90,21 +90,30 @@ package Ratnest.Tests is function Ignore_Check return Test_Result; function Scan_Check return Test_Result; - function Scan_Set_Check return Test_Result; function Scan_Only_Check return Test_Result; - function Scan_Set_Only_Check return Test_Result; function Scan_With_Check return Test_Result; + function Scan_Set_Check return Test_Result; function Scan_Set_With_Check return Test_Result; + function Scan_Error_Check return Test_Result; + function Scan_Only_Error_Check return Test_Result; + function Scan_With_Error_Check return Test_Result; + function Scan_Set_Error_Check return Test_Result; + function Scan_Set_With_Error_Check return Test_Result; + Lexer_Tests : Test_Array := ((+"Stamp", Stamp_Check'Access), (+"Ignore", Ignore_Check'Access), (+"Scan", Scan_Check'Access), - (+"Scan_Set", Scan_Set_Check'Access), (+"Scan_Only", Scan_Only_Check'Access), - (+"Scan_Set_Only", Scan_Set_Only_Check'Access), (+"Scan_With", Scan_With_Check'Access), - (+"Scan_Set_With", Scan_Set_With_Check'Access)); + (+"Scan_Set", Scan_Set_Check'Access), + (+"Scan_Set_With", Scan_Set_With_Check'Access), + (+"Scan Exception", Scan_Error_Check'Access), + (+"Scan_Only Exception", Scan_Only_Error_Check'Access), + (+"Scan_With Exception", Scan_With_Error_Check'Access), + (+"Scan_Set Exception", Scan_Set_Error_Check'Access), + (+"Scan_Set_With Exception", Scan_Set_With_Error_Check'Access)); end Lexer; -- cgit