From 8828e68cb86c865d625961c07c7ce2eb4ae191bc Mon Sep 17 00:00:00 2001 From: Jed Barber Date: Sat, 7 Nov 2020 01:21:54 +1100 Subject: Parse_Graphs complete aside from isomorphism and testing --- src/packrat-lexer.ads | 38 +- src/packrat-parse_graphs.adb | 1062 +++++++++++++++++++++++++++++++++++++----- src/packrat-parse_graphs.ads | 770 ++++++++++++++---------------- src/packrat-tokens.adb | 39 ++ src/packrat.ads | 12 +- 5 files changed, 1363 insertions(+), 558 deletions(-) (limited to 'src') diff --git a/src/packrat-lexer.ads b/src/packrat-lexer.ads index a797d7f..9499d50 100644 --- a/src/packrat-lexer.ads +++ b/src/packrat-lexer.ads @@ -9,10 +9,12 @@ private with generic type Label_Enum is (<>); - type Element is private; - type Element_Array is array (Positive range <>) of Element; + type Element_Type is private; + type Element_Array is array (Positive range <>) of Element_Type; - with package Gen_Tokens is new Tokens (Label_Enum, Element, Element_Array); + with function "<" (Left, Right : in Element_Type) return Boolean is <>; + + with package Gen_Tokens is new Tokens (Label_Enum, Element_Type, Element_Array); package Packrat.Lexer is @@ -102,7 +104,7 @@ package Packrat.Lexer is generic Components : in Component_Array; - Pad_In : in Element; + Pad_In : in Element_Type; Pad_Out : in Gen_Tokens.Token; procedure Scan_Set (Input : in Element_Array; @@ -111,7 +113,7 @@ package Packrat.Lexer is generic Components : in Component_Array; - Pad_In : in Element; + Pad_In : in Element_Type; Pad_Out : in Gen_Tokens.Token; procedure Scan_Set_With (Input : in With_Input; @@ -156,7 +158,7 @@ package Packrat.Lexer is Start : in Positive) return Combinator_Result; with function Test - (Item : in Element) + (Item : in Element_Type) return Boolean; Minimum : in Natural := 0; function Many_Until @@ -169,7 +171,7 @@ package Packrat.Lexer is generic with function Test - (Item : in Element) + (Item : in Element_Type) return Boolean; function Satisfy (Input : in Element_Array; @@ -178,28 +180,28 @@ package Packrat.Lexer is generic with function Test - (Item : in Element) + (Item : in Element_Type) return Boolean; with function Change - (From : in Element) - return Element; + (From : in Element_Type) + return Element_Type; function Satisfy_With (Input : in Element_Array; Start : in Positive) return Combinator_Result; generic - Item : in Element; + Item : in Element_Type; function Match (Input : in Element_Array; Start : in Positive) return Combinator_Result; generic - Item : in Element; + Item : in Element_Type; with function Change - (From : in Element) - return Element; + (From : in Element_Type) + return Element_Type; function Match_With (Input : in Element_Array; Start : in Positive) @@ -221,7 +223,7 @@ package Packrat.Lexer is generic with function Test - (Item : in Element) + (Item : in Element_Type) return Boolean; function Take_While (Input : in Element_Array; @@ -230,7 +232,7 @@ package Packrat.Lexer is generic with function Test - (Item : in Element) + (Item : in Element_Type) return Boolean; function Take_Until (Input : in Element_Array; @@ -241,14 +243,14 @@ package Packrat.Lexer is generic - EOL_Item : in Element; + EOL_Item : in Element_Type; function Line_End (Input : in Element_Array; Start : in Positive) return Combinator_Result; generic - EOF_Item : in Element; + EOF_Item : in Element_Type; function Input_End (Input : in Element_Array; Start : in Positive) diff --git a/src/packrat-parse_graphs.adb b/src/packrat-parse_graphs.adb index c911fb4..c9171c6 100644 --- a/src/packrat-parse_graphs.adb +++ b/src/packrat-parse_graphs.adb @@ -1,20 +1,17 @@ -package body Packrat.Parse_Graphs is +with + -- Ada.Strings.Fixed, + Ada.Characters.Latin_1; - function "<" - (Left, Right : in Edge_Label_Type) - return Boolean is - begin - if Left.Finish = Right.Finish then - return Left.Order < Right.Order; - else - return Left.Finish < Right.Finish; - end if; - end "<"; + +package body Packrat.Parse_Graphs is + package SU renames Ada.Strings.Unbounded; + package Latin renames Ada.Characters.Latin_1; + @@ -23,47 +20,90 @@ package body Packrat.Parse_Graphs is return Boolean is use type Base.Graph; + use type Finish_Vectors.Vector; + use type Node_Label_Maps.Map; begin - return Base.Graph (Left) = Base.Graph (Right) and - Left.Root_Node = Right.Root_Node; + return Left.Internal_Graph = Right.Internal_Graph and + Left.Root_Node = Right.Root_Node and + Left.Root_Finishes = Right.Root_Finishes and + Left.Label_Map = Right.Label_Map; end "="; + function "<" + (Left, Right : Finished_Token) + return Boolean + is + Left_Index, Right_Index : Positive; + begin + if Gen_Tokens.Start (Left.Token) = Gen_Tokens.Start (Right.Token) then + if Left.Finish = Right.Finish then + if Gen_Tokens.Label (Left.Token) = Gen_Tokens.Label (Right.Token) then + Left_Index := Gen_Tokens.Value (Left.Token)'First; + Right_Index := Gen_Tokens.Value (Right.Token)'First; + while Left_Index <= Gen_Tokens.Value (Left.Token)'Last and + Right_Index <= Gen_Tokens.Value (Right.Token)'Last + loop + if Gen_Tokens.Value (Left.Token) (Left_Index) < + Gen_Tokens.Value (Right.Token) (Right_Index) + then + return True; + elsif Gen_Tokens.Value (Left.Token) (Left_Index) /= + Gen_Tokens.Value (Right.Token) (Right_Index) + then + return False; + end if; + Left_Index := Left_Index + 1; + Right_Index := Right_Index + 1; + end loop; + return Gen_Tokens.Value (Left.Token)'Length < + Gen_Tokens.Value (Right.Token)'Length; + else + return Gen_Tokens.Label (Left.Token) < Gen_Tokens.Label (Right.Token); + end if; + else + return Left.Finish < Right.Finish; + end if; + else + return Gen_Tokens.Start (Left.Token) < Gen_Tokens.Start (Right.Token); + end if; + end "<"; - - function To_Graph - (Nodes : in Node_Array; - Edges : in Edge_Array) - return Parse_Graph is + function "<" + (Left, Right : in Finished_Token_Array) + return Boolean + is + Left_Index : Positive := Left'First; + Right_Index : Positive := Right'First; begin - return G : Parse_Graph := - (Base.To_Graph (Nodes, Edges) with Root_Node => No_Node); - end To_Graph; + while Left_Index <= Left'Last and Right_Index <= Right'Last loop + if Left (Left_Index) < Right (Right_Index) then + return True; + elsif Left (Left_Index) /= Right (Right_Index) then + return False; + end if; + Left_Index := Left_Index + 1; + Right_Index := Right_Index + 1; + end loop; + return Left'Length < Right'Length; + end "<"; - function To_Graph - (Nodes : in Node_Array; - Edges : in Edge_Array; - Root : in Extended_Node_ID_Type) - return Parse_Graph - is - Valid : Boolean := False; + function "<" + (Left, Right : in Token_Group) + return Boolean is begin - if Root /= No_Node then - for N of Nodes loop - if Root = N then - Valid := True; - exit; - end if; - end loop; - if not Valid then - raise Constraint_Error with "Root node not in graph"; + if Gen_Tokens.Start (Left.Parent.Token) = Gen_Tokens.Start (Right.Parent.Token) then + if Finish (Left) = Finish (Right) then + return Left.Elems.Element < Right.Elems.Element; + else + return Finish (Left) < Finish (Right); end if; + else + return Gen_Tokens.Start (Left.Parent.Token) < Gen_Tokens.Start (Right.Parent.Token); end if; - return G : Parse_Graph := - (Base.To_Graph (Nodes, Edges) with Root_Node => Root); - end To_Graph; + end "<"; @@ -73,8 +113,10 @@ package body Packrat.Parse_Graphs is (Target : in out Parse_Graph; Source : in Parse_Graph) is begin - Base.Assign (Base.Graph (Target), Base.Graph (Source)); + Target.Internal_Graph.Assign (Source.Internal_Graph); Target.Root_Node := Source.Root_Node; + Target.Root_Finishes.Assign (Source.Root_Finishes); + Target.Label_Map.Assign (Source.Label_Map); end Assign; @@ -83,170 +125,940 @@ package body Packrat.Parse_Graphs is return Parse_Graph is begin return G : Parse_Graph := - (Base.Copy (Base.Graph (Source)) with Root_Node => Source.Root_Node); + (Internal_Graph => Source.Internal_Graph.Copy, + Root_Node => Source.Root_Node, + Root_Finishes => Source.Root_Finishes.Copy, + Label_Map => Source.Label_Map.Copy); end Copy; procedure Move (Target, Source : in out Parse_Graph) is begin - Base.Move (Base.Graph (Target), Base.Graph (Source)); + Target.Internal_Graph.Move (Source.Internal_Graph); Target.Root_Node := Source.Root_Node; - Source.Root_Node := No_Node; + Source.Root_Node := Base.No_Node; + Target.Root_Finishes.Move (Source.Root_Finishes); + Target.Label_Map.Move (Source.Label_Map); end Move; - function Root + function Is_Empty (Container : in Parse_Graph) - return Cursor is + return Boolean is begin - if not Container.Contains (Container.Root_Node) then - return No_Element; - else - return Container.To_Cursor (Container.Root_Node); - end if; - end Root; + return Container.Label_Map.Is_Empty; + end Is_Empty; - procedure Set_Root - (Container : in out Parse_Graph; - Node : in Extended_Node_ID_Type) is + procedure Clear + (Container : in out Parse_Graph) is begin - Container.Root_Node := Node; - end Set_Root; + Container.Internal_Graph.Clear; + Container.Root_Node := Base.No_Node; + Container.Root_Finishes.Clear; + Container.Label_Map.Clear; + end Clear; - function Finish_List + function In_Finishes (Container : in Parse_Graph; Node : in Node_ID_Type) - return Finish_Array is + return Finish_Vectors.Vector + is + Result : Finish_Vectors.Vector; + Current : Finish_Type; begin - return Finish_List (Container.To_Cursor (Node)); - end Finish_List; + if Node = Container.Root_Node then + Result := Container.Root_Finishes; + end if; + for Edge of Container.Internal_Graph.Inbound (Node) loop + Current := Container.Internal_Graph.Label (Edge).Subnode_Finish; + if not Result.Contains (Current) then + Result.Append (Current); + end if; + end loop; + return Result; + end In_Finishes; - function Finish_List - (Position : in Cursor) - return Finish_Array + function Out_Finishes + (Container : in Parse_Graph; + Node : in Node_ID_Type) + return Finish_Vectors.Vector is - function V2A is new Vector_To_Array (Finish_Type, Finish_Array, Finish_Vectors); - Fins : Finish_Vectors.Vector; - Current : Edge_Label_Type; - begin - for E of Outbound (Position) loop - if Has_Label (Position, E) then - Current := Label (Position, E); - if not Fins.Contains (Current.Finish) then - Fins.Append (Current.Finish); - end if; + Result : Finish_Vectors.Vector; + Current : Finish_Type; + begin + for Edge of Container.Internal_Graph.Outbound (Node) loop + Current := Container.Internal_Graph.Label (Edge).Group_Finish; + if not Result.Contains (Current) then + Result.Append (Current); end if; end loop; - Finsort.Sort (Fins); - return V2A (Fins); - end Finish_List; + return Result; + end Out_Finishes; + + + + + + function Debug_String + (Container : in Parse_Graph) + return String + is + Mapping : Enum_Node_Maps.Map; + Current : Gen_Tokens.Token; + Result : SU.Unbounded_String; + begin + for Node of Container.Internal_Graph.Nodes loop + declare + Current : Label_Enum := + Gen_Tokens.Label (Container.Internal_Graph.Label (Node)); + begin + if not Mapping.Contains (Current) then + Mapping.Insert (Current, Node_Vectors.Empty_Vector); + end if; + Mapping.Reference (Current).Append (Node); + end; + end loop; + for Iter in Mapping.Iterate loop + SU.Append (Result, Label_Enum'Image (Enum_Node_Maps.Key (Iter)) & Latin.HT); + for Node of Enum_Node_Maps.Element (Iter) loop + Current := Container.Internal_Graph.Label (Node); + SU.Append (Result, Positive'Image (Gen_Tokens.Start (Current)) & " ->" & Latin.HT); + for Fin of In_Finishes (Container, Node) loop + SU.Append (Result, Finish_Type'Image (Fin) & " ->" & Latin.HT); + declare + Groupings : Token_Group_Array := Container.Subgroups ((Current, Fin)); + begin + if Groupings'Length = 0 then + SU.Append (Result, "Leaf"); + else + for Grouping of Groupings loop + for Fin_Token of Elements (Grouping) loop + SU.Append (Result, "Subnode " & + Label_Enum'Image (Gen_Tokens.Label (Fin_Token.Token)) & + " (" & Positive'Image (Gen_Tokens.Start (Fin_Token.Token)) & + "," & Finish_Type'Image (Fin_Token.Finish) & "), "); + end loop; + SU.Delete (Result, SU.Length (Result) - 1, SU.Length (Result)); + SU.Append (Result, Latin.HT); + SU.Append (Result, SU."*" (3, Latin.HT)); + end loop; + end if; + end; + SU.Delete (Result, SU.Length (Result), SU.Length (Result)); + end loop; + SU.Delete (Result, SU.Length (Result), SU.Length (Result)); + end loop; + -- what delete goes here? + end loop; + return SU.To_String (Result); + end Debug_String; + - function Sub_Nodes + + + function Contains (Container : in Parse_Graph; - Node : in Node_ID_Type; - Finish_At : in Finish_Type) - return Node_Array is + Token : in Gen_Tokens.Token) + return Boolean is begin - return Sub_Nodes (Container.To_Cursor (Node), Finish_At); - end Sub_Nodes; + return Container.Label_Map.Contains (Token); + end Contains; - function Sub_Nodes - (Position : in Cursor; - Finish_At : in Finish_Type) - return Node_Array + function Contains + (Container : in Parse_Graph; + Position : in Finished_Token) + return Boolean is - function V2A is new Vector_To_Array (Node_ID_Type, Node_Array, Node_Vectors); - Nodes : Node_Vectors.Vector; - Current_Label : Edge_Label_Type; - begin - for E of Outbound (Position) loop - if Has_Label (Position, E) then - Current_Label := Label (Position, E); - if Current_Label.Finish = Finish_At then - Nodes.Reference (Current_Label.Order) := E.To; + Node : Node_ID_Type; + begin + if not Container.Contains (Position.Token) then + return False; + end if; + Node := Container.Label_Map.Element (Position.Token); + if Node = Container.Root_Node then + for F of Container.Root_Finish_List loop + if F = Position.Finish then + return True; end if; + end loop; + end if; + return (for some Edge of Container.Internal_Graph.Inbound (Node) => + Container.Internal_Graph.Label (Edge).Subnode_Finish = Position.Finish); + end Contains; + + + function Contains + (Container : in Parse_Graph; + Grouping : in Token_Group) + return Boolean + is + Groups : Group_ID_Vectors.Vector; + Next_ID : Group_ID_Type; + Parent_Node : Node_ID_Type; + begin + if not Container.Contains (Grouping.Parent) then + return False; + end if; + for Fin_Token of Elements (Grouping) loop + if not Container.Contains (Fin_Token) then + return False; + end if; + end loop; + Parent_Node := Container.Label_Map.Element (Grouping.Parent.Token); + for Edge of Container.Internal_Graph.Outbound (Parent_Node) loop + Next_ID := Container.Internal_Graph.Label (Edge).Group_ID; + if not Groups.Contains (Next_ID) then + Groups.Append (Next_ID); end if; end loop; - return V2A (Nodes); - end Sub_Nodes; + return (for some ID of Groups => + (for all Sub of Elements (Grouping) => + (for some Edge of Container.Internal_Graph.Between + (Parent_Node, Container.Label_Map.Element (Sub.Token)) => + Container.Internal_Graph.Label (Edge) = + (Group_ID => ID, + Group_Finish => Finish (Grouping), + Subnode_Finish => Sub.Finish)))); + end Contains; + + + function Reachable + (Container : in Parse_Graph; + Position : in Finished_Token) + return Boolean + is + -- This is basically a depth first search function. + function Finder + (Current : in Finished_Token) + return Boolean is + begin + return Current = Position or else + (for some Grouping of Container.Subgroups (Current) => + (for some Fin_Token of Elements (Grouping) => Finder (Fin_Token))); + end Finder; + begin + return (for some Finish of Container.Root_Finish_List => + Finder (Container.Root_Element (Finish))); + end Reachable; + + + function Locally_Reachable + (Container : in Parse_Graph; + Node : in Node_ID_Type) + return Boolean + is + use type Ada.Containers.Count_Type; + In_Subnodes, Out_Groups : Finish_Vectors.Vector; + In_Pos, Out_Pos : Positive := 1; + begin + In_Subnodes := In_Finishes (Container, Node); + if In_Subnodes.Length = 0 then + return False; + end if; + Out_Groups := Out_Finishes (Container, Node); + Finish_Sort.Sort (In_Subnodes); + Finish_Sort.Sort (Out_Groups); + while Out_Pos <= Out_Groups.Last_Index loop + if In_Pos > In_Subnodes.Last_Index or else + In_Subnodes.Element (In_Pos) > Out_Groups.Element (Out_Pos) + then + return False; + elsif In_Subnodes.Element (In_Pos) = Out_Groups.Element (Out_Pos) then + Out_Pos := Out_Pos + 1; + end if; + In_Pos := In_Pos + 1; + end loop; + return True; + end Locally_Reachable; + + + function All_Reachable + (Container : in Parse_Graph) + return Boolean is + begin + return (for all Node of Container.Internal_Graph.Nodes => + Container.Locally_Reachable (Node)); + end All_Reachable; + + + function Valid_Starts_Finishes + (Parent : in Finished_Token; + Subtokens : in Finished_Token_Array) + return Boolean + is + Subvec : Finished_Token_Vectors.Vector; + begin + for Sub of Subtokens loop + if Gen_Tokens.Start (Sub.Token) > Sub.Finish + 1 then + return False; + end if; + Subvec.Append (Sub); + end loop; + Finished_Token_Sort.Sort (Subvec); + for Index in Subvec.First_Index .. Subvec.Last_Index - 1 loop + if Subvec (Index).Finish >= Gen_Tokens.Start (Subvec (Index + 1).Token) then + return False; + end if; + end loop; + if Parent.Finish < Subvec.Last_Element.Finish or else + Gen_Tokens.Start (Parent.Token) > Gen_Tokens.Start (Subvec.First_Element.Token) + then + return False; + end if; + return True; + end Valid_Starts_Finishes; + + + function No_Loops_Introduced + (Container : in Parse_Graph; + Parent : in Finished_Token; + Subtokens : in Finished_Token_Array) + return Boolean + is + function Looper + (Current : in Finished_Token) + return Boolean is + begin + if not Container.Contains (Current.Token) then + return False; + elsif Current = Parent then + return True; + elsif Gen_Tokens.Start (Current.Token) > Gen_Tokens.Start (Parent.Token) then + return False; + else + return (for some Grouping of Container.Subgroups (Current) => + (for some Sub of Elements (Grouping) => Looper (Sub))); + end if; + end Looper; + begin + return not Container.Contains (Parent.Token) or else + (for all Sub of Subtokens => not Looper (Sub)); + end No_Loops_Introduced; + + + function Is_Sorted + (Finishes : in Finish_Array) + return Boolean + is + function Actual is new Sorted (Finish_Type, Finish_Array); + begin + return Actual (Finishes); + end Is_Sorted; + + + function Is_Sorted + (Positions : in Finished_Token_Array) + return Boolean + is + function Actual is new Sorted (Finished_Token, Finished_Token_Array); + begin + return Actual (Positions); + end Is_Sorted; + + + function Is_Sorted + (Groupings : in Token_Group_Array) + return Boolean + is + function Actual is new Sorted (Token_Group, Token_Group_Array); + begin + return Actual (Groupings); + end Is_Sorted; + + + function No_Duplicates + (Finishes : in Finish_Array) + return Boolean + is + function Actual is new No_Dups (Finish_Type, Finish_Array); + begin + return Actual (Finishes); + end No_Duplicates; + + + function No_Duplicates + (Positions : in Finished_Token_Array) + return Boolean + is + function Actual is new No_Dups (Finished_Token, Finished_Token_Array); + begin + return Actual (Positions); + end No_Duplicates; + + + function No_Duplicates + (Groupings : in Token_Group_Array) + return Boolean + is + function Actual is new No_Dups (Token_Group, Token_Group_Array); + begin + return Actual (Groupings); + end No_Duplicates; + + procedure Include + (Container : in out Parse_Graph; + Token : in Gen_Tokens.Token) + is + Node_ID : Node_ID_Type; + begin + if not Container.Contains (Token) then + Node_ID := Container.Internal_Graph.Unused; + Container.Internal_Graph.Insert (Base.Labeled_Node_Type'(Node_ID, Token)); + Container.Label_Map.Insert (Token, Node_ID); + end if; + end Include; + + + procedure Connect + (Container : in out Parse_Graph; + Parent : in Finished_Token; + Subtokens : in Finished_Token_Array) + is + Parent_ID : Node_ID_Type; + Use_GID : Group_ID_Type; + New_Edge : Base.Edge_Type; + New_Label : Edge_Label_Type; + begin + Container.Include (Parent.Token); + for Sub of Subtokens loop + Container.Include (Sub.Token); + end loop; + Parent_ID := Container.Label_Map.Element (Parent.Token); + declare + Outedges : Base.Edge_Array := Container.Internal_Graph.Outbound (Parent_ID); + begin + Use_GID := Group_ID_Type'First; + while (for some E of Outedges => + Container.Internal_Graph.Label (E).Group_ID = Use_GID) + loop + Use_GID := Use_GID + 1; + end loop; + end; + for Sub of Subtokens loop + New_Edge := + (ID => Container.Internal_Graph.Unused, + From => Parent_ID, + To => Container.Label_Map.Element (Sub.Token)); + New_Label := (Use_GID, Parent.Finish, Sub.Finish); + Container.Internal_Graph.Insert (Base.Labeled_Edge_Type'(New_Edge, New_Label)); + end loop; + end Connect; + procedure Prune (Container : in out Parse_Graph; - Node : in Node_ID_Type) + Token : in Gen_Tokens.Token) is + begin + if not Container.Contains (Token) then + return; + end if; + if Container.Label_Map (Token) = Container.Root_Node then + Container.Clear_Root; + end if; + Container.Internal_Graph.Delete (Container.Label_Map (Token)); + Container.Label_Map.Delete (Token); + end Prune; + + + procedure Prune + (Container : in out Parse_Graph; + Position : in Finished_Token) is - My_Cursor : Cursor := Container.To_Cursor (Node); + Node : Node_ID_Type; begin - Prune (My_Cursor); + if not Container.Contains (Position.Token) then + return; + end if; + Node := Container.Label_Map.Element (Position.Token); + if Node = Container.Root_Node then + for I in reverse 1 .. Container.Root_Finishes.Last_Index loop + if Container.Root_Finishes.Element (I) = Position.Finish then + Container.Root_Finishes.Delete (I); + end if; + end loop; + end if; + for Edge of Container.Internal_Graph.Inbound (Node) loop + if Container.Internal_Graph.Label (Edge).Subnode_Finish = Position.Finish then + Container.Internal_Graph.Delete (Edge); + end if; + end loop; end Prune; procedure Prune - (Position : in out Cursor) + (Container : in out Parse_Graph; + Grouping : in Token_Group) is - use type Ada.Containers.Count_Type; - Active : Cursor_Vectors.Vector; - Current : Cursor; + Group_IDs : Group_ID_Vectors.Vector; + Parent_Node : Node_ID_Type; + Current_ID : Group_ID_Type; begin - if not Has_Element (Position) then + -- Short circuit checks + if not Container.Contains (Grouping.Parent) then return; end if; - for N of Children (Position) loop - if N /= Element (Position) then - Active.Append (Cursor_To (Position, N)); + for Fin_Token of Elements (Grouping) loop + if not Container.Contains (Fin_Token) then + return; end if; end loop; - Delete (Position); - while not Active.Is_Empty loop - for Index in reverse 1 .. Active.Last_Index loop - Current := Active (Index); - if Indegree (Current) = 0 then - for N of Children (Current) loop - if not Active.Contains (Cursor_To (Current, N)) then - Active.Append (Cursor_To (Current, N)); - end if; - end loop; - Delete (Current); + + Parent_Node := Container.To_Node (Grouping.Parent); + + -- Gather up the IDs of groups for deletion + for Edge of Container.Internal_Graph.Between + (Parent_Node, Container.To_Node (Element (Grouping, 1))) + loop + Current_ID := Container.Internal_Graph.Label (Edge).Group_ID; + if not Group_IDs.Contains (Current_ID) and then + (for all Index in 2 .. Last_Index (Grouping) => + (for some Other of Container.Internal_Graph.Between + (Parent_Node, Container.To_Node (Element (Grouping, Index))) => + Container.Internal_Graph.Label (Other).Group_ID = Current_ID)) + then + Group_IDs.Append (Current_ID); + end if; + end loop; + + -- Delete all relevant edges + for Fin_Token of Elements (Grouping) loop + for Edge of Container.Internal_Graph.Between + (Parent_Node, Container.To_Node (Fin_Token)) + loop + if Group_IDs.Contains (Container.Internal_Graph.Label (Edge).Group_ID) then + Container.Internal_Graph.Delete (Edge); end if; - Active.Delete (Index); end loop; end loop; end Prune; + procedure Delete_Unreachable + (Container : in out Parse_Graph) + is + Examine, Next : Node_Vectors.Vector; + begin + if not Container.Has_Root then + Container.Clear; + return; + end if; + for Node of Container.Internal_Graph.Nodes loop + Examine.Append (Node); + end loop; + while not Examine.Is_Empty loop + for Node of Examine loop + if Container.Internal_Graph.Contains (Node) and then + not Locally_Reachable (Container, Node) + then + for Outnode of Container.Internal_Graph.Children (Node) loop + Next.Append (Outnode); + end loop; + Container.Internal_Graph.Delete (Node); + end if; + end loop; + Examine.Move (Next); + end loop; + end Delete_Unreachable; - function Is_Ambiguous + + + function Has_Root (Container : in Parse_Graph) return Boolean is begin - for N of Container.Nodes loop - if Finish_List (Container, N)'Length > 1 then - return True; + return Container.Root_Node /= Base.No_Node; + end Has_Root; + + + procedure Set_Root + (Container : in out Parse_Graph; + Token : in Gen_Tokens.Token; + Finishes : in Finish_Array) is + begin + Container.Root_Node := Container.Label_Map.Element (Token); + Container.Root_Finishes.Clear; + for F of Finishes loop + if not Container.Root_Finishes.Contains (F) then + Container.Root_Finishes.Append (F); + end if; + end loop; + Finish_Sort.Sort (Container.Root_Finishes); + end Set_Root; + + + procedure Clear_Root + (Container : in out Parse_Graph) is + begin + Container.Root_Node := Base.No_Node; + Container.Root_Finishes.Clear; + end Clear_Root; + + + function Root_Token + (Container : in Parse_Graph) + return Gen_Tokens.Token is + begin + return Container.Internal_Graph.Label (Container.Root_Node); + end Root_Token; + + + function Root_Finish_List + (Container : in Parse_Graph) + return Finish_Array + is + function V2A is new Vector_To_Array (Finish_Type, Finish_Array, Finish_Vectors); + begin + return V2A (Container.Root_Finishes); + end Root_Finish_List; + + + function Root_Element + (Container : in Parse_Graph; + Finish_At : in Finish_Type) + return Finished_Token is + begin + return Root : Finished_Token := + (Token => Container.Internal_Graph.Label (Container.Root_Node), + Finish => Finish_At); + end Root_Element; + + + + + + function Finish_List + (Container : in Parse_Graph; + Token : in Gen_Tokens.Token) + return Finish_Array + is + function V2A is new Vector_To_Array (Finish_Type, Finish_Array, Finish_Vectors); + Result : Finish_Vectors.Vector; + begin + for Edge of Container.Internal_Graph.Outbound (Container.Label_Map.Element (Token)) loop + if not Result.Contains (Container.Internal_Graph.Label (Edge).Group_Finish) then + Result.Append (Container.Internal_Graph.Label (Edge).Group_Finish); + end if; + end loop; + Finish_Sort.Sort (Result); + return V2A (Result); + end Finish_List; + + + function Is_Leaf + (Container : in Parse_Graph; + Position : in Finished_Token) + return Boolean is + begin + for Edge of Container.Internal_Graph.Outbound + (Container.Label_Map.Element (Position.Token)) + loop + if Container.Internal_Graph.Label (Edge).Group_Finish = Position.Finish then + return False; + end if; + end loop; + return True; + end Is_Leaf; + + + function Is_Branch + (Container : in Parse_Graph; + Position : in Finished_Token) + return Boolean is + begin + return not Container.Is_Leaf (Position); + end Is_Branch; + + + + + + function Subgroups + (Container : in Parse_Graph; + Position : in Finished_Token) + return Token_Group_Array + is + function V2A is new Vector_To_Array + (Finished_Token, Finished_Token_Array, Finished_Token_Vectors); + function V2A is new Vector_To_Array + (Token_Group, Token_Group_Array, Token_Group_Vectors); + Groupings : Group_Finished_Token_Maps.Map; + Edge_Label : Edge_Label_Type; + Next_Token : Finished_Token; + Result : Token_Group_Vectors.Vector; + begin + for Edge of Container.Internal_Graph.Outbound + (Container.Label_Map.Element (Position.Token)) + loop + Edge_Label := Container.Internal_Graph.Label (Edge); + if Edge_Label.Group_Finish = Position.Finish then + Next_Token := + (Token => Container.Internal_Graph.Label (Edge.To), + Finish => Edge_Label.Subnode_Finish); + if not Groupings.Contains (Edge_Label.Group_ID) then + Groupings.Insert (Edge_Label.Group_ID, Finished_Token_Vectors.Empty_Vector); + end if; + Groupings.Reference (Edge_Label.Group_ID).Append (Next_Token); + end if; + end loop; + for Raw_Group of Groupings loop + Finished_Token_Sort.Sort (Raw_Group); + Result.Append + ((Parent => Position, + Elems => Finished_Token_Array_Holders.To_Holder (V2A (Raw_Group)))); + end loop; + Token_Group_Sort.Sort (Result); + return V2A (Result); + end Subgroups; + + + function First_Index + (Grouping : in Token_Group) + return Positive is + begin + return Grouping.Elems.Constant_Reference.Element'First; + end First_Index; + + + function Last_Index + (Grouping : in Token_Group) + return Positive is + begin + return Grouping.Elems.Constant_Reference.Element'Last; + end Last_Index; + + + function Length + (Grouping : in Token_Group) + return Ada.Containers.Count_Type is + begin + return Ada.Containers.Count_Type + (Grouping.Elems.Constant_Reference.Element'Length); + end Length; + + + function Element + (Grouping : in Token_Group; + Index : in Positive) + return Finished_Token is + begin + return Grouping.Elems.Constant_Reference.Element (Index); + end Element; + + + function Elements + (Grouping : in Token_Group) + return Finished_Token_Array is + begin + return Grouping.Elems.Element; + end Elements; + + + function Parent + (Grouping : in Token_Group) + return Finished_Token is + begin + return Grouping.Parent; + end Parent; + + + function Finish + (Grouping : in Token_Group) + return Finish_Type is + begin + return Grouping.Parent.Finish; + end Finish; + + + + + + function Is_Root_Ambiguous + (Container : in Parse_Graph) + return Boolean + is + use type Ada.Containers.Count_Type; + First_Group : Group_ID_Type; + Seen_Group : Boolean := False; + Check_Label : Edge_Label_Type; + begin + if Container.Root_Finishes.Length = 0 then + return False; + elsif Container.Root_Finishes.Length > 1 then + return True; + end if; + for Edge of Container.Internal_Graph.Outbound (Container.Root_Node) loop + Check_Label := Container.Internal_Graph.Label (Edge); + if Container.Root_Finishes.Contains (Check_Label.Group_Finish) then + if not Seen_Group then + Seen_Group := True; + First_Group := Check_Label.Group_ID; + elsif Check_Label.Group_ID /= First_Group then + return True; + end if; end if; end loop; return False; + end Is_Root_Ambiguous; + + + function Is_Ambiguous + (Container : in Parse_Graph) + return Boolean + is + Seen_Finishes : Finish_Group_Maps.Map; + Edge_Label : Edge_Label_Type; + begin + if Container.Has_Root and then Is_Root_Ambiguous (Container) then + return True; + end if; + for Node of Container.Internal_Graph.Nodes loop + for Edge of Container.Internal_Graph.Outbound (Node) loop + Edge_Label := Container.Internal_Graph.Label (Edge); + if Seen_Finishes.Contains (Edge_Label.Group_Finish) then + if Seen_Finishes.Element (Edge_Label.Group_Finish) /= Edge_Label.Group_ID then + return True; + end if; + else + Seen_Finishes.Insert (Edge_Label.Group_Finish, Edge_Label.Group_ID); + end if; + end loop; + Seen_Finishes.Clear; + end loop; + return False; end Is_Ambiguous; + function Ambiguities + (Container : in Parse_Graph; + Ambiguous_Root : out Boolean) + return Finished_Token_Array + is + function V2A is new Vector_To_Array + (Finished_Token, Finished_Token_Array, Finished_Token_Vectors); + Seen_Finishes : Finish_Group_Maps.Map; + Edge_Label : Edge_Label_Type; + Next_Token : Finished_Token; + Result : Finished_Token_Vectors.Vector; + begin + Ambiguous_Root := Container.Has_Root and then Container.Is_Root_Ambiguous; + for Node of Container.Internal_Graph.Nodes loop + for Edge of Container.Internal_Graph.Outbound (Node) loop + Edge_Label := Container.Internal_Graph.Label (Edge); + if Seen_Finishes.Contains (Edge_Label.Group_Finish) then + if Seen_Finishes.Element (Edge_Label.Group_Finish) /= Edge_Label.Group_ID then + Next_Token := + (Token => Container.Internal_Graph.Label (Node), + Finish => Edge_Label.Group_Finish); + if not Result.Contains (Next_Token) then + Result.Append (Next_Token); + end if; + end if; + else + Seen_Finishes.Insert (Edge_Label.Group_Finish, Edge_Label.Group_ID); + end if; + end loop; + Seen_Finishes.Clear; + end loop; + Finished_Token_Sort.Sort (Result); + return V2A (Result); + end Ambiguities; + + + + + + function Isomorphic + (Left, Right : in Parse_Graph) + return Boolean + is + begin + -- to-do + return False; + end Isomorphic; + + + function Isomorphic_Subgraph + (Left_Graph : in Parse_Graph; + Left_Position : in Finished_Token; + Right_Graph : in Parse_Graph; + Right_Position : in Finished_Token) + return Boolean + is + begin + -- to-do + return False; + end Isomorphic_Subgraph; + + + + function To_Node + (Container : in Parse_Graph; + Token : in Gen_Tokens.Token) + return Node_ID_Type is + begin + return Container.Label_Map.Element (Token); + end To_Node; + + + function To_Node + (Container : in Parse_Graph; + Position : in Finished_Token) + return Node_ID_Type is + begin + return Container.Label_Map.Element (Position.Token); + end To_Node; + + + + + + function Sorted + (Input : in Array_Type) + return Boolean is + begin + for Index in Input'First .. Input'Last - 1 loop + if Input (Index + 1) < Input (Index) then + return False; + end if; + end loop; + return True; + end Sorted; + + + function No_Dups + (Input : in Array_Type) + return Boolean is + begin + for X in Input'First .. Input'Last - 1 loop + for Y in X + 1 .. Input'Last loop + if Input (X) = Input (Y) then + return False; + end if; + end loop; + end loop; + return True; + end No_Dups; + function Vector_To_Array (Input : in Type_Vectors.Vector) diff --git a/src/packrat-parse_graphs.ads b/src/packrat-parse_graphs.ads index 5663bcf..aced9d3 100644 --- a/src/packrat-parse_graphs.ads +++ b/src/packrat-parse_graphs.ads @@ -2,12 +2,14 @@ with - Ada.Containers, - Directed_Graphs; + Ada.Containers; private with - Ada.Containers.Vectors; + Ada.Containers.Indefinite_Holders, + Ada.Containers.Vectors, + Ada.Containers.Ordered_Maps, + Directed_Graphs; generic @@ -16,539 +18,483 @@ generic type Element_Type is private; type Element_Array is array (Positive range <>) of Element_Type; + with function "<" (Left, Right : in Element_Type) return Boolean is <>; + with package Gen_Tokens is new Tokens (Label_Enum, Element_Type, Element_Array); package Packrat.Parse_Graphs is - type Node_ID_Type is new Positive; - type Edge_ID_Type is new Positive; + -- get rid of Parse_Cursor and replace with Finished_Tokens, + -- rewrite Groups to also use Finished_Tokens - subtype Node_Label_Type is Gen_Tokens.Token; + -- this will allow simplification regarding checks for same graphs, etc - subtype Finish_Type is Positive; - subtype Order_Type is Positive; - type Edge_Label_Type is record + -- there should be pruning functions for Token, Finished_Token, Token_Group, + -- and for removing all bits unreachable from the root + + -- by inserting and building the parse graph from the bottom up, + -- no pruning needs to be done while parsing until the very end, + -- as even if a bit of parsing fails then the nodes will either be used later + -- or stay unreachable, and thus a single removal of all unreachable + -- nodes at the end will suffice + + -- Token_Groups should include a record of their parent node to make other things easier + + + type Parse_Graph is tagged private; + + function "=" + (Left, Right : in Parse_Graph) + return Boolean; + + Empty_Graph : constant Parse_Graph; + + subtype Finish_Type is Natural; + type Finish_Array is array (Positive range <>) of Finish_Type; + + type Finished_Token is record + Token : Gen_Tokens.Token; Finish : Finish_Type; - Order : Order_Type; end record; + type Finished_Token_Array is array (Positive range <>) of Finished_Token; + function "<" - (Left, Right : in Edge_Label_Type) + (Left, Right : in Finished_Token) return Boolean; - type Finish_Array is array (Positive range <>) of Finish_Type; + function "<" + (Left, Right : in Finished_Token_Array) + return Boolean; + type Token_Group is private with Type_Invariant => + Ada.Containers.">" (Length (Token_Group), 0); + type Token_Group_Array is array (Positive range <>) of Token_Group; + function "<" + (Left, Right : in Token_Group) + return Boolean; - -- This is to avoid some... ambiguities... with "=" - -- functions when instantiating the Base package. - use type Gen_Tokens.Token; - package Base is new Directed_Graphs - (Node_ID_Type => Node_ID_Type, - Edge_ID_Type => Edge_ID_Type, - Node_Label_Type => Node_Label_Type, - Edge_Label_Type => Edge_Label_Type); - subtype Extended_Node_ID_Type is Base.Extended_Node_ID_Type; - subtype Node_Array is Base.Node_Array; - subtype Path is Base.Path; + procedure Assign + (Target : in out Parse_Graph; + Source : in Parse_Graph); - subtype Edge_Type is Base.Edge_Type; - subtype Edge_Array is Base.Edge_Array; + function Copy + (Source : in Parse_Graph) + return Parse_Graph; - function "<" - (Left, Right : in Edge_Type) - return Boolean - renames Base."<"; + procedure Move + (Target, Source : in out Parse_Graph); - subtype Node_Label_Constant_Reference is Base.Node_Label_Constant_Reference; - subtype Node_Label_Reference is Base.Node_Label_Reference; - subtype Edge_Label_Constant_Reference is Base.Edge_Label_Constant_Reference; - subtype Edge_Label_Reference is Base.Edge_Label_Reference; + function Is_Empty + (Container : in Parse_Graph) + return Boolean; + procedure Clear + (Container : in out Parse_Graph); - type Parse_Graph is new Base.Graph with private; - subtype Cursor is Base.Cursor; - function "=" - (Left, Right : in Cursor) - return Boolean - renames Base."="; + function Debug_String + (Container : in Parse_Graph) + return String; - function "=" - (Left, Right : in Parse_Graph) + + + + function Contains + (Container : in Parse_Graph; + Token : in Gen_Tokens.Token) return Boolean; + function Contains + (Container : in Parse_Graph; + Position : in Finished_Token) + return Boolean; + function Contains + (Container : in Parse_Graph; + Grouping : in Token_Group) + return Boolean; + function Reachable + (Container : in Parse_Graph; + Position : in Finished_Token) + return Boolean + with Pre => Container.Has_Root; - No_Node : constant Extended_Node_ID_Type := Base.No_Node; - No_Element : constant Cursor := Base.No_Element; - Empty_Graph : constant Parse_Graph; + function All_Reachable + (Container : in Parse_Graph) + return Boolean + with Pre => Container.Has_Root; + function Valid_Starts_Finishes + (Parent : in Finished_Token; + Subtokens : in Finished_Token_Array) + return Boolean + with Pre => Subtokens'Length > 0; + function No_Loops_Introduced + (Container : in Parse_Graph; + Parent : in Finished_Token; + Subtokens : in Finished_Token_Array) + return Boolean + with Pre => Subtokens'Length > 0 and + Valid_Starts_Finishes (Parent, Subtokens); + function Is_Sorted + (Finishes : in Finish_Array) + return Boolean; - function To_Graph - (Nodes : in Node_Array; - Edges : in Edge_Array) - return Parse_Graph; + function Is_Sorted + (Positions : in Finished_Token_Array) + return Boolean; - function To_Graph - (Nodes : in Node_Array; - Edges : in Edge_Array; - Root : in Extended_Node_ID_Type) - return Parse_Graph; + function Is_Sorted + (Groupings : in Token_Group_Array) + return Boolean; + function No_Duplicates + (Finishes : in Finish_Array) + return Boolean; + function No_Duplicates + (Positions : in Finished_Token_Array) + return Boolean; + function No_Duplicates + (Groupings : in Token_Group_Array) + return Boolean; - procedure Assign - (Target : in out Parse_Graph; - Source : in Parse_Graph); - function Copy - (Source : in Parse_Graph) - return Parse_Graph; - procedure Move - (Target, Source : in out Parse_Graph); + + procedure Include + (Container : in out Parse_Graph; + Token : in Gen_Tokens.Token) + with Post => Container.Contains (Token); + + procedure Connect + (Container : in out Parse_Graph; + Parent : in Finished_Token; + Subtokens : in Finished_Token_Array) + with Pre => Subtokens'Length > 0 and + Valid_Starts_Finishes (Parent, Subtokens) and + Container.No_Loops_Introduced (Parent, Subtokens); + + procedure Prune + (Container : in out Parse_Graph; + Token : in Gen_Tokens.Token) + with Post => not Container.Contains (Token); + + procedure Prune + (Container : in out Parse_Graph; + Position : in Finished_Token) + with Post => not Container.Contains (Position); + + procedure Prune + (Container : in out Parse_Graph; + Grouping : in Token_Group) + with Post => not Container.Contains (Grouping); + + procedure Delete_Unreachable + (Container : in out Parse_Graph) + with Pre => Container.Has_Root, + Post => Container.All_Reachable; - -- Usually you would get a Parse_Graph by way of parsing something - -- with the parser, which would then have a root node already set, - -- being the only node in the graph that has no parents. - function Root + function Has_Root (Container : in Parse_Graph) - return Cursor; + return Boolean; - -- Not really advisable under most circumstances unless you're - -- making a Parse_Graph manually for some reason. procedure Set_Root (Container : in out Parse_Graph; - Node : in Extended_Node_ID_Type); + Token : in Gen_Tokens.Token; + Finishes : in Finish_Array) + with Pre => Container.Contains (Token) and + (for all F of Finishes => F >= Gen_Tokens.Start (Token) - 1), + Post => Container.Has_Root; + procedure Clear_Root + (Container : in out Parse_Graph) + with Post => not Container.Has_Root; + function Root_Token + (Container : in Parse_Graph) + return Gen_Tokens.Token + with Pre => Container.Has_Root; + function Root_Finish_List + (Container : in Parse_Graph) + return Finish_Array + with Pre => Container.Has_Root, + Post => Is_Sorted (Root_Finish_List'Result) and + No_Duplicates (Root_Finish_List'Result); - -- Returns an array of the all possible finish positions resulting - -- from current node's parsing, sorted from shortest to longest. - function Finish_List + function Root_Element (Container : in Parse_Graph; - Node : in Node_ID_Type) - return Finish_Array; + Finish_At : in Finish_Type) + return Finished_Token + with Pre => Container.Has_Root and then + (for some F of Container.Root_Finish_List => F = Finish_At); + + + function Finish_List - (Position : in Cursor) - return Finish_Array; + (Container : in Parse_Graph; + Token : in Gen_Tokens.Token) + return Finish_Array + with Pre => Container.Contains (Token), + Post => Is_Sorted (Finish_List'Result) and + No_Duplicates (Finish_List'Result); - -- Returns an array of children of a node made by a parse that ended - -- at a specified finish position, sorted according to parsing order. - function Sub_Nodes + function Is_Leaf (Container : in Parse_Graph; - Node : in Node_ID_Type; - Finish_At : in Finish_Type) - return Node_Array; + Position : in Finished_Token) + return Boolean + with Pre => Container.Contains (Position); - function Sub_Nodes - (Position : in Cursor; - Finish_At : in Finish_Type) - return Node_Array; + function Is_Branch + (Container : in Parse_Graph; + Position : in Finished_Token) + return Boolean + with Pre => Container.Contains (Position); - -- Deletes a node from the graph then removes any other nodes that - -- were made unreachable from the root by that deletion. - procedure Prune - (Container : in out Parse_Graph; - Node : in Node_ID_Type); + function Subgroups + (Container : in Parse_Graph; + Position : in Finished_Token) + return Token_Group_Array + with Pre => Container.Contains (Position), + Post => Is_Sorted (Subgroups'Result) and + No_Duplicates (Subgroups'Result) and + (for all G of Subgroups'Result => Finish (G) = Position.Finish); - procedure Prune - (Position : in out Cursor); + function First_Index + (Grouping : in Token_Group) + return Positive; + + function Last_Index + (Grouping : in Token_Group) + return Positive; + + function Length + (Grouping : in Token_Group) + return Ada.Containers.Count_Type; + + function Element + (Grouping : in Token_Group; + Index : in Positive) + return Finished_Token + with Pre => Index in First_Index (Grouping) .. Last_Index (Grouping); + + function Elements + (Grouping : in Token_Group) + return Finished_Token_Array + with Post => Is_Sorted (Elements'Result) and + Valid_Starts_Finishes (Parent (Grouping), Elements'Result); + function Parent + (Grouping : in Token_Group) + return Finished_Token; + function Finish + (Grouping : in Token_Group) + return Finish_Type; - -- Tests whether there are multiple potential finish points for any - -- of the nodes in the graph, and hence whether the parse was ambiguous. + + + -- An ambiguous graph means that either some node exists with multiple groups + -- attached to it with the same Group_Finish value, or the root node has multiple + -- groups of any Group_Finish value attached to it. + + function Is_Root_Ambiguous + (Container : in Parse_Graph) + return Boolean + with Pre => Container.Has_Root; + function Is_Ambiguous (Container : in Parse_Graph) return Boolean; + function Ambiguities + (Container : in Parse_Graph; + Ambiguous_Root : out Boolean) + return Finished_Token_Array + with Post => Is_Sorted (Ambiguities'Result) and + No_Duplicates (Ambiguities'Result); - -- Since this package sets the Nodes, Edges, and Labels of the Graphs - -- to be specific types, it cannot be a child package of Directed_Graphs. - -- Yet, it still is an extension of that package. To make it all work - -- conveniently we re-export all the Cursor functions and procedures here. - -- The Graph functions and procedures can be called with dot extension - -- notation regardless so aren't a concern. - function Has_Element - (Position : in Cursor) - return Boolean - renames Base.Has_Element; + function Isomorphic + (Left, Right : in Parse_Graph) + return Boolean; - function Element - (Position : in Cursor) - return Extended_Node_ID_Type - renames Base.Element; - - function Node_Count - (Container : in Cursor) - return Ada.Containers.Count_Type - renames Base.Node_Count; - - function Node_Count_In_Subgraph - (Position : in Cursor) - return Ada.Containers.Count_Type - renames Base.Node_Count_In_Subgraph; - - function Edge_Count - (Container : in Cursor) - return Ada.Containers.Count_Type - renames Base.Edge_Count; - - function Edge_Count_In_Subgraph - (Position : in Cursor) - return Ada.Containers.Count_Type - renames Base.Edge_Count_In_Subgraph; - - function Nodes - (Container : in Cursor) - return Node_Array - renames Base.Nodes; - - function Nodes_In_Subgraph - (Position : in Cursor) - return Node_Array - renames Base.Nodes_In_Subgraph; - - function Edges - (Container : in Cursor) - return Edge_Array - renames Base.Edges; - - function Edges_In_Subgraph - (Position : in Cursor) - return Edge_Array - renames Base.Edges_In_Subgraph; - - function Unused - (Container : in Cursor) - return Node_ID_Type - renames Base.Unused; - - function Unused - (Container : in Cursor) - return Edge_ID_Type - renames Base.Unused; - - procedure Insert - (Container : in Cursor; - Node : in Node_ID_Type) - renames Base.Insert; - - procedure Insert - (Container : in Cursor; - Node : in Node_ID_Type; - Label : in Node_Label_Type) - renames Base.Insert; - - procedure Insert - (Container : in Cursor; - Edge : in Edge_Type) - renames Base.Insert; - - procedure Insert - (Container : in Cursor; - Edge : in Edge_Type; - Label : in Edge_Label_Type) - renames Base.Insert; - - procedure Delete - (Position : in out Cursor) - renames Base.Delete; - - procedure Delete - (Container : in Cursor; - Edge : in Edge_Type) - renames Base.Delete; - - procedure Append_Label - (Position : in Cursor; - Label : in Node_Label_Type) - renames Base.Append_Label; - - procedure Append_Label - (Container : in Cursor; - Edge : in Edge_Type; - Label : in Edge_Label_Type) - renames Base.Append_Label; - - procedure Replace_Label - (Position : in Cursor; - Label : in Node_Label_Type) - renames Base.Replace_Label; - - procedure Replace_Label - (Container : in Cursor; - Edge : in Edge_Type; - Label : in Edge_Label_Type) - renames Base.Replace_Label; - - procedure Delete_Label - (Position : in Cursor) - renames Base.Delete_Label; - - procedure Delete_Label - (Container : in Cursor; - Edge : in Edge_Type) - renames Base.Delete_Label; - - procedure Delete_Subgraph - (Position : in out Cursor) - renames Base.Delete_Subgraph; - - procedure Swap - (Left, Right : in out Cursor) - renames Base.Swap; - - function Constant_Label_Reference - (Position : in Cursor) - return Node_Label_Constant_Reference - renames Base.Constant_Label_Reference; - - function Label_Reference - (Position : in Cursor) - return Node_Label_Reference - renames Base.Label_Reference; - - function Constant_Label_Reference - (Container : in Cursor; - Edge : in Edge_Type) - return Edge_Label_Constant_Reference - renames Base.Constant_Label_Reference; - - function Label_Reference - (Container : in Cursor; - Edge : in Edge_Type) - return Edge_Label_Reference - renames Base.Label_Reference; - - function Has_Label - (Position : in Cursor) + function Isomorphic_Subgraph + (Left_Graph : in Parse_Graph; + Left_Position : in Finished_Token; + Right_Graph : in Parse_Graph; + Right_Position : in Finished_Token) return Boolean - renames Base.Has_Label; + with Pre => Left_Graph.Contains (Left_Position) and + Right_Graph.Contains (Right_Position); - function Has_Label - (Container : in Cursor; - Edge : in Edge_Type) - return Boolean - renames Base.Has_Label; - - function Label - (Position : in Cursor) - return Node_Label_Type - renames Base.Label; - - function Label - (Container : in Cursor; - Edge : in Edge_Type) - return Edge_Label_Type - renames Base.Label; - - function Neighbors - (Position : in Cursor) - return Node_Array - renames Base.Neighbors; - - function Parents - (Position : in Cursor) - return Node_Array - renames Base.Parents; - - function Children - (Position : in Cursor) - return Node_Array - renames Base.Children; - - function Outbound - (Position : in Cursor) - return Edge_Array - renames Base.Outbound; - - function Inbound - (Position : in Cursor) - return Edge_Array - renames Base.Inbound; - - function Between - (Parent, Child : in Cursor) - return Edge_Array - renames Base.Between; - - function Outdegree - (Position : in Cursor) - return Ada.Containers.Count_Type - renames Base.Outdegree; - - function Indegree - (Position : in Cursor) - return Ada.Containers.Count_Type - renames Base.Indegree; - - function Degree - (Position : in Cursor) - return Ada.Containers.Count_Type - renames Base.Degree; - - function Has_Edge - (Parent, Child : in Cursor) - return Boolean - renames Base.Has_Edge; - function Has_Labeled_Edge - (Parent, Child : in Cursor) - return Boolean - renames Base.Has_Labeled_Edge; +private - function Has_Neighbor - (Left, Right : in Cursor) - return Boolean - renames Base.Has_Neighbor; - - function Find_In_Subgraph - (Position : in Cursor; - Label : in Node_Label_Type) - return Node_Array - renames Base.Find_In_Subgraph; - - function Find_In_Subgraph - (Position : in Cursor; - Label : in Edge_Label_Type) - return Edge_Array - renames Base.Find_In_Subgraph; - - function Contains_In_Subgraph - (Position : in Cursor; - Node : in Extended_Node_ID_Type) - return Boolean - renames Base.Contains_In_Subgraph; - function Contains_In_Subgraph - (Position : in Cursor; - Node : in Extended_Node_ID_Type; - Label : in Node_Label_Type) - return Boolean - renames Base.Contains_In_Subgraph; + type Node_ID_Type is new Positive; + type Edge_ID_Type is new Positive; - function Contains_In_Subgraph - (Position : in Cursor; - Edge_ID : in Edge_ID_Type) - return Boolean - renames Base.Contains_In_Subgraph; + subtype Node_Label_Type is Gen_Tokens.Token; - function Contains_In_Subgraph - (Position : in Cursor; - Edge : in Edge_Type) - return Boolean - renames Base.Contains_In_Subgraph; + subtype Group_ID_Type is Positive; - function Contains_In_Subgraph - (Position : in Cursor; - Edge : in Edge_Type; - Label : in Edge_Label_Type) - return Boolean - renames Base.Contains_In_Subgraph; + type Edge_Label_Type is record + Group_ID : Group_ID_Type; + Group_Finish : Finish_Type; + Subnode_Finish : Finish_Type; + end record; - function Contains_Label_In_Subgraph - (Position : in Cursor; - Label : in Node_Label_Type) - return Boolean - renames Base.Contains_Label_In_Subgraph; - function Contains_Label_In_Subgraph - (Position : in Cursor; - Label : in Edge_Label_Type) - return Boolean - renames Base.Contains_Label_In_Subgraph; - function Next - (Position : in Cursor) - return Cursor - renames Base.Next; - procedure Next - (Position : in out Cursor) - renames Base.Next; + function To_Node + (Container : in Parse_Graph; + Token : in Gen_Tokens.Token) + return Node_ID_Type; + + function To_Node + (Container : in Parse_Graph; + Position : in Finished_Token) + return Node_ID_Type; + + function Locally_Reachable + (Container : in Parse_Graph; + Node : in Node_ID_Type) + return Boolean; + + - function Previous - (Position : in Cursor) - return Cursor - renames Base.Previous; - procedure Previous - (Position : in out Cursor) - renames Base.Previous; + -- This 'use type' is to avoid some ambiguities with "=" functions when + -- instantiating the Base package. + use type Gen_Tokens.Token; - function Follow - (Position : in Cursor; - Edge : in Edge_Type) - return Cursor - renames Base.Follow; + package Base is new Directed_Graphs + (Node_ID_Type => Node_ID_Type, + Edge_ID_Type => Edge_ID_Type, + Node_Label_Type => Node_Label_Type, + Edge_Label_Type => Edge_Label_Type); - function Cursor_To - (Position : in Cursor; - Node : in Node_ID_Type) - return Cursor - renames Base.Cursor_To; + package Finish_Vectors is new Ada.Containers.Vectors + (Index_Type => Positive, + Element_Type => Finish_Type); -private + package Finish_Sort is new Finish_Vectors.Generic_Sorting; + package Node_Label_Maps is new Ada.Containers.Ordered_Maps + (Key_Type => Gen_Tokens.Token, + Element_Type => Node_ID_Type); - type Parse_Graph is new Base.Graph with record - Root_Node : Extended_Node_ID_Type := No_Node; + type Parse_Graph is tagged record + Internal_Graph : Base.Graph := Base.Empty_Graph; + Root_Node : Base.Extended_Node_ID_Type := Base.No_Node; + Root_Finishes : Finish_Vectors.Vector := Finish_Vectors.Empty_Vector; + Label_Map : Node_Label_Maps.Map := Node_Label_Maps.Empty_Map; end record; Empty_Graph : constant Parse_Graph := - (Base.Empty_Graph with Root_Node => No_Node); + (Internal_Graph => Base.Empty_Graph, + Root_Node => Base.No_Node, + Root_Finishes => Finish_Vectors.Empty_Vector, + Label_Map => Node_Label_Maps.Empty_Map); + package Finished_Token_Array_Holders is new Ada.Containers.Indefinite_Holders + (Element_Type => Finished_Token_Array); + + type Token_Group is record + Parent : Finished_Token; + Elems : Finished_Token_Array_Holders.Holder; + end record; + + + + + -- should a lot of these actually be ordered sets instead? package Node_Vectors is new Ada.Containers.Vectors (Index_Type => Positive, Element_Type => Node_ID_Type); - package Cursor_Vectors is new Ada.Containers.Vectors + package Group_ID_Vectors is new Ada.Containers.Vectors (Index_Type => Positive, - Element_Type => Cursor); + Element_Type => Group_ID_Type); - package Finish_Vectors is new Ada.Containers.Vectors + package Finished_Token_Vectors is new Ada.Containers.Vectors (Index_Type => Positive, - Element_Type => Finish_Type); + Element_Type => Finished_Token); + + package Token_Group_Vectors is new Ada.Containers.Vectors + (Index_Type => Positive, + Element_Type => Token_Group); + + package Finished_Token_Sort is new Finished_Token_Vectors.Generic_Sorting; + package Token_Group_Sort is new Token_Group_Vectors.Generic_Sorting; + + package Group_Finished_Token_Maps is new Ada.Containers.Ordered_Maps + (Key_Type => Group_ID_Type, + Element_Type => Finished_Token_Vectors.Vector, + "=" => Finished_Token_Vectors."="); - package Finsort is new Finish_Vectors.Generic_Sorting; + package Finish_Group_Maps is new Ada.Containers.Ordered_Maps + (Key_Type => Finish_Type, + Element_Type => Group_ID_Type); + package Enum_Node_Maps is new Ada.Containers.Ordered_Maps + (Key_Type => Label_Enum, + Element_Type => Node_Vectors.Vector, + "=" => Node_Vectors."="); + + generic + type Base_Type is private; + type Array_Type is array (Positive range <>) of Base_Type; + with function "<" (Left, Right : in Base_Type) return Boolean is <>; + function Sorted + (Input : in Array_Type) + return Boolean; + + generic + type Base_Type is private; + type Array_Type is array (Positive range <>) of Base_Type; + with function "=" (Left, Right : in Base_Type) return Boolean is <>; + function No_Dups + (Input : in Array_Type) + return Boolean; + generic type Base_Type is private; type Array_Type is array (Positive range <>) of Base_Type; diff --git a/src/packrat-tokens.adb b/src/packrat-tokens.adb index e0ea10d..08e0181 100644 --- a/src/packrat-tokens.adb +++ b/src/packrat-tokens.adb @@ -16,6 +16,45 @@ package body Tokens is + function "<" + (Left, Right : in Token) + return Boolean + is + Left_Index, Right_Index : Positive; + begin + if Left.Start_At = Right.Start_At then + if Left.Identifier = Right.Identifier then + Left_Index := Left.Token_Value.Constant_Reference.Element'First; + Right_Index := Right.Token_Value.Constant_Reference.Element'First; + while Left_Index <= Left.Token_Value.Constant_Reference.Element'Last and + Right_Index <= Right.Token_Value.Constant_Reference.Element'Last + loop + if Left.Token_Value.Constant_Reference.Element (Left_Index) < + Right.Token_Value.Constant_Reference.Element (Right_Index) + then + return True; + elsif Left.Token_Value.Constant_Reference.Element (Left_Index) /= + Right.Token_Value.Constant_Reference.Element (Right_Index) + then + return False; + end if; + Left_Index := Left_Index + 1; + Right_Index := Right_Index + 1; + end loop; + return Left.Token_Value.Constant_Reference.Element'Length < + Right.Token_Value.Constant_Reference.Element'Length; + else + return Left.Identifier < Right.Identifier; + end if; + else + return Left.Start_At < Right.Start_At; + end if; + end "<"; + + + + + function Create (Ident : in Label_Enum; Start : in Positive; diff --git a/src/packrat.ads b/src/packrat.ads index 365cce5..6032cb1 100644 --- a/src/packrat.ads +++ b/src/packrat.ads @@ -104,8 +104,9 @@ package Packrat is generic type Label_Enum is (<>); - type Element is private; - type Element_Array is array (Positive range <>) of Element; + type Element_Type is private; + type Element_Array is array (Positive range <>) of Element_Type; + with function "<" (Left, Right : in Element_Type) return Boolean is <>; package Tokens is @@ -113,6 +114,11 @@ package Packrat is type Token_Array is array (Positive range <>) of Token; + function "<" + (Left, Right : in Token) + return Boolean; + + function Create (Ident : in Label_Enum; Start : in Positive; @@ -120,7 +126,7 @@ package Packrat is return Token; - -- Note: The Start index indicate where the token was found + -- Note: The Start index indicates where the token was found -- in whatever array it was lexed from. The Value does *not* -- have to correspond with whatever is found there. -- cgit