diff options
Diffstat (limited to 'src/packrat-parse_graphs.ads')
-rw-r--r-- | src/packrat-parse_graphs.ads | 770 |
1 files changed, 358 insertions, 412 deletions
diff --git a/src/packrat-parse_graphs.ads b/src/packrat-parse_graphs.ads index 5663bcf..aced9d3 100644 --- a/src/packrat-parse_graphs.ads +++ b/src/packrat-parse_graphs.ads @@ -2,12 +2,14 @@ with - Ada.Containers, - Directed_Graphs; + Ada.Containers; private with - Ada.Containers.Vectors; + Ada.Containers.Indefinite_Holders, + Ada.Containers.Vectors, + Ada.Containers.Ordered_Maps, + Directed_Graphs; generic @@ -16,539 +18,483 @@ generic type Element_Type is private; type Element_Array is array (Positive range <>) of Element_Type; + with function "<" (Left, Right : in Element_Type) return Boolean is <>; + with package Gen_Tokens is new Tokens (Label_Enum, Element_Type, Element_Array); package Packrat.Parse_Graphs is - type Node_ID_Type is new Positive; - type Edge_ID_Type is new Positive; + -- get rid of Parse_Cursor and replace with Finished_Tokens, + -- rewrite Groups to also use Finished_Tokens - subtype Node_Label_Type is Gen_Tokens.Token; + -- this will allow simplification regarding checks for same graphs, etc - subtype Finish_Type is Positive; - subtype Order_Type is Positive; - type Edge_Label_Type is record + -- there should be pruning functions for Token, Finished_Token, Token_Group, + -- and for removing all bits unreachable from the root + + -- by inserting and building the parse graph from the bottom up, + -- no pruning needs to be done while parsing until the very end, + -- as even if a bit of parsing fails then the nodes will either be used later + -- or stay unreachable, and thus a single removal of all unreachable + -- nodes at the end will suffice + + -- Token_Groups should include a record of their parent node to make other things easier + + + type Parse_Graph is tagged private; + + function "=" + (Left, Right : in Parse_Graph) + return Boolean; + + Empty_Graph : constant Parse_Graph; + + subtype Finish_Type is Natural; + type Finish_Array is array (Positive range <>) of Finish_Type; + + type Finished_Token is record + Token : Gen_Tokens.Token; Finish : Finish_Type; - Order : Order_Type; end record; + type Finished_Token_Array is array (Positive range <>) of Finished_Token; + function "<" - (Left, Right : in Edge_Label_Type) + (Left, Right : in Finished_Token) return Boolean; - type Finish_Array is array (Positive range <>) of Finish_Type; + function "<" + (Left, Right : in Finished_Token_Array) + return Boolean; + type Token_Group is private with Type_Invariant => + Ada.Containers.">" (Length (Token_Group), 0); + type Token_Group_Array is array (Positive range <>) of Token_Group; + function "<" + (Left, Right : in Token_Group) + return Boolean; - -- This is to avoid some... ambiguities... with "=" - -- functions when instantiating the Base package. - use type Gen_Tokens.Token; - package Base is new Directed_Graphs - (Node_ID_Type => Node_ID_Type, - Edge_ID_Type => Edge_ID_Type, - Node_Label_Type => Node_Label_Type, - Edge_Label_Type => Edge_Label_Type); - subtype Extended_Node_ID_Type is Base.Extended_Node_ID_Type; - subtype Node_Array is Base.Node_Array; - subtype Path is Base.Path; + procedure Assign + (Target : in out Parse_Graph; + Source : in Parse_Graph); - subtype Edge_Type is Base.Edge_Type; - subtype Edge_Array is Base.Edge_Array; + function Copy + (Source : in Parse_Graph) + return Parse_Graph; - function "<" - (Left, Right : in Edge_Type) - return Boolean - renames Base."<"; + procedure Move + (Target, Source : in out Parse_Graph); - subtype Node_Label_Constant_Reference is Base.Node_Label_Constant_Reference; - subtype Node_Label_Reference is Base.Node_Label_Reference; - subtype Edge_Label_Constant_Reference is Base.Edge_Label_Constant_Reference; - subtype Edge_Label_Reference is Base.Edge_Label_Reference; + function Is_Empty + (Container : in Parse_Graph) + return Boolean; + procedure Clear + (Container : in out Parse_Graph); - type Parse_Graph is new Base.Graph with private; - subtype Cursor is Base.Cursor; - function "=" - (Left, Right : in Cursor) - return Boolean - renames Base."="; + function Debug_String + (Container : in Parse_Graph) + return String; - function "=" - (Left, Right : in Parse_Graph) + + + + function Contains + (Container : in Parse_Graph; + Token : in Gen_Tokens.Token) return Boolean; + function Contains + (Container : in Parse_Graph; + Position : in Finished_Token) + return Boolean; + function Contains + (Container : in Parse_Graph; + Grouping : in Token_Group) + return Boolean; + function Reachable + (Container : in Parse_Graph; + Position : in Finished_Token) + return Boolean + with Pre => Container.Has_Root; - No_Node : constant Extended_Node_ID_Type := Base.No_Node; - No_Element : constant Cursor := Base.No_Element; - Empty_Graph : constant Parse_Graph; + function All_Reachable + (Container : in Parse_Graph) + return Boolean + with Pre => Container.Has_Root; + function Valid_Starts_Finishes + (Parent : in Finished_Token; + Subtokens : in Finished_Token_Array) + return Boolean + with Pre => Subtokens'Length > 0; + function No_Loops_Introduced + (Container : in Parse_Graph; + Parent : in Finished_Token; + Subtokens : in Finished_Token_Array) + return Boolean + with Pre => Subtokens'Length > 0 and + Valid_Starts_Finishes (Parent, Subtokens); + function Is_Sorted + (Finishes : in Finish_Array) + return Boolean; - function To_Graph - (Nodes : in Node_Array; - Edges : in Edge_Array) - return Parse_Graph; + function Is_Sorted + (Positions : in Finished_Token_Array) + return Boolean; - function To_Graph - (Nodes : in Node_Array; - Edges : in Edge_Array; - Root : in Extended_Node_ID_Type) - return Parse_Graph; + function Is_Sorted + (Groupings : in Token_Group_Array) + return Boolean; + function No_Duplicates + (Finishes : in Finish_Array) + return Boolean; + function No_Duplicates + (Positions : in Finished_Token_Array) + return Boolean; + function No_Duplicates + (Groupings : in Token_Group_Array) + return Boolean; - procedure Assign - (Target : in out Parse_Graph; - Source : in Parse_Graph); - function Copy - (Source : in Parse_Graph) - return Parse_Graph; - procedure Move - (Target, Source : in out Parse_Graph); + + procedure Include + (Container : in out Parse_Graph; + Token : in Gen_Tokens.Token) + with Post => Container.Contains (Token); + + procedure Connect + (Container : in out Parse_Graph; + Parent : in Finished_Token; + Subtokens : in Finished_Token_Array) + with Pre => Subtokens'Length > 0 and + Valid_Starts_Finishes (Parent, Subtokens) and + Container.No_Loops_Introduced (Parent, Subtokens); + + procedure Prune + (Container : in out Parse_Graph; + Token : in Gen_Tokens.Token) + with Post => not Container.Contains (Token); + + procedure Prune + (Container : in out Parse_Graph; + Position : in Finished_Token) + with Post => not Container.Contains (Position); + + procedure Prune + (Container : in out Parse_Graph; + Grouping : in Token_Group) + with Post => not Container.Contains (Grouping); + + procedure Delete_Unreachable + (Container : in out Parse_Graph) + with Pre => Container.Has_Root, + Post => Container.All_Reachable; - -- Usually you would get a Parse_Graph by way of parsing something - -- with the parser, which would then have a root node already set, - -- being the only node in the graph that has no parents. - function Root + function Has_Root (Container : in Parse_Graph) - return Cursor; + return Boolean; - -- Not really advisable under most circumstances unless you're - -- making a Parse_Graph manually for some reason. procedure Set_Root (Container : in out Parse_Graph; - Node : in Extended_Node_ID_Type); + Token : in Gen_Tokens.Token; + Finishes : in Finish_Array) + with Pre => Container.Contains (Token) and + (for all F of Finishes => F >= Gen_Tokens.Start (Token) - 1), + Post => Container.Has_Root; + procedure Clear_Root + (Container : in out Parse_Graph) + with Post => not Container.Has_Root; + function Root_Token + (Container : in Parse_Graph) + return Gen_Tokens.Token + with Pre => Container.Has_Root; + function Root_Finish_List + (Container : in Parse_Graph) + return Finish_Array + with Pre => Container.Has_Root, + Post => Is_Sorted (Root_Finish_List'Result) and + No_Duplicates (Root_Finish_List'Result); - -- Returns an array of the all possible finish positions resulting - -- from current node's parsing, sorted from shortest to longest. - function Finish_List + function Root_Element (Container : in Parse_Graph; - Node : in Node_ID_Type) - return Finish_Array; + Finish_At : in Finish_Type) + return Finished_Token + with Pre => Container.Has_Root and then + (for some F of Container.Root_Finish_List => F = Finish_At); + + + function Finish_List - (Position : in Cursor) - return Finish_Array; + (Container : in Parse_Graph; + Token : in Gen_Tokens.Token) + return Finish_Array + with Pre => Container.Contains (Token), + Post => Is_Sorted (Finish_List'Result) and + No_Duplicates (Finish_List'Result); - -- Returns an array of children of a node made by a parse that ended - -- at a specified finish position, sorted according to parsing order. - function Sub_Nodes + function Is_Leaf (Container : in Parse_Graph; - Node : in Node_ID_Type; - Finish_At : in Finish_Type) - return Node_Array; + Position : in Finished_Token) + return Boolean + with Pre => Container.Contains (Position); - function Sub_Nodes - (Position : in Cursor; - Finish_At : in Finish_Type) - return Node_Array; + function Is_Branch + (Container : in Parse_Graph; + Position : in Finished_Token) + return Boolean + with Pre => Container.Contains (Position); - -- Deletes a node from the graph then removes any other nodes that - -- were made unreachable from the root by that deletion. - procedure Prune - (Container : in out Parse_Graph; - Node : in Node_ID_Type); + function Subgroups + (Container : in Parse_Graph; + Position : in Finished_Token) + return Token_Group_Array + with Pre => Container.Contains (Position), + Post => Is_Sorted (Subgroups'Result) and + No_Duplicates (Subgroups'Result) and + (for all G of Subgroups'Result => Finish (G) = Position.Finish); - procedure Prune - (Position : in out Cursor); + function First_Index + (Grouping : in Token_Group) + return Positive; + + function Last_Index + (Grouping : in Token_Group) + return Positive; + + function Length + (Grouping : in Token_Group) + return Ada.Containers.Count_Type; + + function Element + (Grouping : in Token_Group; + Index : in Positive) + return Finished_Token + with Pre => Index in First_Index (Grouping) .. Last_Index (Grouping); + + function Elements + (Grouping : in Token_Group) + return Finished_Token_Array + with Post => Is_Sorted (Elements'Result) and + Valid_Starts_Finishes (Parent (Grouping), Elements'Result); + function Parent + (Grouping : in Token_Group) + return Finished_Token; + function Finish + (Grouping : in Token_Group) + return Finish_Type; - -- Tests whether there are multiple potential finish points for any - -- of the nodes in the graph, and hence whether the parse was ambiguous. + + + -- An ambiguous graph means that either some node exists with multiple groups + -- attached to it with the same Group_Finish value, or the root node has multiple + -- groups of any Group_Finish value attached to it. + + function Is_Root_Ambiguous + (Container : in Parse_Graph) + return Boolean + with Pre => Container.Has_Root; + function Is_Ambiguous (Container : in Parse_Graph) return Boolean; + function Ambiguities + (Container : in Parse_Graph; + Ambiguous_Root : out Boolean) + return Finished_Token_Array + with Post => Is_Sorted (Ambiguities'Result) and + No_Duplicates (Ambiguities'Result); - -- Since this package sets the Nodes, Edges, and Labels of the Graphs - -- to be specific types, it cannot be a child package of Directed_Graphs. - -- Yet, it still is an extension of that package. To make it all work - -- conveniently we re-export all the Cursor functions and procedures here. - -- The Graph functions and procedures can be called with dot extension - -- notation regardless so aren't a concern. - function Has_Element - (Position : in Cursor) - return Boolean - renames Base.Has_Element; + function Isomorphic + (Left, Right : in Parse_Graph) + return Boolean; - function Element - (Position : in Cursor) - return Extended_Node_ID_Type - renames Base.Element; - - function Node_Count - (Container : in Cursor) - return Ada.Containers.Count_Type - renames Base.Node_Count; - - function Node_Count_In_Subgraph - (Position : in Cursor) - return Ada.Containers.Count_Type - renames Base.Node_Count_In_Subgraph; - - function Edge_Count - (Container : in Cursor) - return Ada.Containers.Count_Type - renames Base.Edge_Count; - - function Edge_Count_In_Subgraph - (Position : in Cursor) - return Ada.Containers.Count_Type - renames Base.Edge_Count_In_Subgraph; - - function Nodes - (Container : in Cursor) - return Node_Array - renames Base.Nodes; - - function Nodes_In_Subgraph - (Position : in Cursor) - return Node_Array - renames Base.Nodes_In_Subgraph; - - function Edges - (Container : in Cursor) - return Edge_Array - renames Base.Edges; - - function Edges_In_Subgraph - (Position : in Cursor) - return Edge_Array - renames Base.Edges_In_Subgraph; - - function Unused - (Container : in Cursor) - return Node_ID_Type - renames Base.Unused; - - function Unused - (Container : in Cursor) - return Edge_ID_Type - renames Base.Unused; - - procedure Insert - (Container : in Cursor; - Node : in Node_ID_Type) - renames Base.Insert; - - procedure Insert - (Container : in Cursor; - Node : in Node_ID_Type; - Label : in Node_Label_Type) - renames Base.Insert; - - procedure Insert - (Container : in Cursor; - Edge : in Edge_Type) - renames Base.Insert; - - procedure Insert - (Container : in Cursor; - Edge : in Edge_Type; - Label : in Edge_Label_Type) - renames Base.Insert; - - procedure Delete - (Position : in out Cursor) - renames Base.Delete; - - procedure Delete - (Container : in Cursor; - Edge : in Edge_Type) - renames Base.Delete; - - procedure Append_Label - (Position : in Cursor; - Label : in Node_Label_Type) - renames Base.Append_Label; - - procedure Append_Label - (Container : in Cursor; - Edge : in Edge_Type; - Label : in Edge_Label_Type) - renames Base.Append_Label; - - procedure Replace_Label - (Position : in Cursor; - Label : in Node_Label_Type) - renames Base.Replace_Label; - - procedure Replace_Label - (Container : in Cursor; - Edge : in Edge_Type; - Label : in Edge_Label_Type) - renames Base.Replace_Label; - - procedure Delete_Label - (Position : in Cursor) - renames Base.Delete_Label; - - procedure Delete_Label - (Container : in Cursor; - Edge : in Edge_Type) - renames Base.Delete_Label; - - procedure Delete_Subgraph - (Position : in out Cursor) - renames Base.Delete_Subgraph; - - procedure Swap - (Left, Right : in out Cursor) - renames Base.Swap; - - function Constant_Label_Reference - (Position : in Cursor) - return Node_Label_Constant_Reference - renames Base.Constant_Label_Reference; - - function Label_Reference - (Position : in Cursor) - return Node_Label_Reference - renames Base.Label_Reference; - - function Constant_Label_Reference - (Container : in Cursor; - Edge : in Edge_Type) - return Edge_Label_Constant_Reference - renames Base.Constant_Label_Reference; - - function Label_Reference - (Container : in Cursor; - Edge : in Edge_Type) - return Edge_Label_Reference - renames Base.Label_Reference; - - function Has_Label - (Position : in Cursor) + function Isomorphic_Subgraph + (Left_Graph : in Parse_Graph; + Left_Position : in Finished_Token; + Right_Graph : in Parse_Graph; + Right_Position : in Finished_Token) return Boolean - renames Base.Has_Label; + with Pre => Left_Graph.Contains (Left_Position) and + Right_Graph.Contains (Right_Position); - function Has_Label - (Container : in Cursor; - Edge : in Edge_Type) - return Boolean - renames Base.Has_Label; - - function Label - (Position : in Cursor) - return Node_Label_Type - renames Base.Label; - - function Label - (Container : in Cursor; - Edge : in Edge_Type) - return Edge_Label_Type - renames Base.Label; - - function Neighbors - (Position : in Cursor) - return Node_Array - renames Base.Neighbors; - - function Parents - (Position : in Cursor) - return Node_Array - renames Base.Parents; - - function Children - (Position : in Cursor) - return Node_Array - renames Base.Children; - - function Outbound - (Position : in Cursor) - return Edge_Array - renames Base.Outbound; - - function Inbound - (Position : in Cursor) - return Edge_Array - renames Base.Inbound; - - function Between - (Parent, Child : in Cursor) - return Edge_Array - renames Base.Between; - - function Outdegree - (Position : in Cursor) - return Ada.Containers.Count_Type - renames Base.Outdegree; - - function Indegree - (Position : in Cursor) - return Ada.Containers.Count_Type - renames Base.Indegree; - - function Degree - (Position : in Cursor) - return Ada.Containers.Count_Type - renames Base.Degree; - - function Has_Edge - (Parent, Child : in Cursor) - return Boolean - renames Base.Has_Edge; - function Has_Labeled_Edge - (Parent, Child : in Cursor) - return Boolean - renames Base.Has_Labeled_Edge; +private - function Has_Neighbor - (Left, Right : in Cursor) - return Boolean - renames Base.Has_Neighbor; - - function Find_In_Subgraph - (Position : in Cursor; - Label : in Node_Label_Type) - return Node_Array - renames Base.Find_In_Subgraph; - - function Find_In_Subgraph - (Position : in Cursor; - Label : in Edge_Label_Type) - return Edge_Array - renames Base.Find_In_Subgraph; - - function Contains_In_Subgraph - (Position : in Cursor; - Node : in Extended_Node_ID_Type) - return Boolean - renames Base.Contains_In_Subgraph; - function Contains_In_Subgraph - (Position : in Cursor; - Node : in Extended_Node_ID_Type; - Label : in Node_Label_Type) - return Boolean - renames Base.Contains_In_Subgraph; + type Node_ID_Type is new Positive; + type Edge_ID_Type is new Positive; - function Contains_In_Subgraph - (Position : in Cursor; - Edge_ID : in Edge_ID_Type) - return Boolean - renames Base.Contains_In_Subgraph; + subtype Node_Label_Type is Gen_Tokens.Token; - function Contains_In_Subgraph - (Position : in Cursor; - Edge : in Edge_Type) - return Boolean - renames Base.Contains_In_Subgraph; + subtype Group_ID_Type is Positive; - function Contains_In_Subgraph - (Position : in Cursor; - Edge : in Edge_Type; - Label : in Edge_Label_Type) - return Boolean - renames Base.Contains_In_Subgraph; + type Edge_Label_Type is record + Group_ID : Group_ID_Type; + Group_Finish : Finish_Type; + Subnode_Finish : Finish_Type; + end record; - function Contains_Label_In_Subgraph - (Position : in Cursor; - Label : in Node_Label_Type) - return Boolean - renames Base.Contains_Label_In_Subgraph; - function Contains_Label_In_Subgraph - (Position : in Cursor; - Label : in Edge_Label_Type) - return Boolean - renames Base.Contains_Label_In_Subgraph; - function Next - (Position : in Cursor) - return Cursor - renames Base.Next; - procedure Next - (Position : in out Cursor) - renames Base.Next; + function To_Node + (Container : in Parse_Graph; + Token : in Gen_Tokens.Token) + return Node_ID_Type; + + function To_Node + (Container : in Parse_Graph; + Position : in Finished_Token) + return Node_ID_Type; + + function Locally_Reachable + (Container : in Parse_Graph; + Node : in Node_ID_Type) + return Boolean; + + - function Previous - (Position : in Cursor) - return Cursor - renames Base.Previous; - procedure Previous - (Position : in out Cursor) - renames Base.Previous; + -- This 'use type' is to avoid some ambiguities with "=" functions when + -- instantiating the Base package. + use type Gen_Tokens.Token; - function Follow - (Position : in Cursor; - Edge : in Edge_Type) - return Cursor - renames Base.Follow; + package Base is new Directed_Graphs + (Node_ID_Type => Node_ID_Type, + Edge_ID_Type => Edge_ID_Type, + Node_Label_Type => Node_Label_Type, + Edge_Label_Type => Edge_Label_Type); - function Cursor_To - (Position : in Cursor; - Node : in Node_ID_Type) - return Cursor - renames Base.Cursor_To; + package Finish_Vectors is new Ada.Containers.Vectors + (Index_Type => Positive, + Element_Type => Finish_Type); -private + package Finish_Sort is new Finish_Vectors.Generic_Sorting; + package Node_Label_Maps is new Ada.Containers.Ordered_Maps + (Key_Type => Gen_Tokens.Token, + Element_Type => Node_ID_Type); - type Parse_Graph is new Base.Graph with record - Root_Node : Extended_Node_ID_Type := No_Node; + type Parse_Graph is tagged record + Internal_Graph : Base.Graph := Base.Empty_Graph; + Root_Node : Base.Extended_Node_ID_Type := Base.No_Node; + Root_Finishes : Finish_Vectors.Vector := Finish_Vectors.Empty_Vector; + Label_Map : Node_Label_Maps.Map := Node_Label_Maps.Empty_Map; end record; Empty_Graph : constant Parse_Graph := - (Base.Empty_Graph with Root_Node => No_Node); + (Internal_Graph => Base.Empty_Graph, + Root_Node => Base.No_Node, + Root_Finishes => Finish_Vectors.Empty_Vector, + Label_Map => Node_Label_Maps.Empty_Map); + package Finished_Token_Array_Holders is new Ada.Containers.Indefinite_Holders + (Element_Type => Finished_Token_Array); + + type Token_Group is record + Parent : Finished_Token; + Elems : Finished_Token_Array_Holders.Holder; + end record; + + + + + -- should a lot of these actually be ordered sets instead? package Node_Vectors is new Ada.Containers.Vectors (Index_Type => Positive, Element_Type => Node_ID_Type); - package Cursor_Vectors is new Ada.Containers.Vectors + package Group_ID_Vectors is new Ada.Containers.Vectors (Index_Type => Positive, - Element_Type => Cursor); + Element_Type => Group_ID_Type); - package Finish_Vectors is new Ada.Containers.Vectors + package Finished_Token_Vectors is new Ada.Containers.Vectors (Index_Type => Positive, - Element_Type => Finish_Type); + Element_Type => Finished_Token); + + package Token_Group_Vectors is new Ada.Containers.Vectors + (Index_Type => Positive, + Element_Type => Token_Group); + + package Finished_Token_Sort is new Finished_Token_Vectors.Generic_Sorting; + package Token_Group_Sort is new Token_Group_Vectors.Generic_Sorting; + + package Group_Finished_Token_Maps is new Ada.Containers.Ordered_Maps + (Key_Type => Group_ID_Type, + Element_Type => Finished_Token_Vectors.Vector, + "=" => Finished_Token_Vectors."="); - package Finsort is new Finish_Vectors.Generic_Sorting; + package Finish_Group_Maps is new Ada.Containers.Ordered_Maps + (Key_Type => Finish_Type, + Element_Type => Group_ID_Type); + package Enum_Node_Maps is new Ada.Containers.Ordered_Maps + (Key_Type => Label_Enum, + Element_Type => Node_Vectors.Vector, + "=" => Node_Vectors."="); + + generic + type Base_Type is private; + type Array_Type is array (Positive range <>) of Base_Type; + with function "<" (Left, Right : in Base_Type) return Boolean is <>; + function Sorted + (Input : in Array_Type) + return Boolean; + + generic + type Base_Type is private; + type Array_Type is array (Positive range <>) of Base_Type; + with function "=" (Left, Right : in Base_Type) return Boolean is <>; + function No_Dups + (Input : in Array_Type) + return Boolean; + generic type Base_Type is private; type Array_Type is array (Positive range <>) of Base_Type; |