summaryrefslogtreecommitdiff
path: root/src/packrat-parse_graphs.ads
blob: 4d2d95663b33e4a8f34f0b3d74f28bbfe38c2794 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532


with

    Ada.Containers,
    Packrat.Traits;

private with

    Ada.Containers.Vectors,
    Ada.Containers.Ordered_Maps,
    Directed_Graphs;


generic

    with package Traits is new Packrat.Traits (<>);

package Packrat.Parse_Graphs is


    --  get rid of Parse_Cursor and replace with Finished_Tokens,
    --  rewrite Groups to also use Finished_Tokens

    --  this will allow simplification regarding checks for same graphs, etc

    --  there should be pruning functions for Token, Finished_Token, Token_Group,
    --  and for removing all bits unreachable from the root

    --  by inserting and building the parse graph from the bottom up,
    --  no pruning needs to be done while parsing until the very end,
    --  as even if a bit of parsing fails then the nodes will either be used later
    --  or stay unreachable, and thus a single removal of all unreachable
    --  nodes at the end will suffice

    --  Token_Groups should include a record of their parent node to make other things easier


    type Parse_Graph is tagged private;

    function "="
           (Left, Right : in Parse_Graph)
        return Boolean;

    Empty_Graph : constant Parse_Graph;

    subtype Finish_Type is Natural;
    type Finish_Array is array (Positive range <>) of Finish_Type;

    type Finished_Token is record
        Token  : Traits.Tokens.Token;
        Finish : Finish_Type;
    end record;

    type Finished_Token_Array is array (Positive range <>) of Finished_Token;

    function "<"
           (Left, Right : in Finished_Token)
        return Boolean;

    function "<"
           (Left, Right : in Finished_Token_Array)
        return Boolean;

    use type Ada.Containers.Count_Type;
    type Token_Group is private with Type_Invariant => Length (Token_Group) > 0;
    type Token_Group_Array is array (Positive range <>) of Token_Group;

    function "<"
           (Left, Right : in Token_Group)
        return Boolean;




    procedure Assign
           (Target : in out Parse_Graph;
            Source : in     Parse_Graph);

    function Copy
           (Source : in Parse_Graph)
        return Parse_Graph;

    procedure Move
           (Target, Source : in out Parse_Graph);




    function Is_Empty
           (Container : in Parse_Graph)
        return Boolean;

    procedure Clear
           (Container : in out Parse_Graph);




    function Debug_String
           (Container : in Parse_Graph)
        return String;




    function Contains
           (Container : in Parse_Graph;
            Token     : in Traits.Tokens.Token)
        return Boolean;

    function Contains
           (Container : in Parse_Graph;
            Position  : in Finished_Token)
        return Boolean;

    function Contains
           (Container : in Parse_Graph;
            Grouping  : in Token_Group)
        return Boolean;

    function Reachable
           (Container : in Parse_Graph;
            Position  : in Finished_Token)
        return Boolean
    with Pre => Container.Has_Root;

    function All_Reachable
           (Container : in Parse_Graph)
        return Boolean
    with Pre => Container.Has_Root;

    function Valid_Token
           (Fin_Token : in Finished_Token)
        return Boolean;

    function Valid_Starts_Finishes
           (Parent    : in Finished_Token;
            Subtokens : in Finished_Token_Array)
        return Boolean
    with Pre => Subtokens'Length > 0;

    function Loops_Introduced
           (Container : in Parse_Graph;
            Parent    : in Finished_Token;
            Subtokens : in Finished_Token_Array)
        return Boolean
    with Pre => Subtokens'Length > 0 and
            Valid_Starts_Finishes (Parent, Subtokens);

    function Is_Sorted
           (Finishes : in Finish_Array)
        return Boolean;

    function Is_Sorted
           (Positions : in Finished_Token_Array)
        return Boolean;

    function Is_Sorted
           (Groupings : in Token_Group_Array)
        return Boolean;

    function No_Duplicates
           (Finishes : in Finish_Array)
        return Boolean;

    function No_Duplicates
           (Positions : in Finished_Token_Array)
        return Boolean;

    function No_Duplicates
           (Groupings : in Token_Group_Array)
        return Boolean;




    procedure Include
           (Container : in out Parse_Graph;
            Token     : in     Traits.Tokens.Token)
    with Post => Container.Contains (Token);

    procedure Connect
           (Container : in out Parse_Graph;
            Parent    : in     Finished_Token;
            Subtokens : in     Finished_Token_Array)
    with Pre => Subtokens'Length > 0 and
            Valid_Starts_Finishes (Parent, Subtokens) and
            not Container.Loops_Introduced (Parent, Subtokens);

    procedure Prune
           (Container : in out Parse_Graph;
            Token     : in     Traits.Tokens.Token)
    with Post => not Container.Contains (Token);

    procedure Prune
           (Container : in out Parse_Graph;
            Position  : in     Finished_Token)
    with Post => not Container.Contains (Position);

    procedure Prune
           (Container : in out Parse_Graph;
            Grouping  : in     Token_Group)
    with Post => not Container.Contains (Grouping);

    procedure Delete_Unreachable
           (Container : in out Parse_Graph)
    with Pre => Container.Has_Root,
        Post => Container.All_Reachable;




    function Has_Root
           (Container : in Parse_Graph)
        return Boolean;

    procedure Set_Root
           (Container : in out Parse_Graph;
            Tokens    : in     Finished_Token_Array)
    with Pre => (for all F of Tokens => Container.Contains (F.Token)),
        Post => Container.Has_Root;

    procedure Clear_Root
           (Container : in out Parse_Graph)
    with Post => not Container.Has_Root;

    function Root_Elements
           (Container : in Parse_Graph)
        return Finished_Token_Array
    with Pre => Container.Has_Root;




    function Finish_List
           (Container : in Parse_Graph;
            Token     : in Traits.Tokens.Token)
        return Finish_Array
    with Pre => Container.Contains (Token),
        Post => Is_Sorted (Finish_List'Result) and
            No_Duplicates (Finish_List'Result);

    function Is_Leaf
           (Container : in Parse_Graph;
            Position  : in Finished_Token)
        return Boolean
    with Pre => Container.Contains (Position);

    function Is_Branch
           (Container : in Parse_Graph;
            Position  : in Finished_Token)
        return Boolean
    with Pre => Container.Contains (Position);




    function Subgroups
           (Container : in Parse_Graph;
            Position  : in Finished_Token)
        return Token_Group_Array
    with Pre => Container.Contains (Position),
        Post => Is_Sorted (Subgroups'Result) and
            No_Duplicates (Subgroups'Result) and
            (for all G of Subgroups'Result => Finish (G) = Position.Finish);

    function First_Index
           (Grouping : in Token_Group)
        return Positive;

    function Last_Index
           (Grouping : in Token_Group)
        return Positive;

    function Length
           (Grouping : in Token_Group)
        return Ada.Containers.Count_Type;

    function Element
           (Grouping : in Token_Group;
            Index    : in Positive)
        return Finished_Token
    with Pre => Index in First_Index (Grouping) .. Last_Index (Grouping);

    function Elements
           (Grouping : in Token_Group)
        return Finished_Token_Array
    with Post => Is_Sorted (Elements'Result) and
            Valid_Starts_Finishes (Parent (Grouping), Elements'Result);

    function Parent
           (Grouping : in Token_Group)
        return Finished_Token;

    function Finish
           (Grouping : in Token_Group)
        return Finish_Type;




    --  An ambiguous graph means that either some node exists with multiple groups
    --  attached to it with the same Group_Finish value, or the root node has multiple
    --  groups of any Group_Finish value attached to it.

    function Is_Root_Ambiguous
           (Container : in Parse_Graph)
        return Boolean
    with Pre => Container.Has_Root;

    function Is_Ambiguous
           (Container : in Parse_Graph)
        return Boolean;

    function Ambiguities
           (Container      : in     Parse_Graph;
            Ambiguous_Root :    out Boolean)
        return Finished_Token_Array
    with Post => Is_Sorted (Ambiguities'Result) and
            No_Duplicates (Ambiguities'Result);




    function Isomorphic
           (Left, Right : in Parse_Graph)
        return Boolean
    with Pre => Left.Has_Root and Right.Has_Root;

    function Isomorphic_Subgraph
           (Left_Graph     : in Parse_Graph;
            Left_Position  : in Finished_Token;
            Right_Graph    : in Parse_Graph;
            Right_Position : in Finished_Token)
        return Boolean
    with Pre => Left_Graph.Contains (Left_Position) and
            Right_Graph.Contains (Right_Position);


private


    use type Traits.Label_Enum;
    use type Traits.Element_Type;
    use type Traits.Element_Array;




    type Node_ID_Type is new Positive;
    type Edge_ID_Type is new Positive;

    subtype Node_Label_Type is Traits.Tokens.Token;

    subtype Group_ID_Type is Positive;

    type Edge_Label_Type is record
        Group_ID       : Group_ID_Type;
        Group_Finish   : Finish_Type;
        Subnode_Finish : Finish_Type;
    end record;




    function To_Node
           (Container : in Parse_Graph;
            Token     : in Traits.Tokens.Token)
        return Node_ID_Type;

    function To_Node
           (Container : in Parse_Graph;
            Position  : in Finished_Token)
        return Node_ID_Type;

    function Locally_Reachable
           (Container : in Parse_Graph;
            Node      : in Node_ID_Type)
        return Boolean;




    --  This 'use type' is to avoid some ambiguities with "=" functions when
    --  instantiating the Base package.
    use type Traits.Tokens.Token;

    package Base is new Directed_Graphs
       (Node_ID_Type    => Node_ID_Type,
        Edge_ID_Type    => Edge_ID_Type,
        Node_Label_Type => Node_Label_Type,
        Edge_Label_Type => Edge_Label_Type);





    package Finished_Token_Vectors is new Ada.Containers.Vectors
       (Index_Type   => Positive,
        Element_Type => Finished_Token);

    function "<"
           (Left, Right : in Finished_Token_Vectors.Vector)
        return Boolean;

    type Token_Group is record
        Parent : Finished_Token;
        Elems  : Finished_Token_Vectors.Vector;
    end record;





    package Finish_Vectors is new Ada.Containers.Vectors
       (Index_Type   => Positive,
        Element_Type => Finish_Type);

    package Finish_Sort is new Finish_Vectors.Generic_Sorting;

    package Node_Label_Maps is new Ada.Containers.Ordered_Maps
       (Key_Type     => Traits.Tokens.Token,
        Element_Type => Node_ID_Type);

    type Parse_Graph is tagged record
        Internal_Graph : Base.Graph                    := Base.Empty_Graph;
        Root_Elems     : Finished_Token_Vectors.Vector := Finished_Token_Vectors.Empty_Vector;
        Label_Map      : Node_Label_Maps.Map           := Node_Label_Maps.Empty_Map;
    end record;




    --  should a lot of these actually be ordered sets instead?
    package Node_Vectors is new Ada.Containers.Vectors
       (Index_Type   => Positive,
        Element_Type => Node_ID_Type);

    package Group_ID_Vectors is new Ada.Containers.Vectors
       (Index_Type   => Positive,
        Element_Type => Group_ID_Type);

    package Token_Group_Vectors is new Ada.Containers.Vectors
       (Index_Type   => Positive,
        Element_Type => Token_Group);

    package Finished_Token_Sort is new Finished_Token_Vectors.Generic_Sorting;
    package Token_Group_Sort is new Token_Group_Vectors.Generic_Sorting;

    package Group_Finished_Token_Maps is new Ada.Containers.Ordered_Maps
       (Key_Type     => Group_ID_Type,
        Element_Type => Finished_Token_Vectors.Vector,
        "="          => Finished_Token_Vectors."=");

    package Finish_Group_Maps is new Ada.Containers.Ordered_Maps
       (Key_Type     => Finish_Type,
        Element_Type => Group_ID_Type);

    package Enum_Node_Maps is new Ada.Containers.Ordered_Maps
       (Key_Type     => Traits.Label_Enum,
        Element_Type => Node_Vectors.Vector,
        "="          => Node_Vectors."=");





    package Isomorph_Maps is new Ada.Containers.Ordered_Maps
       (Key_Type     => Finished_Token,
        Element_Type => Finished_Token_Vectors.Vector,
        "="          => Finished_Token_Vectors."=");

    function Group_Isomorph
           (Left_Graph        : in     Parse_Graph;
            Left_Token_Group  : in     Token_Group;
            Right_Graph       : in     Parse_Graph;
            Right_Token_Group : in     Token_Group;
            Offset            : in     Integer;
            Mapping           : in out Isomorph_Maps.Map)
        return Boolean;

    function Token_Isomorph
           (Left_Graph     : in     Parse_Graph;
            Left_Position  : in     Finished_Token;
            Right_Graph    : in     Parse_Graph;
            Right_Position : in     Finished_Token;
            Offset         : in     Integer;
            Mapping        : in out Isomorph_Maps.Map)
        return Boolean;




    Empty_Graph : constant Parse_Graph :=
       (Internal_Graph => Base.Empty_Graph,
        Root_Elems     => Finished_Token_Vectors.Empty_Vector,
        Label_Map      => Node_Label_Maps.Empty_Map);




    generic
        type Base_Type is private;
        type Array_Type is array (Positive range <>) of Base_Type;
        with function "<" (Left, Right : in Base_Type) return Boolean is <>;
    function Sorted
           (Input : in Array_Type)
        return Boolean;

    generic
        type Base_Type is private;
        type Array_Type is array (Positive range <>) of Base_Type;
        with function "=" (Left, Right : in Base_Type) return Boolean is <>;
    function No_Dups
           (Input : in Array_Type)
        return Boolean;

    generic
        type Base_Type is private;
        type Array_Type is array (Positive range <>) of Base_Type;
        with package Type_Vectors is new Ada.Containers.Vectors
           (Index_Type   => Positive,
            Element_Type => Base_Type);
    function Vector_To_Array
           (Input : in Type_Vectors.Vector)
        return Array_Type;


end Packrat.Parse_Graphs;