summaryrefslogtreecommitdiff
path: root/experimental/frisch/extension_points.txt
blob: 595536859e46d848fe9a91165b81b78fd6254fb1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
This file describes the changes on the extension_points branch.


=== Attributes

Attributes are "decorations" of the syntax tree which are ignored by
the type-checker.  An attribute is made of an identifier (written id)
and an optional expression (written expr below).

The identifier can be a lowercase or uppercase identifier (including
OCaml keywords) or a sequence of such atomic identifiers separated with
dots (whitespace is allowed around the dots).  In the Parsetree,
the identifier is represented as a single string (without spaces).

Attributes on expressions, type expressions, module expressions, module type expressions,
patterns, class expressions, class type expressions:
 
  ... [@id expr]

The same syntax [@id expr] is also available to add attributes on
constructors and labels in type declarations:

  type t =
    | A [@id1]
    | B [@id2] of int [@id3]

Here, id1 (resp. id2) is attached to the constructor A (resp. B)
and id3 is attached to the int type expression.  Example on records:

 type t =
   {
      x [@id1]: int;
      mutable y [@id2] [@id3]: string [@id4];
   }  


Attributes on items:

  ... [@@id expr]

  Items designate signature items, structure items, class fields,
  class type fields and also individual components of multiple
  declarations in structures and signatures (type declarations, recursive modules, class
  declarations, class type declarations).

  For instance, consider:

    type t1 = ... [@@id1] [@@id2] and t2 = ... [@@id3] [@@id4]

  Here, the attributes on t1 are id1 and id2; the attributes on
  t2 are id3 and id4.

  Note: item attributes are currently not supported on Pstr_eval
  and Pstr_value structure items.

  The [@@id expr] form, when used at the beginning of a signature or
  structure, or after a double semi-colon (;;), defines an attribute
  which stands as a stand-alone signature or structure item (not
  attached to another item).

  Example:

  module type S = sig
    [@@id1]
    type t
    [@@id2]
    ;; [@@id3] [@@id4]
    ;; [@@id5]
    type s
    [@@id6]
  end

  Here, id1, id3, id4, id5 are stand-alone attributes, while
  id2 is attached to the type t and id6 is attached to the type s.

=== Extension nodes

Extension nodes replace valid components in the syntax tree.  They are
normally interpreted and expanded by AST mappers.  The type-checker
fails when it encounters such an extension node.  An extension node is
made of an identifier (an "LIDENT", written id below) and an optional
expression (written expr below).

Two syntaxes exist for extension nodes:

As expressions, type expressions, module expressions, module type expressions,
patterns, class expressions, class type expressions:

  [%id expr]

As structure item, signature item, class field, class type field:
 
  [%%id expr]

As with any other structure item, signature item, class field or class type
field, attributes can be attached to a [%%id expr] extension node.


=== Alternative syntax for attributes and extensions on specific kinds of nodes

All expression constructions starting with a keyword (EXPR = KW REST) support an
alternative syntax for attributes and/or extensions:

  KW[@id expr]...[@id expr] REST
  ---->
  EXPR[@id expr]...[@id expr]

  KW%id REST
  ---->
  [%id EXPR]

  KW%id[@id expr]...[@id expr] REST
  ---->
  [%id EXPR[@id expr]...[@id expr]]


where KW can stand for:
 assert
 begin
 for
 fun
 function
 if
 lazy
 let
 let module
 let open
 match
 new
 object
 try
 while


For instance:

let[@foo] x = 2 in x + 1   ====   (let x = 2 in x + 1)[@foo]
begin[@foo] ... end        ====   (begin ... end)[@foo]
match%foo e with ...       ====   [%foo match e with ...]


=== Quoted strings

Quoted strings give a different syntax to write string literals in
OCaml code.  This will typically be used to support embedding pieces
of foreign syntax fragments (to be interpreted by a -ppx filter or just
a library) in OCaml code.

The opening delimiter has the form {id| where id is a (possibly empty)
sequence of lowercase letters.  The corresponding closing delimiter is
|id} (the same identifier).  Contrary to regular OCaml string
literals, quoted strings don't interpret any character in a special
way.

Example:

String.length {|\"|}   (* returns 2 *)
String.length {foo|\"|foo}   (* returns 2 *)


The fact that a string literal comes from a quoted string is kept in
the Parsetree representation.  The Asttypes.Const_string constructor is
now defined as:

  | Const_string of string * string option

where the "string option" represents the delimiter (None for a string
literal with the regular syntax).


=== Representation of attributes in the Parsetree

Attributes as standalone signature/structure items are represented
by a new constructor:

  | Psig_attribute of attribute
  | Pstr_attribute of attribute

Most other attributes are stored in an extra field in their record:

and expression =  {
  ...
  pexp_attributes: attribute list;
  ...
}
and type_declaration = {
  ...
  ptype_attributes: attribute list;
  ...
}

In a previous version, attributes on expressions (and types, patterns,
etc) used to be stored as a new constructor.  The current choice makes
it easier to pattern match on structured AST fragments while ignoring
attributes.

For open/include signature/structure items and exception rebind
structure item, the attributes are stored directly in the constructor
of the item:

  | Pstr_open of Longident.t loc * attribute list


=== Attributes in the Typedtree

The Typedtree representation has been updated to follow closely the
Parsetree, and attributes are kept exactly as in the Parsetree.  This
can allow external tools to process .cmt/.cmti files and process
attributes in them.  An example of a mini-ocamldoc based on this
technique is in experimental/frisch/minidoc.ml.


=== Other changes to the parser and Parsetree

--- Introducing Ast_helper module

This module simplifies the creation of AST fragments, without having to
touch the concrete type definitions of Parsetree. Record and sum types
are encapsulated in builder functions, with some optional arguments, e.g.
to represent attributes.

--- Relaxing the syntax for signatures and structures

It is now possible to start a signature or a structure with a ";;" token and to have two successive ";;" tokens.

Rationale:
   It makes it possible to always prefix a "standalone" attribute by ";;" independently
   from its context (this will work at the beginning of the signature/structure and after
   another item finished with ";;").

--- Relaxing the syntax for exception declarations

The parser now accepts the same syntax for exception declarations as for constructor declarations,
which permits the GADT syntax:

 exception A : int -> foo

The type-checker rejects this form.  Note that it is also possible to
define exceptions whose name is () or ::.

Attributes can be put on the constructor or on the whole declaration:

 exception A[@foo] of int [@@bar]

Rationale:
  One less notion in the Parsetree, more uniform parsing.  Also
  opens the door to existentials in exception constructors.

--- Relaxing the syntax for recursive modules

Before:
   module X1 : MT1 = M1 and ... and Xn : MTn = Mn

Now:
   module X1 = M1 and ... and Xn = Mn
   (with the usual sugar that Xi = (Mi : MTi) can be written as Xi : MTi = Mi
   which gives the old syntax)

   The type-checker fails when a module expression is not of
   the form (M : MT)
   

Rationale:

1. More uniform representation in the Parsetree.

2. The type-checker can be made more clever in the future to support
   other forms of module expressions (e.g. functions with an explicit
   constraint on its result; or a structure with only type-level
   components).
    
      
--- Turning some tuple or n-ary constructors into records

Before:

  | Pstr_module of string loc * module_expr

After:

  | Pstr_module of module_binding
...
  and module_binding =
    {
     pmb_name: string loc;
     pmb_expr: module_expr;
     pmb_attributes: attribute list;
    }



Rationale:

More self-documented, more robust to future additions (such as
attributes), simplifies some code.


--- Keeping names inside value_description and type_declaration

Before:

  | Psig_type of (string loc * type_declaration) list


After:

  | Psig_type of type_declaration list

....
and type_declaration =
  { ptype_name: string loc;
    ...
  }

Rationale:

More self-documented, simplifies some code.


--- Better representation of variance information on type parameters

Introduced a new type Asttypes.variance to represent variance
(Covariant/Contravariant/Invariant) and use it instead of bool * bool
in Parsetree.  Moreover, variance information is now attached
directly to the parameters fields:

 and type_declaration =
   { ptype_name: string loc;
-    ptype_params: string loc option list;
+    ptype_params: (string loc option * variance) list;
     ptype_cstrs: (core_type * core_type * Location.t) list;
     ptype_kind: type_kind;
     ptype_private: private_flag;
     ptype_manifest: core_type option;
-    ptype_variance: (bool * bool) list;
     ptype_attributes: attribute list;
     ptype_loc: Location.t }


--- Getting rid of 'Default' case in Asttypes.rec_flag

This constructor was used internally only during the compilation of
default expression for optional arguments, in order to trigger a
subsequent optimization (see PR#5975).  This behavior is now
implemented by creating an attribute internally (whose name "#default"
cannot be used in real programs).

Rationale:

 - Attributes give a way to encode information local to the
   type-checker without polluting the definition of the Parsetree.

--- Simpler and more faithful representation of object types

-  | Ptyp_object of core_field_type list
+  | Ptyp_object of (string * core_type) list * closed_flag

(and get rid of Parsetree.core_field_type)

And same in the Typedtree.

Rationale:

 - More faithful representation of the syntax really supported
   (i.e. the ".." can only be the last field).
 - One less "concept" in the Parsetree.


--- Do not require empty Ptyp_poly nodes in the Parsetree

The type-checker automatically inserts Ptyp_poly nodes (with no
variables) where needed.  It is still allowed to put empty
Ptyp_poly nodes in the Parsetree.

Rationale:

 - Less chance that Ast-related code forgets to insert those nodes.

To be discussed: should we segregate simple_poly_type from core_type in the
Parsetree to prevent Ptyp_poly nodes from being inserted in the wrong place?


--- Use constructor names closer to concrete syntax

E.g. Pcf_cstr -> Pcf_constraint.

Rationale:

 - Make the Parsetree more self-documented.

--- Merge concrete/virtual val and method constructors

As in the Typedtree.

-  | Pcf_valvirt of (string loc * mutable_flag * core_type)
-  | Pcf_val of (string loc * mutable_flag * override_flag * expression)
-  | Pcf_virt of (string loc * private_flag * core_type)
-  | Pcf_meth of (string loc * private_flag * override_flag * expression)
+  | Pcf_val of (string loc * mutable_flag * class_field_kind)
+  | Pcf_method of (string loc * private_flag * class_field_kind)
...
+and class_field_kind =
+  | Cfk_virtual of core_type
+  | Cfk_concrete of override_flag * expression
+

--- Explicit representation of "when" guards

Replaced the "(pattern * expression) list" argument of Pexp_function, Pexp_match, Pexp_try 
with "case list", with case defined as:

  {
   pc_lhs: pattern;
   pc_guard: expression option;
   pc_rhs: expression;
  }

and get rid of Pexp_when. Idem in the Typedtree.

Rationale:

  - Make it explicit when the guard can appear.


=== More TODOs

- Adapt pprintast to print attributes and extension nodes.
- Adapt Camlp4 (both its parser(s) and its internal representation of OCaml ASTs).
- Consider adding hooks to the type-checker so that custom extension expanders can be registered (a la OCaml Templates).
- More cleanups to the Parsetree.
- Make the Ast_helper module more user-friendly (e.g. with optional arguments and good default values) and/or
  expose higher-level convenience functions.  Maybe give a way to set the location in a given scope without
  having to pass it explicitly to each constructor.
- Document the Parsetree and Ast_helper modules.

=== Use cases

From https://github.com/gasche/ocaml-syntax-extension-discussion/wiki/Use-Cases

-- Bisect

  let f x =
  match List.map foo [x; a x; b x] with
  | [y1; y2; y3] -> tata
  | _ -> assert false [@bisect VISIT]

;;[@@bisect IGNORE-BEGIN]
let unused = ()
;;[@@bisect IGNORE-END]

-- OCamldoc

val stats : ('a, 'b) t -> statistics
[@@doc
 "[Hashtbl.stats tbl] returns statistics about the table [tbl]:
  number of buckets, size of the biggest bucket, distribution of
  buckets by size."
]
[@@since "4.00.0"]

;;[@@doc section 6 "Functorial interface"]

module type HashedType =
  sig
    type t
      [@@doc "The type of the hashtable keys."]
    val equal : t -> t -> bool
      [@@doc "The equality predicate used to compare keys."]
  end


-- type-conv, deriving

type t = {
  x : int [@default 42];
  y : int [@default 3] [@sexp_drop_default];
  z : int [@default 3] [@sexp_drop_if z_test];
} [@@sexp]


type r1 = {
  r1_l1 : int;
  r1_l2 : int;
} [@@deriving (Dump, Eq, Show, Typeable, Pickle, Functor)]

-- camlp4 map/fold generators

type variable = string
 and term =
  | Var of variable
  | Lam of variable * term
  | App of term * term


class map = [%generate_map term]
or:
[%%generate_map map term]


-- ocaml-rpc

type t = { foo [@rpc "type"]: int; bar [@rpc "let"]: int }
[@@ rpc]

or:

type t = { foo: int; bar: int }
[@@ rpc ("foo" > "type"), ("bar" > "let")]



-- pa_monad

begin%monad
  a <-- [1; 2; 3];
  b <-- [3; 4; 5];
  return (a + b)
end

-- pa_lwt

let%lwt x = start_thread foo
and y = start_other_thread foo in
try%lwt
  let%for_lwt (x, y) = waiting_threads in
  compute blah
with Killed -> bar

-- Bolt

let funct n =
  [%log "funct(%d)" n LEVEL DEBUG];
  for i = 1 to n do
    print_endline "..."
  done


-- pre-polyrecord

let r = [%polyrec x = 1; y = ref None]
let () = [%polyrec r.y <- Some 2]

-- orakuda

function%regexp
  | "$/^[0-9]+$/" as v -> `Int (int_of_string v#_0)
  | "$/^[a-z][A-Za-z0-9_]*$/" as v -> `Variable v#_0
  | _ -> failwith "parse error"

-- bitstring

let bits = Bitstring.bitstring_of_file "/bin/ls" in
match%bitstring bits with
| [ 0x7f, 8; "ELF", 24, string;  (* ELF magic number *)
    e_ident, Mul(12,8), bitstring;    (* ELF identifier *)
    e_type, 16, littleendian;    (* object file type *)
    e_machine, 16, littleendian  (* architecture *)
  ] ->
  printf "This is an ELF binary, type %d, arch %d\n"
    e_type e_machine

-- sedlex

let rec token buf =
  let%regexp ('a'..'z'|'A'..'Z') = letter in
  match%sedlex buf with
  | number -> Printf.printf "Number %s\n" (Sedlexing.Latin1.lexeme buf); token buf
  | letter, Star ('A'..'Z' | 'a'..'z' | digit) -> Printf.printf "Ident %s\n" (Sedlexing.Latin1.lexeme buf); token buf
  | Plus xml_blank -> token buf
  | Plus (Chars "+*-/") -> Printf.printf "Op %s\n" (Sedlexing.Latin1.lexeme buf); token buf
  | Range(128,255) -> print_endline "Non ASCII"
  | eof -> print_endline "EOF"
  | _ -> failwith "Unexpected character"


-- cppo

[%%ifdef DEBUG]
[%%define debug(s) = Printf.eprintf "[%S %i] %s\n%!" __FILE__ __LINE__ s]
[%%else]
[%%define debug(s) = ()]
[%%endif]

debug("test")


-- PG'OCaml

let fetch_users dbh =
  [%pgsql dbh "select id, name from users"]


-- Macaque

let names view = [%view {name = t.name}, t <- !view]


-- Cass

let color1 = [%css{| black |}]
let color2 = [%css{| gray |}]
 
let button = [%css{|
   .button {
     $Css.gradient ~low:color2 ~high:color1$;
     color: white;
     $Css.top_rounded$;
   }
 |}]