docs/backpack/algorithm.tex


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392

% TODO
%   - definite typechecker/compiler implies it only looks at the
%   installed unit database, not TRUE


\documentclass{article}

\usepackage{pifont}
\usepackage{graphicx} %[pdftex] OR [dvips]
\usepackage{fullpage}
\usepackage{wrapfig}
\usepackage{float}
\usepackage{titling}
\usepackage{hyperref}
\usepackage{tikz}
\usepackage{color}
\usepackage{footnote}
\usepackage{float}
\usepackage{algpseudocode}
\usepackage{bigfoot}
\usepackage{amssymb}
\usepackage{amsmath}
\usepackage{framed}

% Alter some LaTeX defaults for better treatment of figures:
% See p.105 of "TeX Unbound" for suggested values.
% See pp. 199-200 of Lamport's "LaTeX" book for details.
%   General parameters, for ALL pages:
\renewcommand{\topfraction}{0.9}	% max fraction of floats at top
\renewcommand{\bottomfraction}{0.8}	% max fraction of floats at bottom
%   Parameters for TEXT pages (not float pages):
\setcounter{topnumber}{2}
\setcounter{bottomnumber}{2}
\setcounter{totalnumber}{4}     % 2 may work better
\setcounter{dbltopnumber}{2}    % for 2-column pages
\renewcommand{\dbltopfraction}{0.9}	% fit big float above 2-col. text
\renewcommand{\textfraction}{0.07}	% allow minimal text w. figs
%   Parameters for FLOAT pages (not text pages):
\renewcommand{\floatpagefraction}{0.7}	% require fuller float pages
% N.B.: floatpagefraction MUST be less than topfraction !!
\renewcommand{\dblfloatpagefraction}{0.7}	% require fuller float pages

% remember to use [htp] or [htpb] for placement

\newcommand{\I}[1]{\ensuremath{\mathit{#1}}}

\newcommand{\optionrule}{\noindent\rule{1.0\textwidth}{0.75pt}}

\newenvironment{aside}
  {\begin{figure}\def\FrameCommand{\hspace{2em}}
   \MakeFramed{\advance\hsize-\width}\optionrule\small}
{\par\vskip-\smallskipamount\optionrule\endMakeFramed\end{figure}}

\setlength{\droptitle}{-6em}

\newcommand{\Red}[1]{{\color{red} #1}}

\title{The Backpack algorithm}

\begin{document}

\maketitle

\noindent Backpack introduces the \emph{Backpack language}, which is
used to define \emph{Backpack units} that provide some modules given
implementations for some signatures.  A Backpack unit can be
immediately typechecked and elaborated into core; at some later point,
each unit may then be compiled one or more times with different
instantiations of its requirements.

A unit is typechecked against the indefinite unit database
(and the installed unit database, for units that have no
requirements) in three steps:

\begin{enumerate}
    \item The \textbf{dependency solver} computes the module and unit
    dependency structure of the declarations in a unit.  Specifically,
    it produces (1) the set of \I{ModuleName}s which are required
    by the unit, (2) a directed acyclic graph labeled by \I{Module} of
    the modules and signatures of the unit, and (3) a directed (and acyclic, for now)
    graph labeled by \I{UnitId} of the includes of the unit.

    \item The \textbf{shaper} takes each module/signature in the DAG and
    computes its \I{Shape}, i.e., the list of \I{AvailInfo}s which are
    provided/required by each module/signature (respectively).  This
    step is interleaved with the next step.

    \item The \textbf{type checker} takes each
    module/signature in the DAG (annotated with its shape) and type
    checks it.  Cabal will save these type checking results
    in the indefinite unit database under the \I{ComponentId} associated
    with this unit.
\end{enumerate}

At some later point in time, a unit may be compiled against
the installed unit database, if the user specifies a mapping which
instantiates the requirements of a unit (a mapping from \I{ModuleName}s to \I{Module}s):

\begin{enumerate}
    \item The \textbf{dependency solver} operates as before, but it also
    is responsible for computing the full set of \I{InstalledUnitId}s which
    must be compiled before this unit can be compiled, and compiling
    them.

    \item The \textbf{shaper} is not needed. (ToDo: except for recursive
    module loops.)

    \item The \textbf{type checker and compiler} takes each
    module/signature in the DAG, and typechecks and compiles the unit.
    Cabal will save the type checking results and object
    code to the installed unit database under the \I{InstalledUnitId}
    associated with this (instantiated) unit.
\end{enumerate}

\Red{ToDo: Rewrite this for added clarity}

\newpage

\section{Front-end syntax}

\begin{figure}[htpb]
$$
\begin{array}{rcll}
p,q,r && \mbox{Component names} \\
m,n   && \mbox{Module names} \\[1em]
\multicolumn{3}{l}{\mbox{\bf Units}} \\
  \I{unit} & ::= & \verb|unit|\; p\; [\I{provreq}]\; \verb|where {| d_1 \verb|;| \ldots \verb|;| d_n \verb|}| \\[1em]
\multicolumn{3}{l}{\mbox{\bf Declarations}} \\
  d & ::= & \verb|module|\;    m \; [\I{exports}]\; \verb|where|\; \I{body} \\
    & |   & \verb|signature|\; m \; [\I{exports}]\; \verb|where|\; \I{body} \\
    & |   & \verb|include|\; p \; [\I{provreq}] \\[1em]
\multicolumn{3}{l}{\mbox{\bf Provides/requires specification}} \\
\I{provreq} & ::= & \verb|(| \, \I{rns} \, \verb|)| \; 
        [ \verb|requires(|\, \I{rns} \, \verb|)| ] \\
\I{rns} & ::= & \I{rn}_0 \verb|,| \, \ldots \verb|,| \, \I{rn}_n [\verb|,|] & \mbox{Renamings} \\
\I{rn} & ::= & m\; \verb|as| \; n & \mbox{Renaming} \\[1em] 
\multicolumn{3}{l}{\mbox{\bf Haskell code}} \\
\I{exports} & & \mbox{A Haskell module export list} \\
\I{body}    & & \mbox{A Haskell module body} \\
\end{array}
$$
\caption{Syntax of Backpack} \label{fig:syntax}
\end{figure}

A (slightly simplified) syntax of Backpack is given in Figure~\ref{fig:syntax}.

\newpage
\section{Unit databases and the unit renamer}

\begin{figure}[htpb]
$$
\begin{array}{rcll}
\multicolumn{3}{l}{\mbox{\bf Identifiers}} \\
  \I{ComponentId} & ::= & \mbox{Opaque unique identifier}  \\
  \I{UnitId} & ::= & \I{ComponentId} \verb|(|\, \I{HoleMap}\, \verb|)|  \\
              & | & \verb|hole| \\
  \I{HoleMap} & ::= & \I{ModuleName}\; \verb|->|\; \I{Module}\; \verb|,|\, \ldots \\
  \I{Module} & ::= & \I{UnitId} \verb|:| \I{ModuleName}  \\
  \I{InstalledUnitId} & ::= & \I{UnitId} \quad \mbox{(with no occurrences of \texttt{hole})} \\[1em]
\multicolumn{3}{l}{\mbox{\bf Unit databases}} \\
  \I{ComponentDb} & ::= & \I{ComponentId} \; \verb|->| \; \I{ComponentRecord} \verb|,|\, \ldots \\
  \I{InstalledUnitDb} & ::= & \I{InstalledUnitId} \; \verb|->| \; \I{InstalledUnitRecord} \verb|,|\, \ldots \\
\end{array}
$$
\caption{Unit identification} \label{fig:ids}
\end{figure}

\subsection{The component and unit database}

To install a package so that it is available when other packages are compiled,
we must record it in some sort of database (which the compiler will query later).
The obvious design for such a database is for it to record \emph{installed packages},
each of which has a collection of object files and interfaces from a compilation.
However, with Backpack, we have to refine this picture in two ways:

\begin{enumerate}
    \item A package is always a unit of distribution: something that has
    single authorship and is uploaded to Hackage.  It would seriously hamper
    modular programming in the small, however, if you always had to create
    a new package to abstract over some requirements.  So we say that a
    package can define multiple \emph{components}.  The \emph{component database}
    records typechecked components.
    \item  In Backpack, it may not be possible to \emph{compile} a component
    at install time: it may depend on some (as yet) unspecified holes.
    A component that is compiled to a specific instantiation is called a
    \emph{unit}.  Thus, we maintain a second \emph{installed unit database}
    which records compiled units; the \emph{component database} contains
    typechecked-only components.  (In practice, these two databases are
    stored together, as components with no holes live both in the component
    database and the installed unit database.)
\end{enumerate}
%
Thus, there are three closely related types of identifier to be aware of:

\begin{description}
\item[Component IDs]  A component ID uniquely represents a component
in a Cabal package, including the name and version of the containing package,
the transitive dependencies of the component, and even the build information
for the component.  This ID is opaque to GHC and selected by Cabal
(although GHC may take a component ID and suffix it with a unit name to
derive a new component ID.)  Component IDs identify entries in the
\textbf{component database}, which contains the results of typechecking
a component, but no actual object code.  However, it does contain the
elaborated source, so that it can be built into actual code when
the requirement is filled.

\item[Unit ID]  A unit ID is a component ID augmented with a
\I{HoleMap}, which says how the requirements of the component are
instantiated.  Every component ID induces a unit ID, where each
requirement is filled with a fictitious unit \verb|hole|: when we
typecheck a component for the component database, it is as if we are
``compiling'' it instantiated with holes.  Units instantiated with holes
are never installed; they are strictly for type-checking (although we
could generate code for them, which could be linked if the \verb|hole|
symbols are rewritten to their true destinations).

\item[Installed unit IDs]  An installed unit ID is a unit ID which has
no \verb|hole|s; it identifies a unit that can be compiled.  The
\textbf{installed unit database} caches the compilation results of these
units, so that if a unit is compiled multiple times with the same
instantiation, this code can be reused. (This database most closely
resembles the existing installed package database with GHC today.)
\end{description}

\subsection{The unit renamer}

The unit renamer is responsible for transforming component names in a
Backpack file into \I{ComponentId}s, so that they can be uniquely
identified in the component database.
Given a base \I{ComponentId} of the library component we are compiling (\I{ThisComponentId})
and a mapping from $p$ to \I{ComponentId} (\I{ComponentNameMap}), we rename
as follows:

\begin{itemize}
    \item Every unit declaration $\verb|unit|\; p$ is renamed to $\I{ThisComponentId}\, \verb|-|\, p$.
    \item Every unit include $\verb|include|\; p$ is renamed to $\I{ThisComponentId}\, \verb|-|\, p$ if $p$ was declared in the Backpack file; otherwise it is renamed according to \I{ComponentNameMap}.
\end{itemize}

The \I{ComponentNameMap} is entirely user specified, so there is a great deal
of flexibility on how it can be created, but the convention we expect to
be used by Cabal is that a component name $p$ corresponds to the same-named
unit in a \emph{package} named $p$.  Packages that don't use Backpack
only have one component, the library component, which has the same name as the package.

\paragraph{Notational conventions}
In the rest of this document, when it is unambiguous, we will use $p$, $q$ and $r$
interchangeably with \I{ComponentId}, as after unit renaming, there
are no occurrences of component names.

\newpage
\section{Dependency solver}

\begin{figure}[htpb]
$$
\begin{array}{rcll}
  \tilde{d} & ::= & \verb|module|\;    \I{Module} \; [\I{exports}]\; \verb|where|\; \I{body} \\
    & |   & \verb|signature|\; \I{Module} \; [\I{exports}]\; \verb|where|\; \I{body} \\
    & |   & \verb|merge|\; \I{Module} \\
    & |   & \verb|include|\; \I{UnitId} \\
  \I{ComponentRecord}^{\mathsf{dep}} & ::= & \verb|provides:|\; m\; \verb|->|\; \I{Module}\verb|,|\, \ldots\\
    & & \verb|requires:|\; m\verb|,|\, \ldots
\end{array}
$$
\caption{Resolved declarations} \label{fig:resolved}
\end{figure}

The dependency solver computes the unfilled requirements of a component, a
dependency DAG on the modules/signatures in the component, and a dependency
DAG on the includes in the component.  We assume every referenced $p$ in the
component must be recorded in the component database, such that we can
look up $\I{ComponentRecord}^{\mathsf{dep}}$.

\paragraph{Computing unfilled requirements}  The unfilled requirements are $R-P$, where $R$ and $P$ are sets of module names computed from the declarations in the following manner:

\begin{itemize}
    \item $\verb|include|\; p$: union the (domain of the) provisions with $P$ and the requirements with $R$.
    \item $\verb|module|\; m$: add $m$ to $P$.
    \item $\verb|signature|\; m$: add $m$ to $R$.
\end{itemize}

\paragraph{Declaration dependency graph}
We define a graph where the nodes are as described in Figure~\ref{fig:resolved}:
there is a node
for each module and signature, as well as an extra ``merge'' node for
every unfilled requirement, which merges the interfaces of a local signature and
any requirements brought in from includes.
%
Each node is identified by the tuple $\left(\I{Module}, \I{IsSource?}\right)$, where
the \I{Module} of a declaration $m$ in component $p$ is \verb|p(H):m|, where $H$ maps
each unfilled requirement $n$ to \verb|hole:n|, and \I{IsSource?} is true only for signatures.

The edges of the directed graph signify a ``depends on'' relation, and are
defined as follows:

\begin{itemize}
    \item A module/signature $m$ depends on a module/signature merge $n$ if $m$ imports $n$.
    \item A module/signature $m$ depends on a signature $n$ if $m$ \verb|{-# SOURCE #-}| imports $n$.
    \item A module/signature merge $m$ depends on a local signature $m$ (if it exists).
\end{itemize}
%
If the resulting graph has a cycle, this is an error.

\paragraph{Include dependency graph}  We define a dependency graph
between includes, where an $\verb|include|\; p$ depends on
$\verb|include|\; q$ if, for some module name $m$, $p$ requires $m$ and
$q$ provides $m$.  If there is a cycle, then there is cross-component
mutual recursion: for now, this is an error.

Assuming an acyclic graph, we can compute the \I{UnitId} of each
key as follows.  Initialize $\Gamma$, a substitution from holes to \I{Module},
to the identity substitution. For each $\verb|include|\; p$ in topological
order, define its \I{UnitId} to be $p$ with the mapping $\Gamma$ with its
domain restricted to the requirements of $p$.  Then, for each provision
$m\; \verb|->|\; \I{Module}$, update $\Gamma$ so that
$\Gamma(m) = \operatorname{subst} (\Gamma, \I{Module})$
(where $\operatorname{subst}$ recursively applies the substitution $\Gamma$ in \I{Module}).

During compilation, the include dependency graph is useful for
determining a compilation order for included units.

\newpage
\section{Requirement calculation}

\Red{to write}

\newpage
\section{Shaping pass}

\begin{figure}[htpb]
$$
\begin{array}{rcll}
\I{Shape} & ::= & \verb|provides:|\; m \; \verb|->|\; \I{Module}\; \I{ModShape} \verb|;|\; \ldots \\
      &     & \verb|requires:| \; m \; \verb|->|\; \textcolor{white}{\I{Module}}\; \I{ModShape}  \verb|;|\; \ldots \\
\I{ModShape} & ::= & \I{AvailInfo}_0 \verb|,|\, \ldots \verb|,|\, \I{AvailInfo}_n \\
\I{AvailInfo} & ::= & \I{Name} & \mbox{Plain identifiers} \\
          & |   & \I{Name} \, \verb|{| \, \I{Name}_0\verb|,| \, \ldots\verb|,| \, \I{Name}_n \, \verb|}| & \mbox{Type constructors} \\
\I{Name}   & ::= & \I{Module} \verb|.| \I{OccName} \\
\I{OccName} & & \mbox{Unqualified name in a namespace} \\
\I{ComponentRecord}^{\mathsf{shape}} & ::= & \I{Shape}
\end{array}
$$
\caption{Shaping} \label{fig:shaping}
\end{figure}

The shaping pass computes the export \I{AvailInfo}s for each node in
the dependency graph; collectively, these form the \I{Shape} of
the unit described in Figure~\ref{fig:shaping}.  Equivalently,
the \I{Shape} of a unit specifies what a unit requires and provides
at the Haskell declaration level.

An \I{AvailInfo} names a Haskell declaration that may be exported.
It may be a plain identifier \I{Name}, or it may be a type constructor,
in which case it has children \I{Name}s representing the names of the
data constructors, record selectors, etc.  This level of hierarchy
makes it possible to use ellipses in an import list, e.g. \verb|TyCon(..)|,
to selectively import just the logical children of a type constructor.
Children names have the invariant that they have the same \I{Module} as the parent name.
In a \I{ModShape}/export list,
the \I{OccName}s of the plain identifier \I{AvailInfo}s and the \emph{children}
of type constructors are unique
(although the top-level \I{Name}s may not have unique \I{OccName}s).

The compilation of every node is associated with a ``shape context'',
which represents the modules which are transitively depended upon. Let the
environment shape context be the merge of the shapes of all includes; to
shape a node:

\begin{enumerate}
    \item Merge the environment shape context with the shape contexts
    of all direct dependencies, resulting in the initial shape context.
    \item Rename the module/signature according to the initial shape context,
    getting a \I{ModShape}.  Importantly, when renaming the signature \verb|M|,
    any declarations defined in the signature are assigned \I{Name}s with the \I{Module} \verb|hole:M| (rather than a \I{Module} based on the current unit $p$).
    \item Merge this \I{ModShape} into the initial shape context
    (modules go in provisions while signatures go in requirements), the
    result defining the shape context of this node.
\end{enumerate}

We now elaborate on these steps in more detail.

\subsection{Shapes of includes}
Given an \verb|include p (X) requires (Y)|, we can look up the shape
for $p$ from the indefinite package database.  However, an include can
also rename provisions and requirements (where $X$, $Y$ are partial maps
from module name to module name), which transforms the shape
in the following way:

\begin{itemize}
    \item For each original provision $m\; \verb|->|\; \ldots$, provide
          $X (m)\; \verb|->|\; \ldots$ if $X (m)$ is defined.
    \item For each original requirement $m\; \verb|->|\; \ldots$, require
          $Y (m)\; \verb|->|\; \ldots$ if $Y (m)$ is defined, and $m$ if it is not.
          (Non-mentioned requirements are always passed through).
    \item For each requirement renaming from \verb|M| to \verb|N| in $Y$, substitute
          all occurrences of \verb|hole:M| to \verb|hole:N| in the \I{ModShape}
          of all provisions and requirements.
\end{itemize}

\subsection{Shape merging}
Before specifying the shape merging algorithm, we must define some subprocedures
for unifying and merging lower-level entities such as \I{AvailInfo}s and \I{Name}s,
which produce \I{Name} substitutions that are applied to shapes.

\begin{description}
    \item[Unify two \textit{Name}s]
    (produces a \I{Name} and a \I{Name} substitution) \\
    Error if the names do not have matching \I{OccName}s.  Error if neither name
    is a hole name.  Otherwise, without loss of generality let $m$ be the hole name
    and $n$ the other name, return $n$ and the substitution of $m$ to $n$.
    \item[Merge two sets of \textit{Name}s]
    (produces a set of \I{Name}s and a \I{Name} substitution) \\
    Let two \I{Name}s be related if they have the same \I{OccName}.
    Union the two sets, unifying related names.
    \item[Unify two \textit{AvailInfo}s]
    (produces an \I{AvailInfo} and a \I{Name} substitution) \\
    If both \I{AvailInfo}s are simply
    a \I{Name}, unify the two \I{Name}s.  If both \I{AvailInfo}s are
    $\I{Name}\, \verb|{|\, \I{Name}_0\verb|,|\, \ldots\verb|,|\, \I{Name}_n\, \verb|}|$,
    unify the top-level \I{Name}, apply the substitution to both \I{AvailInfo}s,
    and return the unified \I{Name} with the union of the child names of the
    substituted \I{AvailInfo}s.
    Otherwise, error.
    \item[Merge two sets of \textit{AvailInfo}s]
    (produces a set of \I{AvailInfo}s and a \I{Name} substitution) \\
    Let two \I{AvailInfo}s be related if they both are of the form
    \I{Name} and have matching \I{OccName}s, or if they both are of
    the form
    $\I{Name}\, \verb|{|\, \I{Name}_0\verb|,|\, \ldots\verb|,|\, \I{Name}_n\, \verb|}|$
    and there exists a child name in each which have matching \I{OccName}s.
    Union the two sets, unifying related \I{AvailInfo}s.
    \item[Apply a name substitution on an \textit{AvailInfo}] (produces an \I{AvailInfo}) \\
    Substitute the top-level \I{Name}, which induces a substitution from
    \I{Module} to $\I{Module}'$.  Apply this module substitution to each child
    \I{Name} in the \I{AvailInfo}.

\end{description}
%
Shape merging takes two units with inputs (requirements) and outputs
(provisions) and ``wires'' them up so that outputs feed into inputs. To
merge the shape of $p$ with the shape of $q$:

\begin{enumerate}
    \item \emph{Fill every requirement of $q$ with provided modules from
        $p$.} For each requirement $M$ of $q$ that is provided by $p$,
        substitute each \I{Module} occurrence of \verb|hole:M| with the
        provided $p\verb|(|M\verb|)|$ (however, do \textbf{NOT} substitute the
        top-level \I{Module} of a \I{Name}), merge the \I{AvailInfo}s and apply
        the resulting \I{Name} substitution, and
        remove the requirement from $q$.  If the \I{AvailInfo}s of the
        provision are not a superset of the required \I{AvailInfo}s,
        error.
    \item If mutual recursion is supported, \emph{fill every requirement
        of $p$ with provided modules from $q$.}
    \item \emph{Merge leftover requirements.}  For each requirement $M$
        of $q$ that is not provided by $p$ but required by $p$, let
        the new requirement be the merge of the
        \I{AvailInfo}s, applying the resulting \I{Name} substitution.
    \item \emph{Add provisions of $q$.} Union the provisions of $p$ and $q$, (lazily) erroring
        if there is a duplicate that doesn't have the same \I{Module}.
\end{enumerate}

\newpage
\section{Indefinite type checker}

\begin{figure}[htpb]
$$
\begin{array}{rcll}
\I{ComponentRecord} & ::= & \I{ModIface}_0 \verb|;|\, \ldots\verb|;|\, \I{ModIface}_n \\[1em]
\multicolumn{3}{l}{\mbox{\bf Module interface}} \\
\I{ModIface} & ::= & \verb|module| \; \I{Module} \; \verb|(| \I{mi\_exports} \verb|)| \; \verb|where| \\
& & \qquad \I{mi\_decls} \\
& & \qquad \I{mi\_insts} \\
& & \qquad \I{dep\_orphs} \\
\I{mi\_exports} & ::= & \I{AvailInfo}_0 \verb|,|\, \ldots \verb|,|\, \I{AvailInfo}_n & \mbox{Export list} \\
\I{mi\_decls} & ::= & \I{IfaceDecl}_0 \verb|;|\, \ldots \verb|;|\, \I{IfaceDecl}_n & \mbox{Defined declarations} \\
\I{mi\_insts} & ::= & \I{IfaceClsInst}_0 \verb|;|\, \ldots \verb|;|\, \I{IfaceClsInst}_n & \mbox{Defined instances} \\
\I{dep\_orphs} & ::= & \I{Module}_0 \verb|;|\, \ldots \verb|;|\, \I{Module}_n & \mbox{Transitive orphan dependencies} \\[1em]
\multicolumn{3}{l}{\mbox{\bf Interface declarations}} \\
\I{IfaceDecl} & ::= & \I{OccName} \; \verb|::| \; \I{IfaceId} \\
              & |   & \verb|data| \; \I{OccName} \; \verb|=| \;\ \I{IfaceData} \\
              & |   & \ldots \\
\I{IfaceClsInst} & & \mbox{A type-class instance} \\
\I{IfaceId} & & \mbox{Interface of top-level binder} \\
\I{IfaceData} & & \mbox{Interface of type constructor} \\
\end{array}
$$
\caption{Module interfaces in GHC} \label{fig:typecheck}
\end{figure}

\Red{This needs updating.}

In general terms,
type checking an indefinite unit (a unit with holes) involves
calculating, for every module, a \I{ModIface} representing the
type/interface of the module in question (which is serialized
to disk).  The general form of these
interface files is described in Figure~\ref{fig:typecheck}; notably,
the interfaces \I{IfaceId}, \I{IfaceData}, etc. contain \I{Name} references,
which must be resolved by
looking up a \I{ModIface} corresponding to the \I{Module} associated
with the \I{Name}. (We will say more about this lookup process shortly.)
For example, given:

\begin{verbatim}
    unit p where
        signature H where
            data T
        module A(S, T) where
            import H
            data S = S T
\end{verbatim}
%
the \I{PkgType} is:

\begin{verbatim}
    module hole:H (hole:H.T) where
        data T -- abstract type constructor
    module THIS:A (THIS:A.S, hole:H.T) where
        data S = S hole:H.T
    -- where THIS = p(H -> hole:H)
\end{verbatim}
%
However, while it is true that the \I{ModIface} is the final result
of type checking, we actually are conflating two distinct concepts: the user-visible
notion of a \I{ModuleName}, which, when imported, brings some \I{Name}s
into scope (or could trigger a deprecation warning, or pull in some
orphan instances\ldots), versus the actual declarations, which, while recorded
in the \I{ModIface}, have an independent existence: even if a declaration
is not visible for an import, we may internally refer to its \I{Name}, and
need to look it up to find out type information.  (A simple case when
this can occur is if a module exports a function with type \verb|T -> T|,
but doesn't export \verb|T|).

\begin{figure}[htpb]
$$
\begin{array}{rcll}
\I{ModDetails} & ::= & \langle\I{md\_types} \verb|;|\; \I{md\_insts}\rangle \\
\I{md\_types}  & ::= & \I{TyThing}_0 \verb|,|\, \ldots\verb|,|\, \I{TyThing}_n \\
\I{md\_insts}  & ::= & \I{ClsInst}_0 \verb|,|\, \ldots\verb|,|\, \I{ClsInst}_n \\[1em]
\multicolumn{3}{l}{\mbox{\bf Type-checked declarations}} \\
\I{TyThing}    &     & \mbox{Type-checked thing with a \I{Name}} \\
\I{ClsInst}    &     & \mbox{Type-checked type class instance} \\
\end{array}
$$
\caption{Semantic objects in GHC} \label{fig:typecheck-more}
\end{figure}

Thus, a \I{ModIface} can be type-checked into a \I{ModDetails}, described in
Figure~\ref{fig:typecheck-more}.  Notice that a \I{ModDetails} is just
a bag of type-checkable entities which GHC knows about.  We
define the \emph{external package state (EPT)} to
simply be the union of the \I{ModDetails}
of all external modules.

Type checking is a delicate balancing act between module
interfaces and our semantic objects.  A \I{ModIface} may get
type-checked multiple times with different hole instantiations
to provide multiple \I{ModDetails}.
Further complicating matters
is that GHC does this resolution \emph{lazily}: a \I{ModIface}
is only converted to a \I{ModDetails} when we are looking up
the type of a \I{Name} that is described by the interface;
thus, unlike usual theoretical treatments of type checking, we can't
eagerly go ahead and perform substitutions on \I{ModIface}s when
they get included.

In a separate compiler like GHC, there are two primary functions we must provide:

\paragraph{\textit{ModuleName} to \textit{ModIface}}  Given a \I{ModuleName} which
was explicitly imported by a user, we must produce a \I{ModIface}
that, among other things, specifies what \I{Name}s are brought
into scope.  This is used by the renamer to resolve plain references
to identifiers to real \I{Name}s.  (By the way, if shaping produced
renamed trees, it would not be necessary to do this step!)

\paragraph{\textit{Module} to \textit{ModDetails}/EPT}  Given a \I{Module} which may be
a part of a \I{Name}, we must be able to type check it into
a \I{ModDetails} (usually by reading and typechecking the \I{ModIface}
associated with the \I{Module}, but this process is involved).  This
is used by the type checker to find out type information on things. \\

There are two points in the type checker where these capabilities are exercised:

\paragraph{Source-level imports}  When a user explicitly imports a
module, the \textit{ModuleName} is mapped to a \textit{ModIface}
to find out what exports are brought into scope (\I{mi\_exports})
and what orphan instances must be loaded (\I{dep\_orphs}).  Additionally,
the \textit{Module} is loaded to the EPT to bring instances from
the module into scope.

\paragraph{Internal name lookup}  During type checking, we may have
a \I{Name} for which we need type information (\I{TyThing}).  If it's not already in the
EPT, we type check and load
into the EPT the \I{ModDetails} of the \I{Module} in the \I{Name},
and then check the EPT again. (\verb|importDecl|)

\subsection{\textit{ModuleName} to \textit{ModIface}}

In all cases, the \I{mi\_exports} can be calculated directly from the
shaping process, which specifies exactly for each \I{ModuleName} in scope
what will be brought into scope.

\paragraph{Modules} Modules are straightforward, as for any
\I{Module} there is only one possible \I{ModIface} associated
with it (the \I{ModIface} for when we type-checked the (unique) \verb|module|
declaration.)

\paragraph{Signatures} For signatures, there may be multiple \I{ModIface}s
associated with a \I{ModuleName} in scope, e.g. in this situation:

\begin{verbatim}
    unit p where
        signature S where
            data A
    unit q where
        include p
        signature S where
            data B
        module M where
            import S
\end{verbatim}
%
Each literal \verb|signature| has a \I{ModIface} associated with it; and
for the import of \verb|S| in \verb|M|, we want to see the \emph{merged}
\I{ModIface}s.  We can determine the \I{mi\_exports} from the shape,
but we also need to pull in orphan instances for each signature, and
produce a warning for each deprecated signature.

\begin{aside}
\textbf{Does hiding a signature hide its orphans?} Suppose that we have
extended Backpack to allow hiding signatures from import.

\begin{verbatim}
    unit p requires (H) where -- H is hidden from import
        module A where
            instance Eq (a -> b) where -- orphan
        signature H {-# DEPRECATED "Don't use me" #-} where
            import A

    unit q where
        include p
        signature H where
            data T
        module M where
            import H                -- warn deprecated?
            instance Eq (a -> b)    -- overlap?
\end{verbatim}

It is probably the most consistent to not pull in orphan instances
and not give the deprecated warning: this corresponds to merging
visible \I{ModIface}s, and ignoring invisible ones.
\end{aside}

\subsection{\textit{Module} to \textit{ModDetails}}

\paragraph{Modules}  For modules, we have a \I{Module} of
the form $\I{p}\verb|(|m\; \verb|->|\; \I{Module}\verb|,|\, \ldots\verb|)|$,
and we also have a unique \I{ModIface}, where each hole instantiation
is $\verb|hole:|m$.

To generate the \I{ModDetails} associated with the specific instantiation,
we have to type-check the \I{ModIface} with the following adjustments:

\begin{enumerate}
    \item Perform a \I{Module} substitution according to the instantiation
          of the \I{ModIface}'s \I{Module}. (NB: we \emph{do}
          substitute \verb|hole:A.x| to \verb|hole:B.x| if we instantiated
          \verb|A -> hole:B|, \emph{unlike} the disjoint
          substitutions applied by shaping.)
    \item Perform a \I{Name} substitution as follows: for any name
          with a unit key that is a $\verb|hole|$,
          substitute with the recorded \I{Name} in the requirements of the shape.
          Otherwise, look up the (unique) \I{ModIface} for the \I{Module},
          and substitute with the corresponding \I{Name} in the \I{mi\_exports}.
\end{enumerate}

\paragraph{Signatures}  For signatures, we have a \I{Module} of the form
$\verb|hole:|m$.  Unlike modules, there are multiple \I{ModIface}s associated with a hole.
We distinguish each separate \I{ModIface} by considering the full \I{UnitId}
it was defined in, e.g. \verb|p(A -> hole:C, B -> q():B)|; call this
the hole's \emph{defining unit key}; the set of \I{ModIface}s for a hole
and their defining unit keys can easily be calculated during shaping.

To generate the \I{ModDetails} associated with a hole, we type-check each
\I{ModIface}, with the following adjustments:

\begin{enumerate}
    \item Perform a \I{Module} substitution according to the instantiation
        of the defining unit key.  (NB: This may rename the hole itself!)
    \item Perform a \I{Name} substitution in the same manner
        as would be done in the case of modules.
    \item When these \I{ModDetails} are merged into the EPT, some merging
        of duplicate types may occur; a type
        may be defined multiple times, in which case we check that each
        definition is compatible with the previous ones.  A concrete
        type is always compatible with an abstract type.
\end{enumerate}

\paragraph{Invariants} When we perform \I{Name} substitutions, we must be
sure that we can always find out the correct \I{Name} to substitute to.
This isn't obviously true; consider:

\begin{verbatim}
    unit p where
        signature S(foo) where
            data T
            foo :: T
        module M(bar) where
            import S
            bar = foo
    unit q where
        module A(T(..)) where
            data T = T
            foo = T
        module S(foo) where
            import A
        include p
        module A where
            import M
            ... bar ...
\end{verbatim}
%
When we type check \verb|p|, we get the \I{ModIface}s:

\begin{verbatim}
    module hole:S(hole:S.foo) where
        data T
        foo :: hole:S.T
    module THIS:M(THIS:M.bar) where
        bar :: hole:S.T
\end{verbatim}
%
Now, when we type check \verb|A|, we pull on the \I{Name} \verb|p(S -> q():S):M.bar|,
which means we have to type check the \I{ModIface} for \verb|p(S -> q():S):M|.
The un-substituted type of \verb|bar| has a reference to \verb|hole:S.T|;
this should be substituted to \verb|q():S.T|.  But how do we discover this?
We know that \verb|hole:S| was instantiated to \verb|q():S|, so we might try
and look for \verb|q():S.T|.  However, this \I{Name} does not exist because
the \verb|module S| reexports the selector from \verb|A|!  Nor can we consult
the (unique) \I{ModIface} for the module, as it doesn't reexport the relevant
type.

The conclusion, then, is that a module written this way should be disallowed.
Specifically, the correctness condition for a signature is this: \emph{Any \I{Name}
mentioned in the \I{ModIface} of a signature must either be from an external module, or be
exported by the signature}.

\newpage
\section{Installation}

This section defines the syntax for the file-system format of the \I{ComponentDb}.
Like entries in the installed unit database, an entry is a sequence of fields
with values.

Indefinite unit entries share some entries in common with entries in the installed
unit database:

\begin{description}
    \item[\texttt{component-id:}] \I{ComponentId} \newline
        The unique identifier of an installed package.  This combined
        with \texttt{unit-name} uniquely identifies an entry in the
        installed unit database.
    \item[\texttt{exposed:}] \verb|True| or \verb|False| \newline
        Whether or not this unit is exposed, i.e. it is available for
        \verb|include| under its \verb|unit-name| (this is used to compute
        the default \I{ComponentNameMap} when GHC is called by itself).
    \item[\texttt{import-dirs:}] \I{FilePath} \newline
        Where interface files for this unit can be found. (NB: these
        interface files are templates, which contain references to holes
        which we can substitute.)  (There's exactly one.)
    \item[\texttt{exposed-modules:}] \I{ModuleName} or \I{ModuleName} \texttt{from} \I{Module} $\ldots$ \newline
        The set of exposed modules from this unit, including reexports from
        other units.
    \item[\texttt{other-modules:}] \I{ModuleName} $\ldots$ \newline
        Non-exposed modules; there is an interface for each of these
        in the import-dirs. (Redundant, but useful for error reporting.)
\end{description}
%
As well as all non-essential, Cabal-specific metadata; e.g. \texttt{name}, \texttt{version}, \ldots (\texttt{data-dir} and \texttt{haddock} probably).
Here are new entries for indefinite units:

\begin{description}
    \item[\texttt{requires:}] \I{ModuleName} \ldots \newline
        The set of module names which are requirements of this unit.
        (Installed units instead record \texttt{instantiated-with}, which
        specifies how each requirement was instantiated.)  Every
        requirement has an interface in the import-dirs.
    \item[\texttt{source-dir:}] \I{FilePath} \newline
        The path to the original source of the package.
    \item[\texttt{setup-executable:}] \I{FilePath} \newline
        The path to the \texttt{Setup} executable as described by the Cabal
        specification which is capable of building and installing the package
        using \texttt{./Setup instantiate} (this is a new command which lets
        us program how the requirements of the indefinite unit should be filled),
        \texttt{./Setup build}, \texttt{./Setup copy} and
        \texttt{./Setup register}.
    \item[\texttt{package-config:}] \I{FilePath} \newline
        The path to the package configuration saved when the indefinite
        unit was installed.  This should contain all of the relevant configuration
        information necessary to build a package, except how its requirements
        are instantiated.
\end{description}
%
The string representation of \I{Module} is worth remarking upon.  In
this specification, \I{Module} is a recursive data structure.  For
installed packages, it is acceptable to flatten \I{Module} into a
hash representing the \I{UnitId} and the \I{ModuleName}, as the \I{UnitId}
is an \I{InstalledUnitId} which has an entry in the database.  However,
this is unacceptable for indefinite units, because we don't have an
entry per \I{UnitId}.  So, for \I{UnitId}s in the indefinite unit database,
the full tree is written out, subject to this syntax:

\begin{verbatim}
Module ::= UnitId ":" ModuleName
UnitId ::= InstalledPackageId
          [ "/" UnitName "(" HoleMap ")" ]
          | "hole"
HoleMap ::= ModuleName "->" Module "," ...
\end{verbatim}

\section{Appendix: Shaping}

This section clarifies some subtle aspects about shaping.

\subsection{\textit{OccName} is implied by \textit{Name}}
In Haskell, the following is not valid syntax:

\begin{verbatim}
    import A (foobar as baz)
\end{verbatim}
In particular, a \I{Name} which is in scope will always have the same
\I{OccName} (even if it may be qualified.)  You might imagine relaxing
this restriction so that declarations can be used under different \I{OccName}s;
in such a world, we need a different definition of shape:

\begin{verbatim}
    Shape ::=
        provided: ModuleName -> Module { OccName -> Name }
        required: ModuleName ->        { OccName -> Name }
\end{verbatim}
Presently, however, such an \I{OccName} annotation would be redundant: it can be inferred from the \I{Name}.

\subsection{Holes of a unit are a mapping, not a set.}

Why can't the \I{UnitId} just record a
set of \I{Module}s, e.g. $\I{UnitId}\;::= \; p \; \verb|{| \; \I{Module} \; \verb|}|$?  Consider:

\begin{verbatim}
    unit p (A) requires (H1, H2) where
        signature H1(T) where
            data T
        signature H2(T) where
            data T
        module A(A(..)) where
            import qualified H1
            import qualified H2
            data A = A H1.T H2.T

    unit q (A12, A21) where
        module I1(T) where
            data T = T Int
        module I2(T) where
            data T = T Bool
        include p (A as A12) requires (H1 as I1, H2 as I2)
        include p (A as A21) requires (H1 as I2, H2 as I1)
\end{verbatim}
With a mapping, the first instance of \verb|p| has key \verb|p(H1 -> q():I1, H2 -> q():I2)|
while the second instance has key \verb|p(H1 -> q():I2, H2 -> q():I1)|; with
a set, both would have the key \verb|p{q():I1, q():I2}| and be
indistinguishable.

\subsection{Signatures can require a specific entity.}
With requirements like \verb|A -> { hole:A.T, hole:A.foo }|,
why not specify it as \verb|A -> { T, foo }|,
e.g., \verb|required: { ModuleName -> { OccName } }|?  Consider:

\begin{verbatim}
    unit p () requires (A, B) where
        signature A(T) where
            data T
        signature B(T) where
            import A(T)
\end{verbatim}
The requirements of this unit specify that \verb|A.T| $=$ \verb|B.T|; this
can be expressed with \I{Name}s as

\begin{verbatim}
    A -> { hole:A.T }
    B -> { hole:A.T }
\end{verbatim}
But, without \I{Name}s, the sharing constraint is impossible to express:  \verb|A -> { T }; B -> { T }|. (NB: \verb|A| and \verb|B| could be filled with different modules, they just have
to both export the same \verb|T|.)

\subsection{The \textit{Name} of a value is used to avoid
ambiguous identifier errors.}
We state that two types
are equal when their \I{Name}s are the same; however,
for values, it is less clear why we care.  But consider this example:

\begin{verbatim}
    unit p (A) requires (H1, H2) where
        signature H1(x) where
            x :: Int
        signature H2(x) where
            import H1(x)
        module A(y) where
            import H1
            import H2
            y = x
\end{verbatim}
The reference to \verb|x| in \verb|A| is unambiguous, because it is known
that \verb|x| from \verb|H1| and \verb|x| from \verb|H2| are the same (have
the same \I{Name}.)  If they were not the same, it would be ambiguous and
should cause an error.  Knowing the \I{Name} of a value distinguishes
between these two cases.

\subsection{Holes are linear}
Requirements do not record what \I{Module} represents
the identity of a requirement, which means that it's not possible to assert
that hole \verb|A| and hole \verb|B| should be implemented with the same module,
as might occur with aliasing:

\begin{verbatim}
    signature A where
    signature B where
    alias A = B
\end{verbatim}
%
The benefit of this restriction is that when a requirement is filled,
it is obvious that this is the only requirement that is filled: you won't
magically cause some other requirements to be filled.  The downside is
that it's not possible to write a unit which looks for the interface it
needs under any one of $n$ names, accepting any of them as an acceptable linkage.
If aliasing were allowed, we'd need a separate physical shaping context,
to make sure multiple mentions of the same hole were consistent.

\subsection{A unit does not ``provide'' its signatures}
We enforce the invariant that
a provision is always (syntactically) a \verb|module| and a requirement
is always a \verb|signature|.  This means that if you have a requirement
and a provision of the same name, the requirement can \emph{always} be filled
with the provision.

The alternate design, where a unit both requires and provides
its signatures, makes it unclear if a provision
will actually fill a signature.  Consider this example, where
a signature is required and exposed:

\begin{verbatim}
    unit a-sigs (A) requires (A) where -- ***
        signature A where
            data T

    unit a-user (B) requires (A) where
        signature A where
            data T
            x :: T
        module B where
            ...

    unit p where
        include a-sigs
        include a-user
\end{verbatim}
%
When we consider merging in the shape of \verb|a-user|, does the
\verb|A| provided by \verb|a-sigs| fill in the \verb|A| requirement
in \verb|a-user|?  It \emph{should not}, since \verb|a-sigs| does not
actually provide enough declarations to satisfy \verb|a-user|'s
requirement: the intended semantics \emph{merges} the requirements
of \verb|a-sigs| and \verb|a-user|.

What about this example?

\begin{verbatim}
    unit a-sigs (M as A) requires (H as A) where
        signature H(T) where
            data T
        module M(T) where
            import H(T)
\end{verbatim}
%
We rightly should error, since the provision is a module. And in this situation:

\begin{verbatim}
    unit a-sigs (H as A) requires (H) where
        signature H(T) where
            data T
\end{verbatim}
%
The requirements should be merged, but should the merged requirement
be under the name \verb|H| or \verb|A|?

It may still be possible to use the \verb|(A) requires (A)| syntax to
indicate exposed signatures, but this would be a mere syntactic
alternative to \verb|() requires (exposed A)|.

\subsection{Signature visibility, and defaulting}
The simplest formulation of requirements is to have them always be
importable.  One proposed enhancement, however, is to allow some
requirements to be ``non-importable''; that is, they are not visible
to people who include packages.

One simple way of modeling this is to associate each required module
with a flag indicating whether or not it is importable.  Then, we
might imagine that an explicit export list could be used to toggle
whether or not a requirement is visible.

However, when an export list is absent, we have to pick a default
visibility for a signature.  If we use the same behavior as with
modules, a strange situation can occur:

\begin{verbatim}
    unit p where -- S is visible
        signature S where
            x :: Bool

    unit q where -- use defaulting
        include p
        signature S where
            y :: Bool
        module M where
            import S
            z = x && y      -- OK

    unit r where
        include q
        module N where
            import S
            z = y           -- OK
            z = x           -- ???
\end{verbatim}
%
Absent the second signature declaration in \verb|q|, \verb|S.x| clearly
should not be visible in \verb|N|.  However, what ought to occur when this signature
declaration is added?  One interpretation is to say that only some
(but not all) declarations are provided (\verb|S.x| remains invisible);
another interpretation is that adding \verb|S| is enough to treat
the signature as ``in-line'', and all declarations are now provided
(\verb|S.x| is visible).

The latter interpretation avoids having to keep track of providedness
per declaration, and means that you can always express defaulting
behavior by writing an explicit provides declaration on the unit.
However, it has the odd behavior of making empty signatures semantically
meaningful:

\begin{verbatim}
unit q where
    include p
    signature S where
\end{verbatim}
%
%   SPJ: This would be too complicated (if there's yet a third way)

This is pretty complicated, so signature visibility is not currently
planned to be implemented.

\subsection{Tricky \textit{AvailInfo} merging scenarios}

\paragraph{Merging when type constructors are not in scope}

\begin{verbatim}
    signature A1(foo) where
        data A = A { foo :: Int, bar :: Bool }

    signature A2(bar) where
        data A = A { foo :: Int, bar :: Bool }
\end{verbatim}
%
If we merge \verb|A1| and \verb|A2|, are we supposed to conclude that
the types \verb|A1.A| and \verb|A2.A| (not in scope!) are the same?
The answer is no!  Consider these implementations:

\begin{verbatim}
    module A1(A(..)) where
        data A = A { foo :: Int, bar :: Bool }

    module A2(A(..)) where
        data A = A { foo :: Int, bar :: Bool }

    module A(foo, bar) where
        import A1(foo)
        import A2(bar)
\end{verbatim}

Here, \verb|module A1| implements \verb|signature A1|, \verb|module A2| implements \verb|signature A2|,
and \verb|module A| implements \verb|signature A1| and \verb|signature A2| individually
and should certainly implement their merge.  This is why we cannot simply
merge type constructors based on the \I{OccName} of their top-level type;
merging only occurs between in-scope identifiers.

\paragraph{Does merging a selector merge the type constructor?}

\begin{verbatim}
    signature A1(A(..)) where
        data A = A { foo :: Int, bar :: Bool }

    signature A2(A(..)) where
        data A = A { foo :: Int, bar :: Bool }

    signature A2(foo) where
        import A1(foo)
\end{verbatim}
%
Does the last signature, which is written in the style of a sharing constraint on \verb|foo|,
also cause \verb|bar| and the type and constructor \verb|A| to be unified?
Because a merge of a child name results in a substitution on the parent name,
the answer is yes.

\paragraph{Incomplete data declarations}

\begin{verbatim}
    signature A1(A(foo)) where
        data A = A { foo :: Int }

    signature A2(A(bar)) where
        data A = A { bar :: Bool }
\end{verbatim}
%
Should \verb|A1| and \verb|A2| merge?  If yes, this would imply
that data definitions in signatures could only be \emph{partial}
specifications of their true data types.  This seems complicated,
which suggests this should not be supported; however, in fact,
this sort of definition, while disallowed during type checking,
should be \emph{allowed} during shaping. The reason is that the
shapes we ascribe to the signatures \verb|A1| and \verb|A2| are
equivalent to the shapes for these, which should merge:

\begin{verbatim}
    signature A1(A(foo)) where
        data A = A { foo :: Int, bar :: Bool }

    signature A2(A(bar)) where
        data A = A { foo :: Int, bar :: Bool }
\end{verbatim}

\subsection{Subtyping record selectors as functions}

\begin{verbatim}
    signature H(A, foo) where
        data A
        foo :: A -> Int

    module M(A, foo) where
        data A = A { foo :: Int, bar :: Bool }
\end{verbatim}
%
Does \verb|M| successfully fill \verb|H|?  If so, it means that anywhere
a signature requests a function \verb|foo|, we can instead validly
provide a record selector.  This capability seems quite attractive,
although in practice record selectors rarely seem to be abstracted this
way: one reason is that \verb|M.foo| still \emph{is} a record selector,
and can be used to modify a record.  (Many library authors find this
surprising!)

Nor does this seem to be an insurmountable instance of the avoidance
problem:
as a workaround, \verb|H| can equivalently be written as:

\begin{verbatim}
    signature H(foo) where
        data A = A { foo :: Int, bar :: Bool }
\end{verbatim}
%
However, you might not like this, as the otherwise irrelevant \verb|bar| must be mentioned
in the definition.

In any case, actually implementing this `subtyping' is quite complicated, because we can no
longer assume that every child name is associated with a parent name.
The technical difficulty is that we now need to unify a plain identifier
\I{AvailInfo} (from the signature) with a type constructor \I{AvailInfo}
(from a module.)  It is not clear what this should mean.
Consider this situation:

\begin{verbatim}
    unit p where
        signature H(A, foo, bar) where
            data A
            foo :: A -> Int
            bar :: A -> Bool
        module X(A, foo) where
            import H
    unit q where
        include p
        signature H(bar) where
            data A = A { foo :: Int, bar :: Bool }
        module Y where
            import X(A(..)) -- ???
\end{verbatim}

Should the wildcard import on \verb|X| be allowed?
This question is equivalent to whether or not shaping discovers
whether or not a function is a record selector and propagates this
information elsewhere.
If the wildcard is not allowed, here is another situation:

\begin{verbatim}
    unit p where
        -- define without record selectors
        signature X1(A, foo) where
            data A
            foo :: A -> Int
        module M1(A, foo) where
            import X1

    unit q where
        -- define with record selectors (X1s unify)
        signature X1(A(..)) where
            data A = A { foo :: Int, bar :: Bool }
        signature X2(A(..)) where
            data A = A { foo :: Int, bar :: Bool }

        -- export some record selectors
        signature Y1(bar) where
            import X1
        signature Y2(bar) where
            import X2

    unit r where
        include p
        include q

        -- sharing constraint
        signature Y2(bar) where
            import Y1(bar)

        -- the payload
        module Test where
            import M1(foo)
            import X2(foo)
            ... foo ... -- conflict?
\end{verbatim}

Without the sharing constraint, the \verb|foo|s from \verb|M1| and \verb|X2|
should conflict.  With it, however, we should conclude that the \verb|foo|s
are the same, even though the \verb|foo| from \verb|M1| is \emph{not}
considered a child of \verb|A|, and even though in the sharing constraint
we \emph{only} unified \verb|bar| (and its parent \verb|A|).  To know that
\verb|foo| from \verb|M1| should also be unified, we have to know a bit
more about \verb|A| when the sharing constraint performs unification;
however, the \I{AvailInfo} will only tell us about what is in-scope, which
is \emph{not} enough information.

\subsection{Some examples}

\subsubsection{A simple example}

In the following set of units:

\begin{verbatim}
    unit p(M) requires (A) where
        signature A(T) where
            data T
        module M(T, S) where
            import A(T)
            data S = S T

    unit q where
        module A where
            data T = T
        include p
\end{verbatim}

When we \verb|include p|, we need to merge the partial shape
of \verb|q| (which just provides \verb|A|) with the shape
of \verb|p|.  Here is each step of the merging process:

\begin{verbatim}
          shape 1                       shape 2
          --------------------------------------------------------------------------------
(initial shapes)
provides: A -> THIS:A { q():A.T }       M -> p(A -> hole:A) { hole:A.T, p(A -> hole:A).S }
requires: (nothing)                     A ->                { hole:A.T }

(after filling requirements)
provides: A -> THIS:A { q():A.T }       M -> p(A -> THIS:A) { q():A.T, p(A -> THIS:A).S }
requires: (nothing)                     (nothing)

(after adding provides)
provides: A -> THIS:A         { q():A.T }
          M -> p(A -> THIS:A) { q():A.T, p(A -> THIS:A).S }
requires: (nothing)
\end{verbatim}

Notice that we substituted \verb|hole:A| with \verb|THIS:A|, but \verb|hole:A.T| with \verb|q():A.T|.

\subsubsection{Requirements merging can affect provisions}

When a merge results in a substitution, we substitute over both
requirements and provisions:

\begin{verbatim}
    signature H(T) where
        data T
    module A(T) where
        import H(T)
    module B(T) where
        data T = T

    -- provides: A -> THIS:A { hole:H.T }
    --           B -> THIS:B { THIS:B.T }
    -- requires: H ->        { hole:H.T }

    signature H(T, f) where
        import B(T)
        f :: a -> a

    -- provides: A -> THIS:A { THIS:B.T }           -- UPDATED
    --           B -> THIS:B { THIS:B.T }
    -- requires: H ->        { THIS:B.T, hole:H.f } -- UPDATED
\end{verbatim}

\subsubsection{Sharing constraints}

Suppose you have two signatures which both independently define a type,
and you would like to assert that these two types are the same.  In the
ML world, such a constraint is known as a sharing constraint.  Sharing
constraints can be encoded in Backpack via clever use of reexports;
they are also an instructive example for signature merging.

\begin{verbatim}
    signature A(T) where
        data T
    signature B(T) where
        data T

    -- requires: A -> { hole:A.T }
    --           B -> { hole:B.T }

    -- the sharing constraint!
    signature A(T) where
        import B(T)
    -- (shape to merge)
    -- requires: A -> { hole:B.T }

    -- (after merge)
    -- requires: A -> { hole:A.T }
    --           B -> { hole:A.T }
\end{verbatim}
%
\Red{I'm pretty sure any choice of \textit{Name} is OK, since the
subsequent substitution will make it alpha-equivalent.}

\subsection{Export declarations}

If an explicit export declaration is given, the final shape is the
computed shape, minus any provisions not mentioned in the list, with the
appropriate renaming applied to provisions and requirements.  (Requirements
are implicitly passed through if they are not named.)
If no explicit export declaration is given, the final shape is
the computed shape, including only provisions which were defined
in the declarations of the unit.


\section{Cabal}


%  \I{InstalledUnitId} & ::= & \I{ComponentId} \verb|(| \, m \; \verb|->| \; \I{Module} \verb|,|\, \ldots\, \verb|)| & \mbox{Also known as \I{UnitId}} \\
%  \I{Module} & ::= & \I{InstalledUnitId} \verb|:| m \\

\paragraph{Indefinite versus installed units}
The purpose of a \I{ComponentId} is to uniquely identify the results of \textbf{typechecking}
an indefinite unit; whereas an \I{InstalledUnitId} uniquely identifies
the results of \textbf{compiling} a unit with all its holes
filled.  Thus, an \I{InstalledUnitId} also records a \emph{hole mapping}
which specifies how each hole was filled.  If an \I{InstalledUnitId}
is only partially filled, we may refer to it as a \I{UnitId} (as these
are never installed.)

\paragraph{Units versus packages}

Cabal packages are:

\begin{itemize}
    \item The unit of distribution
    \item The unit that Hackage handles
    \item The unit of versioning
    \item The unit of ownership (who maintains it etc) 
\end{itemize}

Backpack units are the building blocks of modular development;
there may be multiple units per Cabal package.  While in theory
Cabal could do sophisticated things with multiple units in a
package, we expect Cabal
to pick a distinguished unit (with the same unit name $p$ as
the package) which serves as the publicly visible unit.


%\newpage

\end{document} % chktex 16