<?xml version="1.0" encoding="utf-8" ?>
<!DOCTYPE erlref SYSTEM "erlref.dtd">

<erlref>
  <header>
    <copyright>
      <year>2003</year><year>2020</year>
      <holder>Ericsson AB. All Rights Reserved.</holder>
    </copyright>
    <legalnotice>
      Licensed under the Apache License, Version 2.0 (the "License");
      you may not use this file except in compliance with the License.
      You may obtain a copy of the License at
 
          http://www.apache.org/licenses/LICENSE-2.0

      Unless required by applicable law or agreed to in writing, software
      distributed under the License is distributed on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
      See the License for the specific language governing permissions and
      limitations under the License.
    
    </legalnotice>

    <title>erl_tar</title>
    <prepared>Bjorn Gustavsson</prepared>
    <responsible>Bjorn Gustavsson</responsible>
    <docno>1</docno>
    <approved>Kenneth Lundin</approved>
    <checked></checked>
    <date>2003-01-21</date>
    <rev>A</rev>
    <file>erl_tar.xml</file>
  </header>
  <module since="">erl_tar</module>
  <modulesummary>Unix 'tar' utility for reading and writing tar archives.
  </modulesummary>
  <description>
    <p>This module archives and extracts files to and from
    a tar file. This module supports reading most common tar formats,
    namely v7, STAR, USTAR, and PAX, as well as some of GNU tar's extensions
    to the USTAR format (sparse files most notably). It produces tar archives
    in USTAR format, unless the files being archived require PAX format due to
    restrictions in USTAR (such as unicode metadata, filename length, and more).
    As such, <c>erl_tar</c> supports tar archives produced by almost all modern
    tar utilities, and produces tarballs which should be similarly portable.</p>
    <p>By convention, the name of a tar file is to end in "<c>.tar</c>".
      To abide by the convention, add "<c>.tar</c>" to the name.</p>

    <p>Tar files can be created in one operation using function
      <seemfa marker="#create/2"><c>create/2</c></seemfa> or
      <seemfa marker="#create/3"><c>create/3</c></seemfa>.</p>

    <p>Alternatively, for more control, use functions
      <seemfa marker="#open/2"><c>open/2</c></seemfa>,
      <seemfa marker="#add/3"><c>add/3,4</c></seemfa>, and
      <seemfa marker="#close/1"><c>close/1</c></seemfa>.</p>

    <p>To extract all files from a tar file, use function
      <seemfa marker="#extract/1"><c>extract/1</c></seemfa>.
      To extract only some files or to be able to specify some more options,
      use function <seemfa marker="#extract/2"><c>extract/2</c></seemfa>.</p>

    <p>To return a list of the files in a tar file,
      use function <seemfa marker="#table/1"><c>table/1</c></seemfa> or
      <seemfa marker="#table/2"><c>table/2</c></seemfa>.
      To print a list of files to the Erlang shell,
      use function <seemfa marker="#t/1"><c>t/1</c></seemfa> or
      <seemfa marker="#tt/1"><c>tt/1</c></seemfa>.</p>

    <p>To convert an error term returned from one of the functions
      above to a readable message, use function
      <seemfa marker="#format_error/1"><c>format_error/1</c></seemfa>.</p>
  </description>

  <section>
    <title>Unicode Support</title>
    <p>If <seemfa marker="kernel:file#native_name_encoding/0">
      <c>file:native_name_encoding/0</c></seemfa>
      returns <c>utf8</c>, path names are encoded in UTF-8 when
      creating tar files, and path names are assumed to be encoded in
      UTF-8 when extracting tar files.</p>

    <p>If <seemfa marker="kernel:file#native_name_encoding/0">
      <c>file:native_name_encoding/0</c></seemfa>
      returns <c>latin1</c>, no translation of path names is done.</p>

    <p>Unicode metadata stored in PAX headers is preserved.</p>
  </section>

  <section>
    <title>Other Storage Media</title>
    <p>The <c>erl_tar</c>
      module normally accesses the tar file on disk using
      the <seeerl marker="kernel:file"><c>file</c></seeerl> module.
      When other needs arise, you can define your own low-level Erlang
      functions to perform the writing and reading on the storage media;
      use function <seemfa marker="#init/3"><c>init/3</c></seemfa>.</p>

    <p>An example of this is the SFTP support in
      <seemfa marker="ssh:ssh_sftp#open_tar/3">
      <c>ssh_sftp:open_tar/3</c></seemfa>. This function opens a tar file
      on a remote machine using an SFTP channel.</p>
  </section>

  <section>
    <title>Limitations</title>
    <list type="bulleted">
      <item>
        <p>If you must remain compatible with the USTAR tar format, you must ensure file paths being
        stored are less than 255 bytes in total, with a maximum filename component
        length of 100 bytes. USTAR uses a header field (prefix) in addition to the name field, and
        splits file paths longer than 100 bytes into two parts. This split is done on a directory boundary,
        and is done in such a way to make the best use of the space available in those two fields, but in practice
        this will often mean that you have less than 255 bytes for a path. <c>erl_tar</c> will
        automatically upgrade the format to PAX to handle longer filenames, so this is only an issue if you
        need to extract the archive with an older implementation of <c>erl_tar</c> or <c>tar</c> which does
        not support PAX. In this case, the PAX headers will be extracted as regular files, and you will need to
        apply them manually.</p>
      </item>
      <item>
        <p>Like the above, if you must remain USTAR compatible, you must also ensure that paths for
        symbolic/hard links are no more than 100 bytes, otherwise PAX headers will be used.</p>
      </item>
    </list>
  </section>

  <datatypes>
    <datatype>
      <name name="name_in_archive"/>
    </datatype>
    <datatype>
      <name name="open_type"/>
    </datatype>
    <datatype>
      <name name="tar_descriptor"/>
    </datatype>
  </datatypes>

  <funcs>
    <func>
      <name name="add" arity="3" since=""/>
      <name name="add" arity="4" since=""/>
      <fsummary>Add a file to an open tar file.</fsummary>
      <type name="add_type"/>
      <type name="add_opt"/>
      <desc>
        <p>Adds a file to a tar file that has been opened for writing by
          <seemfa marker="#open/2"><c>open/2</c></seemfa>.</p>
        <p><c>NameInArchive</c> is the name under which the file becomes
          stored in the tar file. The file gets this name when it is
          extracted from the tar file.</p>
        <p>Options:</p>
        <taglist>
          <tag><c>dereference</c></tag>
          <item>
            <p>By default, symbolic links are stored as symbolic links
              in the tar file. To override the default and store the file
              that the symbolic link points to into the tar file, use
              option <c>dereference</c>.</p>
          </item>
          <tag><c>verbose</c></tag>
          <item>
            <p>Prints an informational message about the added file.</p>
          </item>
          <tag><c>{chunks,ChunkSize}</c></tag>
          <item>
            <p>Reads data in parts from the file. This is intended for
              memory-limited machines that, for example, build a tar file
              on a remote machine over SFTP, see
              <seemfa marker="ssh:ssh_sftp#open_tar/3">
              <c>ssh_sftp:open_tar/3</c></seemfa>.</p>
          </item>
          <tag><c>{atime,non_neg_integer()}</c></tag>
          <item>
            <p>Sets the last time, as
	            <seeguide marker="erts:time_correction#POSIX_Time">
              POSIX time</seeguide>, when the file was read. See also
              <seemfa marker="kernel:file#read_file_info/1">
              <c>file:read_file_info/1</c></seemfa>.</p>
          </item>
          <tag><c>{mtime,non_neg_integer()}</c></tag>
          <item>
            <p>Sets the last time, as
	            <seeguide marker="erts:time_correction#POSIX_Time">
              POSIX time</seeguide>, when the file was written. See also
              <seemfa marker="kernel:file#read_file_info/1">
              <c>file:read_file_info/1</c></seemfa>.</p>
          </item>
          <tag><c>{ctime,non_neg_integer()}</c></tag>
          <item>
            <p>Sets the time, as
	            <seeguide marker="erts:time_correction#POSIX_Time">
              POSIX time</seeguide>, when the file was created. See also
              <seemfa marker="kernel:file#read_file_info/1">
              <c>file:read_file_info/1</c></seemfa>.</p>
          </item>
          <tag><c>{uid,non_neg_integer()}</c></tag>
          <item>
            <p>Sets the file owner. See also
              <seemfa marker="kernel:file#read_file_info/1">
              <c>file:read_file_info/1</c></seemfa>.</p>
          </item>
          <tag><c>{gid,non_neg_integer()}</c></tag>
          <item>
            <p>Sets the group that the file owner belongs to. See also
              <seemfa marker="kernel:file#read_file_info/1">
              <c>file:read_file_info/1</c></seemfa>.</p>
          </item>
        </taglist>
      </desc>
    </func>

    <func>
      <name name="close" arity="1" since=""/>
      <fsummary>Close an open tar file.</fsummary>
      <desc>
        <p>Closes a tar file
          opened by <seemfa marker="#open/2"><c>open/2</c></seemfa>.</p>
      </desc>
    </func>

    <func>
      <name name="create" arity="2" since=""/>
      <fsummary>Create a tar archive.</fsummary>
      <type name="filelist"/>
      <desc>
        <p>Creates a tar file and archives the files whose names are specified
          in <c>FileList</c> into it. The files can either be read from disk
          or be specified as binaries.</p>
      </desc>
    </func>

    <func>
      <name name="create" arity="3" since=""/>
      <fsummary>Create a tar archive with options.</fsummary>
      <type name="filelist"/>
      <type name="create_opt"/>
      <desc>
        <p>Creates a tar file and archives the files whose names are specified
          in <c>FileList</c> into it. The files can either be read from disk
          or be specified as binaries.</p>
        <p>The options in <c>OptionList</c> modify the defaults as follows:</p>
        <taglist>
          <tag><c>compressed</c></tag>
          <item>
            <p>The entire tar file is compressed, as if it has
              been run through the <c>gzip</c> program. To abide by the
              convention that a compressed tar file is to end in
              "<c>.tar.gz</c>" or "<c>.tgz</c>", add the appropriate
              extension.</p>
          </item>
          <tag><c>cooked</c></tag>
          <item>
            <p>By default, function <c>open/2</c> opens the tar file in
              <c>raw</c> mode, which is faster but does not allow a remote
              (Erlang) file server to be used. Adding <c>cooked</c> to the
              mode list overrides the default and opens the tar file without
              option <c>raw</c>.</p>
          </item>
          <tag><c>dereference</c></tag>
          <item>
            <p>By default, symbolic links are stored as symbolic links in
              the tar file. To override the default and store the file that
              the symbolic link points to into the tar file, use
              option <c>dereference</c>.</p>
          </item>
          <tag><c>verbose</c></tag>
          <item>
            <p>Prints an informational message about each added file.</p>
          </item>
        </taglist>
      </desc>
    </func>

    <func>
      <name name="extract" arity="1" since=""/>
      <fsummary>Extract all files from a tar file.</fsummary>
      <desc>
        <p>Extracts all files from a tar archive.</p>
        <p>If argument <c>Name</c> is specified as <c>{binary,Binary}</c>,
          the contents of the binary are assumed to be a tar archive.</p>
        <p>If argument <c>Name</c> is specified as <c>{file,Fd}</c>,
          <c>Fd</c> is assumed to be a file descriptor returned from function
          <c>file:open/2</c>.</p>
        <p>Otherwise, <c>Name</c> is to be a filename.</p>
	<note><p>Leading slashes in tar member names will be removed before
	writing the file. That is, absolute paths will be turned into
	relative paths. There will be an info message written to the error
	logger when paths are changed in this way.</p></note>
        <warning>
          <p>The <c>compressed</c> and <c>cooked</c> flags are invalid when
            passing a file descriptor with <c>{file,Fd}</c>. The file is
            assumed to have been opened with the appropriate flags.</p>
        </warning>
      </desc>
    </func>

    <func>
      <name name="extract" arity="2" since=""/>
      <fsummary>Extract files from a tar file.</fsummary>
      <type name="extract_opt"/>
      <desc>
        <p>Extracts files from a tar archive.</p>
        <p>If argument <c>Name</c> is specified as <c>{binary,Binary}</c>,
          the contents of the binary are assumed to be a tar archive.</p>
        <p>If argument <c>Name</c> is specified as <c>{file,Fd}</c>,
          <c>Fd</c> is assumed to be a file descriptor returned from function
          <c>file:open/2</c>.</p>
        <p>Otherwise, <c>Name</c> is to be a filename.</p>
        <p>The following options modify the defaults for the
          extraction:</p>
        <taglist>
          <tag><c>{cwd,Cwd}</c></tag>
          <item>
            <p>Files with relative filenames are by default extracted
              to the current working directory. With this option, files are
              instead extracted into directory <c>Cwd</c>.</p>
          </item>
          <tag><c>{files,FileList}</c></tag>
          <item>
            <p>By default, all files are extracted from the tar file. With
              this option, only those files are extracted whose names are
              included in <c>FileList</c>.</p>
          </item>
          <tag><c>compressed</c></tag>
          <item>
            <p>With this option, the file is uncompressed while extracting.
              If the tar file is not compressed, this option is ignored.</p>
          </item>
          <tag><c>cooked</c></tag>
          <item>
            <p>By default, function <c>open/2</c> opens the tar file
              in <c>raw</c> mode, which is faster but does not allow a remote
              (Erlang) file server to be used. Adding <c>cooked</c> to the mode
              list overrides the default and opens the tar file without option
              <c>raw</c>.</p>
          </item>
          <tag><c>memory</c></tag>
          <item>
            <p>Instead of extracting to a directory, this option gives the
	      result as a list of tuples <c>{Filename, Binary}</c>, where
	      <c>Binary</c> is a binary containing the extracted data of the
              file named <c>Filename</c> in the tar file.</p>
          </item>
          <tag><c>keep_old_files</c></tag>
          <item>
            <p>By default, all existing files with the same name as files in
              the tar file are overwritten. With this option, existing
              files are not overwritten.</p>
          </item>
          <tag><c>verbose</c></tag>
          <item>
            <p>Prints an informational message for each extracted file.</p>
          </item>
        </taglist>
        <warning>
          <p>The <c>compressed</c> and <c>cooked</c> flags are invalid when
            passing a file descriptor with <c>{file,Fd}</c>. The file is
            assumed to have been opened with the appropriate flags.</p>
        </warning>
      </desc>
    </func>

    <func>
      <name name="format_error" arity="1" since=""/>
      <fsummary>Convert error term to a readable string.</fsummary>
      <desc>
        <p>Converts an error reason term to a human-readable error message
          string.</p>
      </desc>
    </func>

    <func>
      <name name="init" arity="3" since="OTP 17.4"/>
      <fsummary>Create a <c>TarDescriptor</c> used in subsequent tar operations
        when defining own low-level storage access functions.</fsummary>
      <type name="user_data"/>
      <type name="file_op"/>
      <desc>
        <p>The <c>Fun</c> defines what to do when the different
          storage operations are invoked by the higher-level tar
          handling functions (such as <c>add/3</c>, <c>add/4</c>, and
          <c>close/1</c>).</p>
        <p>The <c>Fun</c> is called when the tar function wants to do a
          low-level operation, like writing a block to a file. The <c>Fun</c>
          is called as <c>Fun(Op, {UserData,Parameters...})</c>, where
          <c>Op</c> is the operation name, <c>UserData</c> is the term
          passed as the first argument to <c>init/3</c> and
          <c>Parameters...</c> are the data added by the tar function to be
          passed down to the storage handling function.</p>
        <p>Parameter <c>UserData</c> is typically the result of opening a
          low-level structure like a file descriptor or an SFTP channel id.
          The different <c>Fun</c> clauses operate on that very term.</p>
        <p>The following are the fun clauses parameter lists:</p>
        <taglist>
          <tag><c>(write, {UserData,DataToWrite})</c></tag>
          <item>
            <p>Writes term <c>DataToWrite</c> using <c>UserData</c>.</p>
          </item>
          <tag><c>(close, UserData)</c></tag>
          <item>
            <p>Closes the access.</p>
          </item>
          <tag><c>(read2, {UserData,Size})</c></tag>
          <item>
            <p>Reads using <c>UserData</c> but only <c>Size</c> bytes.
            Notice that there is only an arity-2 read function, not an arity-1
            function.</p>
          </item>
          <tag><c>(position,{UserData,Position})</c></tag>
          <item>
            <p>Sets the position of <c>UserData</c> as defined for files in
              <seemfa marker="kernel:file#position/2">
              <c>file:position/2</c></seemfa>.</p>
          </item>
        </taglist>
        <p><em>Example:</em></p>
        <p>The following is a complete <c>Fun</c> parameter for reading and
          writing on files using the
          <seeerl marker="kernel:file"><c>file</c></seeerl> module:</p>
        <code type="none">
ExampleFun = 
   fun(write, {Fd,Data}) ->  file:write(Fd, Data);
      (position, {Fd,Pos}) -> file:position(Fd, Pos);
      (read2, {Fd,Size}) -> file:read(Fd, Size);
      (close, Fd) -> file:close(Fd)
   end</code>
        <p>Here <c>Fd</c> was specified to function <c>init/3</c> as:</p>
        <code>
{ok,Fd} = file:open(Name, ...),
{ok,TarDesc} = erl_tar:init(Fd, write, ExampleFun),</code>
        <p><c>TarDesc</c> is then used:</p>
        <code>
erl_tar:add(TarDesc, SomeValueIwantToAdd, FileNameInTarFile, []),
...,
erl_tar:close(TarDesc)</code>
        <p>When the <c>erl_tar</c> core wants to, for example, write a piece
          of <c>Data</c>, it would call
          <c>ExampleFun(write, {UserData,Data})</c>.</p>
        <note>
          <p>This example with the <c>file</c> module operations is
            not necessary to use directly, as that is what function
            <seemfa marker="#open/2"><c>open/2</c></seemfa> in principle
            does.</p>
        </note>
        <warning>
          <p>The <c>TarDescriptor</c> term is not a file descriptor. You are
            advised not to rely on the specific contents of this term, as it
            can change in future Erlang/OTP releases when more features are
            added to this module.</p>
        </warning>
      </desc>
    </func>

    <func>
      <name name="open" arity="2" since=""/>
      <fsummary>Open a tar file for writing.</fsummary>
      <desc>
        <p>Creates a tar file for writing (any existing file with the same
          name is truncated).</p>
        <p>By convention, the name of a tar file is to end in "<c>.tar</c>".
          To abide by the convention, add "<c>.tar</c>" to the name.</p>
        <p>In addition to the <c>write</c> atom, the following atoms
          can be added to <c>OpenModeList</c>:</p>
        <taglist>
          <tag><c>compressed</c></tag>
          <item>
            <p>The entire tar file is compressed, as if it has been run
              through the <c>gzip</c> program. To abide by the convention
              that a compressed tar file is to end in "<c>.tar.gz</c>" or
              "<c>.tgz</c>", add the appropriate extension.</p>
          </item>
          <tag><c>cooked</c></tag>
          <item>
            <p>By default, the tar file is opened in <c>raw</c> mode, which is
              faster but does not allow a remote (Erlang) file server to be
              used. Adding <c>cooked</c> to the mode list overrides the
              default and opens the tar file without option <c>raw</c>.</p>
          </item>
        </taglist>
        <p>To add one file at a time into an opened tar file, use function
          <seemfa marker="#add/3"><c>add/3,4</c></seemfa>. When you are
          finished adding files, use function <seemfa marker="#close/1">
          <c>close/1</c></seemfa> to close the tar file.</p>
        <warning>
          <p>The <c>compressed</c> and <c>cooked</c> flags are invalid when
            passing a file descriptor with <c>{file,Fd}</c>. The file must
            already be opened with the appropriate flags.</p>
        </warning>
        <warning>
          <p>The <c>TarDescriptor</c> term is not a file descriptor. You are
            advised not to rely on the specific contents of this term, as it
            can change in future Erlang/OTP releases when more features are
            added to this module.</p>
        </warning>
      </desc>
    </func>

    <func>
      <name name="table" arity="1" since=""/>
      <name name="table" arity="2" since=""/>
      <fsummary>Retrieve name and information of all files in a tar file.
      </fsummary>
      <type name="tar_entry"/>
      <type name="tar_time"/>
      <type name="typeflag"/>
      <type name="mode"/>
      <type name="uid"/>
      <type name="gid"/>
      <desc>
        <p>Retrieves the names of all files in the tar file <c>Name</c>.</p>
      </desc>
    </func>

    <func>
      <name name="t" arity="1" since=""/>
      <fsummary>Print the name of each file in a tar file.</fsummary>
      <desc>
        <p>Prints the names of all files in the tar file <c>Name</c> to the
          Erlang shell (similar to "<c>tar&nbsp;t</c>").</p>
      </desc>
    </func>

    <func>
      <name name="tt" arity="1" since=""/>
      <fsummary>Print name and information for each file in a tar file.
      </fsummary>
      <desc>
        <p>Prints names and information about all files in the tar file
          <c>Name</c> to the Erlang shell (similar to "<c>tar&nbsp;tv</c>").</p>
      </desc>
    </func>
  </funcs>
</erlref>