<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<!-- NewPage -->
<html lang="en">
<head>
<!-- Generated by javadoc (1.8.0_352) on Wed Jan 25 17:31:30 CET 2023 -->
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<title>org.apache.jute (Apache ZooKeeper - Jute 3.8.1 API)</title>
<meta name="date" content="2023-01-25">
<link rel="stylesheet" type="text/css" href="../../../stylesheet.css" title="Style">
<script type="text/javascript" src="../../../script.js"></script>
</head>
<body>
<script type="text/javascript"><!--
    try {
        if (location.href.indexOf('is-external=true') == -1) {
            parent.document.title="org.apache.jute (Apache ZooKeeper - Jute 3.8.1 API)";
        }
    }
    catch(err) {
    }
//-->
</script>
<noscript>
<div>JavaScript is disabled on your browser.</div>
</noscript>
<!-- ========= START OF TOP NAVBAR ======= -->
<div class="topNav"><a name="navbar.top">
<!--   -->
</a>
<div class="skipNav"><a href="#skip.navbar.top" title="Skip navigation links">Skip navigation links</a></div>
<a name="navbar.top.firstrow">
<!--   -->
</a>
<ul class="navList" title="Navigation">
<li><a href="../../../overview-summary.html">Overview</a></li>
<li class="navBarCell1Rev">Package</li>
<li>Class</li>
<li><a href="package-use.html">Use</a></li>
<li><a href="package-tree.html">Tree</a></li>
<li><a href="../../../deprecated-list.html">Deprecated</a></li>
<li><a href="../../../index-all.html">Index</a></li>
<li><a href="../../../help-doc.html">Help</a></li>
</ul>
</div>
<div class="subNav">
<ul class="navList">
<li>Prev&nbsp;Package</li>
<li><a href="../../../org/apache/jute/compiler/package-summary.html">Next&nbsp;Package</a></li>
</ul>
<ul class="navList">
<li><a href="../../../index.html?org/apache/jute/package-summary.html" target="_top">Frames</a></li>
<li><a href="package-summary.html" target="_top">No&nbsp;Frames</a></li>
</ul>
<ul class="navList" id="allclasses_navbar_top">
<li><a href="../../../allclasses-noframe.html">All&nbsp;Classes</a></li>
</ul>
<div>
<script type="text/javascript"><!--
  allClassesLink = document.getElementById("allclasses_navbar_top");
  if(window==top) {
    allClassesLink.style.display = "block";
  }
  else {
    allClassesLink.style.display = "none";
  }
  //-->
</script>
</div>
<a name="skip.navbar.top">
<!--   -->
</a></div>
<!-- ========= END OF TOP NAVBAR ========= -->
<div class="header">
<h1 title="Package" class="title">Package&nbsp;org.apache.jute</h1>
<div class="docSummary">
<div class="block">Hadoop record I/O contains classes and a record description language
  translator for simplifying serialization and deserialization of records in a
  language-neutral manner.</div>
</div>
<p>See:&nbsp;<a href="#package.description">Description</a></p>
</div>
<div class="contentContainer">
<ul class="blockList">
<li class="blockList">
<table class="typeSummary" border="0" cellpadding="3" cellspacing="0" summary="Interface Summary table, listing interfaces, and an explanation">
<caption><span>Interface Summary</span><span class="tabEnd">&nbsp;</span></caption>
<tr>
<th class="colFirst" scope="col">Interface</th>
<th class="colLast" scope="col">Description</th>
</tr>
<tbody>
<tr class="altColor">
<td class="colFirst"><a href="../../../org/apache/jute/Index.html" title="interface in org.apache.jute">Index</a></td>
<td class="colLast">
<div class="block">Interface that acts as an iterator for deserializing maps.</div>
</td>
</tr>
<tr class="rowColor">
<td class="colFirst"><a href="../../../org/apache/jute/InputArchive.html" title="interface in org.apache.jute">InputArchive</a></td>
<td class="colLast">
<div class="block">Interface that all the Deserializers have to implement.</div>
</td>
</tr>
<tr class="altColor">
<td class="colFirst"><a href="../../../org/apache/jute/OutputArchive.html" title="interface in org.apache.jute">OutputArchive</a></td>
<td class="colLast">
<div class="block">Interface that all the serializers have to implement.</div>
</td>
</tr>
<tr class="rowColor">
<td class="colFirst"><a href="../../../org/apache/jute/Record.html" title="interface in org.apache.jute">Record</a></td>
<td class="colLast">
<div class="block">Interface that is implemented by generated classes.</div>
</td>
</tr>
</tbody>
</table>
</li>
<li class="blockList">
<table class="typeSummary" border="0" cellpadding="3" cellspacing="0" summary="Class Summary table, listing classes, and an explanation">
<caption><span>Class Summary</span><span class="tabEnd">&nbsp;</span></caption>
<tr>
<th class="colFirst" scope="col">Class</th>
<th class="colLast" scope="col">Description</th>
</tr>
<tbody>
<tr class="altColor">
<td class="colFirst"><a href="../../../org/apache/jute/BinaryInputArchive.html" title="class in org.apache.jute">BinaryInputArchive</a></td>
<td class="colLast">&nbsp;</td>
</tr>
<tr class="rowColor">
<td class="colFirst"><a href="../../../org/apache/jute/BinaryOutputArchive.html" title="class in org.apache.jute">BinaryOutputArchive</a></td>
<td class="colLast">&nbsp;</td>
</tr>
<tr class="altColor">
<td class="colFirst"><a href="../../../org/apache/jute/RecordReader.html" title="class in org.apache.jute">RecordReader</a></td>
<td class="colLast">
<div class="block">Front-end interface to deserializers.</div>
</td>
</tr>
<tr class="rowColor">
<td class="colFirst"><a href="../../../org/apache/jute/RecordWriter.html" title="class in org.apache.jute">RecordWriter</a></td>
<td class="colLast">
<div class="block">Front-end for serializers.</div>
</td>
</tr>
<tr class="altColor">
<td class="colFirst"><a href="../../../org/apache/jute/ToStringOutputArchive.html" title="class in org.apache.jute">ToStringOutputArchive</a></td>
<td class="colLast">&nbsp;</td>
</tr>
<tr class="rowColor">
<td class="colFirst"><a href="../../../org/apache/jute/Utils.html" title="class in org.apache.jute">Utils</a></td>
<td class="colLast">
<div class="block">Various utility functions for Hadoop record I/O runtime.</div>
</td>
</tr>
</tbody>
</table>
</li>
</ul>
<a name="package.description">
<!--   -->
</a>
<h2 title="Package org.apache.jute Description">Package org.apache.jute Description</h2>
<div class="block">Hadoop record I/O contains classes and a record description language
  translator for simplifying serialization and deserialization of records in a
  language-neutral manner.
  
  <h2>Introduction</h2>
  
Software systems of any significant complexity require mechanisms for data
interchange with the outside world. These interchanges typically involve the
marshaling and unmarshaling of logical units of data to and from data streams
(files, network connections, memory buffers, etc.). Applications usually embed
code for serializing and deserializing the data types that they manipulate.
The work of serialization has several features that make
automatic code generation for it worthwhile. Given a particular output encoding
(binary, XML, etc.), serialization of primitive types and simple compositions
of primitives (structs, vectors, etc.) is a very mechanical task. Manually
written serialization code can be susceptible to bugs, especially when records
have a large number of fields or a record definition changes between software
versions. Lastly, it can be very useful for applications written in different
programming languages to be able to share and interchange data. This can be
made a lot easier by describing the data records manipulated by these
applications in a language-agnostic manner and using the descriptions to derive
implementations of serialization in multiple target languages.

This document describes Hadoop Record I/O, a mechanism that is aimed at
<ul> 
<li> enabling the specification of simple serializable data types (records) 
<li> enabling the generation of code in multiple target languages for
marshaling and unmarshaling such types
<li> providing target language specific support that will enable application 
programmers to incorporate generated code into their applications
</ul>

The goals of Hadoop Record I/O are similar to those of mechanisms such as XDR,
ASN.1, PADS and ICE. While these systems all include a DDL that enables
the specification of most record types, they differ widely in what else they
focus on. The focus in Hadoop Record I/O is on data marshaling and
multi-lingual support.  We take a translator-based approach to serialization.
Hadoop users have to describe their data in a simple data description
language. The Hadoop DDL translator rcc generates code that users
can invoke in order to read/write their data from/to simple stream 
abstractions. Next, we explicitly list some of the goals and non-goals of
Hadoop Record I/O.


<h3>Goals</h3>

<ul>
<li> Support for commonly used primitive types. Hadoop should include as
primitives the commonly used built-in types of the programming languages we
intend to support.

<li> Support for common data compositions (including recursive compositions).
Hadoop should support widely used composite types such as structs and
vectors.

<li> Code generation in multiple target languages. Hadoop should be capable of
generating serialization code in multiple target languages and should be
easily extensible to new target languages. The initial target languages are
C++ and Java.

<li> Support for generated code in target languages. Hadoop should include
support in the form of headers, libraries, and packages for supported target
languages that enable easy inclusion and use of generated code in applications.

<li> Support for multiple output encodings. Candidates include
packed binary, comma-separated text, XML etc.

<li> Support for specifying record types in a backwards/forwards compatible
manner. This will probably be in the form of support for optional fields in
records. This version of the document does not include a description of the
planned mechanism; we intend to include it in the next iteration.

</ul>

<h3>Non-Goals</h3>

<ul>
  <li> Serializing existing arbitrary C++ classes.
  <li> Serializing complex data structures such as trees, linked lists etc.
  <li> Built-in indexing schemes, compression, or check-sums.
  <li> Dynamic construction of objects from an XML schema.
</ul>

The remainder of this document describes the features of Hadoop record I/O
in more detail. Section 2 describes the data types supported by the system.
Section 3 lays out the DDL syntax with some examples of simple records. 
Section 4 describes the process of code generation with rcc. Section 5
describes target language mappings and support for Hadoop types. We include a
fairly complete description of C++ mappings with intent to include Java and
others in upcoming iterations of this document. The last section talks about
supported output encodings.


<h2>Data Types and Streams</h2>

This section describes the primitive and composite types supported by Hadoop.
We aim to support a set of types that can be used to simply and efficiently
express a wide range of record types in different programming languages.

<h3>Primitive Types</h3>

For the most part, the primitive types of Hadoop map directly to primitive
types in high level programming languages. Special cases are the
ustring (a Unicode string) and buffer types, which we believe
find wide use and which are usually implemented in library code and not
available as language built-ins. Hadoop also supplies these via library code
when a target language built-in is not present and there is no widely
adopted "standard" implementation. The complete list of primitive types is:

<ul>
  <li> byte: An 8-bit unsigned integer.
  <li> boolean: A boolean value.
  <li> int: A 32-bit signed integer.
  <li> long: A 64-bit signed integer.
  <li> float: A single precision floating point number as described by
    IEEE-754.
  <li> double: A double precision floating point number as described by
    IEEE-754.
  <li> ustring: A string consisting of Unicode characters.
  <li> buffer: An arbitrary sequence of bytes. 
</ul>


<h3>Composite Types</h3>
Hadoop supports a small set of composite types that enable the description
of simple aggregate types and containers. A composite type is serialized
by sequentially serializing its constituent elements. The supported
composite types are:

<ul>

  <li> record: An aggregate type like a C-struct. This is a list of
typed fields that are together considered a single unit of data. A record
is serialized by sequentially serializing its constituent fields. In addition
to serialization, a record has comparison operations (equality and less-than)
implemented for it; these are defined as memberwise comparisons.

  <li>vector: A sequence of entries of the same data type, primitive
or composite.

  <li> map: An associative container mapping instances of a key type to
instances of a value type. The key and value types may themselves be primitive
or composite types. 

</ul>

<h3>Streams</h3>

Hadoop generates code for serializing and deserializing record types to
abstract streams. For each target language Hadoop defines very simple input
and output stream interfaces. Application writers can usually develop
concrete implementations of these by putting a one-method wrapper around
an existing stream implementation.
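
In the Java runtime documented here, the archive classes play this role.
The following minimal sketch, assuming nothing beyond this package and
java.io (the class name ArchiveRoundTrip is illustrative only), round-trips
two primitive values through the binary archives:

<pre><code>
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;

import org.apache.jute.BinaryInputArchive;
import org.apache.jute.BinaryOutputArchive;

public class ArchiveRoundTrip {
    public static void main(String[] args) throws IOException {
        // Write two primitive values to an in-memory stream.
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        BinaryOutputArchive oa = BinaryOutputArchive.getArchive(bos);
        oa.writeInt(1024, "count");
        oa.writeString("hello", "greeting");

        // Read them back in the same order; tags name the fields,
        // but the binary encoding does not embed them.
        BinaryInputArchive ia = BinaryInputArchive.getArchive(
                new ByteArrayInputStream(bos.toByteArray()));
        System.out.println(ia.readInt("count"));        // 1024
        System.out.println(ia.readString("greeting"));  // hello
    }
}
</code></pre>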


<h2>DDL Syntax and Examples</h2>

We now describe the syntax of the Hadoop data description language. This is
followed by a few examples of DDL usage.
 
<h3>Hadoop DDL Syntax</h3>

<pre><code>
recfile = *include module *record
include = "include" path
path = (relative-path / absolute-path)
module = "module" module-name
module-name = name *("." name)
record = "class" name "{" 1*(field) "}"
field = type name ";"
name = ALPHA *(ALPHA / DIGIT / "_")
type = (ptype / ctype)
ptype = ("byte" / "boolean" / "int" /
         "long" / "float" / "double" /
         "ustring" / "buffer")
ctype = ("vector" "&lt;" type "&gt;") /
        ("map" "&lt;" type "," type "&gt;") /
        name
</code></pre>

A DDL file describes one or more record types. It begins with zero or
more include declarations and a single mandatory module declaration,
followed by zero or more class declarations. The semantics of each of
these declarations are described below:

<ul>

<li>include: An include declaration specifies a DDL file to be
referenced when generating code for types in the current DDL file. Record types
in the current compilation unit may refer to types in all included files.
File inclusion is recursive. An include does not trigger code
generation for the referenced file.

<li> module: Every Hadoop DDL file must have a single module
declaration that follows the list of includes and precedes all record
declarations. A module declaration identifies a scope within which
the names of all types in the current file are visible. Module names are
mapped to C++ namespaces, Java packages etc. in generated code.

<li> class: Record types are specified through class
declarations. A class declaration is like a Java class declaration.
It specifies a named record type and a list of fields that constitute records
of the type. Usage is illustrated in the following examples.

</ul>

<h3>Examples</h3>

<ul>
<li>A simple DDL file links.jr with just one record declaration:
<pre><code>
module links {
    class Link {
        ustring URL;
        boolean isRelative;
        ustring anchorText;
    };
}
</code></pre>

<li> A DDL file outlinks.jr which includes the first:
<pre><code>
include "links.jr"

module outlinks {
    class OutLinks {
        ustring baseURL;
        vector&lt;links.Link&gt; outLinks;
    };
}
</code></pre>
</ul>

<h2>Code Generation</h2>

The Hadoop translator is written in Java. Invocation is done by executing a
wrapper shell script named rcc. It takes a list of
record description files as a mandatory argument and an
optional language argument, --language or -l (the default
is Java). Thus a typical invocation would look like:
<pre><code>
$ rcc -l C++ &lt;filename&gt; ...
</code></pre>


<h2>Target Language Mappings and Support</h2>

For all target languages, the unit of code generation is a record type. 
For each record type, Hadoop generates code for serialization and
deserialization, record comparison and access to record members.

<h3>C++</h3>

Support for including Hadoop-generated C++ code in applications comes in the
form of a header file recordio.hh, which needs to be included in source
that uses Hadoop types, and a library librecordio.a, against which applications
need to be linked. The header declares the hadoop C++ namespace, which defines
appropriate types for the various primitives, the basic interfaces for
records and streams and enumerates the supported serialization encodings.
Declarations of these interfaces and a description of their semantics follow:

<pre><code>
namespace hadoop {

  enum RecFormat { kBinary };

  class InStream {
  public:
    virtual ssize_t read(void *buf, size_t n) = 0;
  };

  class OutStream {
  public:
    virtual ssize_t write(const void *buf, size_t n) = 0;
  };

  class IOError : public std::runtime_error {
  public:
    explicit IOError(const std::string&amp; msg);
  };

  class IArchive;
  class OArchive;

  class RecordReader {
  public:
    RecordReader(InStream&amp; in, RecFormat fmt);
    virtual ~RecordReader(void);

    virtual void read(Record&amp; rec);
  };

  class RecordWriter {
  public:
    RecordWriter(OutStream&amp; out, RecFormat fmt);
    virtual ~RecordWriter(void);

    virtual void write(Record&amp; rec);
  };


  class Record {
  public:
    virtual std::string type(void) const = 0;
    virtual std::string signature(void) const = 0;
  protected:
    virtual bool validate(void) const = 0;

    virtual void
    serialize(OArchive&amp; oa, const std::string&amp; tag) const = 0;

    virtual void
    deserialize(IArchive&amp; ia, const std::string&amp; tag) = 0;
  };
}
</code></pre>

<ul>

<li> RecFormat: An enumeration of the serialization encodings supported
by this implementation of Hadoop.

<li> InStream: A simple abstraction for an input stream. This has a
single public read method that reads n bytes from the stream into
the buffer buf. It has the same semantics as a blocking read system
call, returning the number of bytes read or -1 if an error occurs.

<li> OutStream: A simple abstraction for an output stream. This has a
single write method that writes n bytes to the stream from the
buffer buf. It has the same semantics as a blocking write system
call, returning the number of bytes written or -1 if an error occurs.

<li> RecordReader: A RecordReader reads records one at a time from
an underlying stream in a specified record format. The reader is instantiated
with a stream and a serialization format. It has a read method that
takes an instance of a record and deserializes the record from the stream.

<li> RecordWriter: A RecordWriter writes records one at a
time to an underlying stream in a specified record format. The writer is
instantiated with a stream and a serialization format. It has a
write method that takes an instance of a record and serializes the
record to the stream.

<li> Record: The base class for all generated record types. This has two
public methods type and signature that return the typename and the
type signature of the record.

</ul>

Two files are generated for each record file (note: not for each record). If a
record file is named "name.jr", the generated files are 
"name.jr.cc" and "name.jr.hh" containing serialization 
implementations and record type declarations respectively.

For each record in the DDL file, the generated header file will contain a
class definition corresponding to the record type; method definitions for the
generated type will be present in the '.cc' file. The generated class will
inherit from the abstract class hadoop::Record. The DDL file's
module declaration determines the namespace the record belongs to.
Each '.' delimited token in the module declaration results in the
creation of a namespace. For instance, the declaration module docs.links
results in the creation of a docs namespace and a nested 
docs::links namespace. In the preceding examples, the Link class
is placed in the links namespace. The header file corresponding to
the links.jr file will contain:

<pre><code>
namespace links {
  class Link : public hadoop::Record {
    // ....
  };
}
</code></pre>

Each field within the record will cause the generation of a private member
declaration of the appropriate type in the class declaration, and one or more
accessor methods. The generated class will implement the serialize and
deserialize methods defined in hadoop::Record. It will also
implement the inspection methods type and signature from
hadoop::Record. A default constructor and virtual destructor will also
be generated. Serialization code will read/write records into streams that
implement the hadoop::InStream and the hadoop::OutStream interfaces.

For each member of a record, an accessor method is generated that returns
either the member or a reference to the member. For members that are returned 
by value, a setter method is also generated. This is true for primitive 
data members of the types byte, int, long, boolean, float and 
double. For example, for an int field called MyField the following
code is generated.

<pre><code>
...
private:
  int32_t mMyField;
  ...
public:
  int32_t getMyField(void) const {
    return mMyField;
  };

  void setMyField(int32_t m) {
    mMyField = m;
  };
  ...
</code></pre>

For a ustring, buffer, or composite field, the generated code
contains only accessors that return a reference to the field. A const
and a non-const accessor are generated. For example:

<pre><code>
...
private:
  std::string mMyBuf;
  ...
public:

  std::string&amp; getMyBuf() {
    return mMyBuf;
  };

  const std::string&amp; getMyBuf() const {
    return mMyBuf;
  };
  ...
</code></pre>

<h4>Examples</h4>

Suppose the inclrec.jr file contains:
<pre><code>
module inclrec {
    class RI {
        int      I32;
        double   D;
        ustring  S;
    };
}
</code></pre>

and the testrec.jr file contains:

<pre><code>
include "inclrec.jr"
module testrec {
    class R {
        vector&lt;float&gt; VF;
        RI            Rec;
        buffer        Buf;
    };
}
</code></pre>

Then an invocation of rcc such as:
<pre><code>
$ rcc -l c++ inclrec.jr testrec.jr
</code></pre>
will result in generation of four files:
inclrec.jr.{cc,hh} and testrec.jr.{cc,hh}.

The inclrec.jr.hh file will contain:

<pre><code>
#ifndef _INCLREC_JR_HH_
#define _INCLREC_JR_HH_

#include "recordio.hh"

namespace inclrec {
  
  class RI : public hadoop::Record {

  private:

    int32_t      mI32;
    double       mD;
    std::string  mS;

  public:

    RI(void);
    virtual ~RI(void);

    virtual bool operator==(const RI&amp; peer) const;
    virtual bool operator&lt;(const RI&amp; peer) const;

    virtual int32_t getI32(void) const { return mI32; }
    virtual void setI32(int32_t v) { mI32 = v; }

    virtual double getD(void) const { return mD; }
    virtual void setD(double v) { mD = v; }

    virtual std::string&amp; getS(void) { return mS; }
    virtual const std::string&amp; getS(void) const { return mS; }

    virtual std::string type(void) const;
    virtual std::string signature(void) const;

  protected:

    virtual void serialize(hadoop::OArchive&amp; a, const std::string&amp; tag) const;
    virtual void deserialize(hadoop::IArchive&amp; a, const std::string&amp; tag);

    virtual bool validate(void) const;
  };
} // end namespace inclrec

#endif /* _INCLREC_JR_HH_ */

</code></pre>

The testrec.jr.hh file will contain:


<pre><code>

#ifndef _TESTREC_JR_HH_
#define _TESTREC_JR_HH_

#include "inclrec.jr.hh"

namespace testrec {
  class R : public hadoop::Record {

  private:

    std::vector&lt;float&gt; mVF;
    inclrec::RI        mRec;
    std::string        mBuf;

  public:

    R(void);
    virtual ~R(void);

    virtual bool operator==(const R&amp; peer) const;
    virtual bool operator&lt;(const R&amp; peer) const;

    virtual std::vector&lt;float&gt;&amp; getVF(void);
    virtual const std::vector&lt;float&gt;&amp; getVF(void) const;

    virtual std::string&amp; getBuf(void);
    virtual const std::string&amp; getBuf(void) const;

    virtual inclrec::RI&amp; getRec(void);
    virtual const inclrec::RI&amp; getRec(void) const;
    
    virtual void serialize(hadoop::OArchive&amp; a, const std::string&amp; tag) const;
    virtual void deserialize(hadoop::IArchive&amp; a, const std::string&amp; tag);
    
    virtual std::string type(void) const;
    virtual std::string signature(void) const;
  };
} // end namespace testrec
#endif /* _TESTREC_JR_HH_ */

</code></pre>

<h3>Java</h3>

Code generation for Java is similar to that for C++. A Java class is generated
for each record type with private members corresponding to the fields. Getters
and setters for fields are also generated. Some differences arise in the
way comparison is expressed and in the mapping of modules to packages and
classes to files. For equality testing, an equals method is generated
for each record type. As per Java requirements, a hashCode method is also
generated. For comparison, a compareTo method is generated for each
record type. This has the semantics defined by the Java Comparable
interface, that is, the method returns a negative integer, zero, or a positive
integer as the invoked object is less than, equal to, or greater than the
comparison parameter.
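
For the Link record from the DDL examples above, the generated
compareTo might reduce to memberwise comparison in declaration order,
along these lines (a hypothetical sketch, not actual rcc output):

<pre><code>
public int compareTo(Object peer) throws ClassCastException {
    if (!(peer instanceof Link)) {
        throw new ClassCastException("Comparing different types of records.");
    }
    Link other = (Link) peer;
    int ret = URL.compareTo(other.URL);             // first field
    if (ret != 0) return ret;
    ret = (isRelative == other.isRelative)          // booleans: false sorts
            ? 0 : (isRelative ? 1 : -1);            // before true
    if (ret != 0) return ret;
    return anchorText.compareTo(other.anchorText);  // last field
}
</code></pre>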

A .java file is generated per record type as opposed to per DDL
file as in C++. The module declaration translates to a Java
package declaration. The module name maps to an identical Java package
name. In addition to this mapping, the DDL compiler creates the appropriate
directory hierarchy for the package and places the generated .java
files in the correct directories.
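
As an illustration, a generated Java class for the Link record from the
earlier DDL example might look roughly like the following. This is a
hand-written sketch against this package's Record, InputArchive, and
OutputArchive types, not actual rcc output; accessors for the remaining
fields, along with equals, hashCode, and the compareTo sketched above, are
elided:

<pre><code>
package links;

import java.io.IOException;

import org.apache.jute.InputArchive;
import org.apache.jute.OutputArchive;
import org.apache.jute.Record;

public class Link implements Record {
    private String URL;
    private boolean isRelative;
    private String anchorText;

    public String getURL() { return URL; }
    public void setURL(String u) { URL = u; }
    // ... accessors for isRelative and anchorText elided ...

    // Fields are written in declaration order, bracketed by record markers.
    public void serialize(OutputArchive a, String tag) throws IOException {
        a.startRecord(this, tag);
        a.writeString(URL, "URL");
        a.writeBool(isRelative, "isRelative");
        a.writeString(anchorText, "anchorText");
        a.endRecord(this, tag);
    }

    public void deserialize(InputArchive a, String tag) throws IOException {
        a.startRecord(tag);
        URL = a.readString("URL");
        isRelative = a.readBool("isRelative");
        anchorText = a.readString("anchorText");
        a.endRecord(tag);
    }
}
</code></pre>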

<h2>Mapping Summary</h2>

<pre><code>
DDL Type        C++ Type            Java Type 

boolean         bool                boolean
byte            int8_t              byte
int             int32_t             int
long            int64_t             long
float           float               float
double          double              double
ustring         std::string         Text
buffer          std::string         java.io.ByteArrayOutputStream
class type      class type          class type
vector&lt;type&gt;    std::vector&lt;type&gt;   java.util.ArrayList
map&lt;type,type&gt;  std::map&lt;type,type&gt; java.util.TreeMap
</code></pre>

<h2>Data Encodings</h2>

This section describes the format of the data encodings supported by Hadoop.
Currently, one data encoding is supported, namely binary.

<h3>Binary Serialization Format</h3>

The binary data encoding format is fairly dense. Serialization of composite
types is simply defined as a concatenation of serializations of the constituent
elements (lengths are included in vectors and maps).

Composite types are serialized as follows:
<ul>
<li> class: Sequence of serialized members.
<li> vector: The number of elements, serialized as an int, followed by a
sequence of serialized elements.
<li> map: The number of key-value pairs, serialized as an int, followed
by a sequence of serialized (key,value) pairs.
</ul>

Serialization of primitives is more interesting, with a zero-compression
optimization for integral types and normalization to UTF-8 for strings.
Primitive types are serialized as follows:

<ul>
<li> byte: Represented by 1 byte, as is.
<li> boolean: Represented by 1 byte (0 or 1).
<li> int/long: Integers and longs are serialized zero compressed.
Represented as 1 byte if -120 &lt;= value &lt; 128. Otherwise, serialized as a
sequence of 2-5 bytes for ints, 2-9 bytes for longs. The first byte represents
the number of trailing bytes, N, as the negative number (-120-N). For example,
the number 1024 (0x400) is represented by the byte sequence 'x86 x04 x00'.
This doesn't help much for 4-byte integers but does a reasonably good job with
longs without bit twiddling. A sketch of this encoding appears after this list.
<li> float/double: Serialized in IEEE 754 single and double precision
format in network byte order. This is the format used by Java.
<li> ustring: Serialized as a 4-byte zero compressed length followed by
data encoded as UTF-8. Strings are normalized to UTF-8 regardless of native
language representation.
<li> buffer: Serialized as a 4-byte zero compressed length followed by the
raw bytes in the buffer.
</ul>
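
To make the zero compression concrete, here is a minimal Java sketch of the
encoding described above. It handles values that fit in one byte or are
non-negative; how negative values below -120 choose a payload length is not
spelled out in this document, so that case is omitted. This is an
illustration, not the library's implementation:

<pre><code>
static byte[] encodeZeroCompressed(long v) {
    if (v &gt;= -120 &amp;&amp; v &lt; 128) {
        return new byte[] { (byte) v };   // small values: one byte, as-is
    }
    int n = 8;                            // number of payload bytes
    while (n &gt; 1 &amp;&amp; (v &gt;&gt;&gt; ((n - 1) * 8)) == 0) {
        n--;                              // drop leading zero bytes
    }
    byte[] out = new byte[n + 1];
    out[0] = (byte) (-120 - n);           // header byte encodes N as -(120+N)
    for (int i = 0; i &lt; n; i++) {
        out[i + 1] = (byte) (v &gt;&gt;&gt; ((n - 1 - i) * 8));  // payload, big-endian
    }
    return out;   // encodeZeroCompressed(1024) yields 0x86 0x04 0x00
}
</code></pre>
</div>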
</div>
<!-- ======= START OF BOTTOM NAVBAR ====== -->
<div class="bottomNav"><a name="navbar.bottom">
<!--   -->
</a>
<div class="skipNav"><a href="#skip.navbar.bottom" title="Skip navigation links">Skip navigation links</a></div>
<a name="navbar.bottom.firstrow">
<!--   -->
</a>
<ul class="navList" title="Navigation">
<li><a href="../../../overview-summary.html">Overview</a></li>
<li class="navBarCell1Rev">Package</li>
<li>Class</li>
<li><a href="package-use.html">Use</a></li>
<li><a href="package-tree.html">Tree</a></li>
<li><a href="../../../deprecated-list.html">Deprecated</a></li>
<li><a href="../../../index-all.html">Index</a></li>
<li><a href="../../../help-doc.html">Help</a></li>
</ul>
</div>
<div class="subNav">
<ul class="navList">
<li>Prev&nbsp;Package</li>
<li><a href="../../../org/apache/jute/compiler/package-summary.html">Next&nbsp;Package</a></li>
</ul>
<ul class="navList">
<li><a href="../../../index.html?org/apache/jute/package-summary.html" target="_top">Frames</a></li>
<li><a href="package-summary.html" target="_top">No&nbsp;Frames</a></li>
</ul>
<ul class="navList" id="allclasses_navbar_bottom">
<li><a href="../../../allclasses-noframe.html">All&nbsp;Classes</a></li>
</ul>
<div>
<script type="text/javascript"><!--
  allClassesLink = document.getElementById("allclasses_navbar_bottom");
  if(window==top) {
    allClassesLink.style.display = "block";
  }
  else {
    allClassesLink.style.display = "none";
  }
  //-->
</script>
</div>
<a name="skip.navbar.bottom">
<!--   -->
</a></div>
<!-- ======== END OF BOTTOM NAVBAR ======= -->
<p class="legalCopy"><small>Copyright &#169; 2008&#x2013;2023 <a href="https://www.apache.org/">The Apache Software Foundation</a>. All rights reserved.</small></p>
</body>
</html>