/**
* Copyright (C) 2014 MongoDB Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License, version 3,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see .
*
* As a special exception, the copyright holders give permission to link the
* code of portions of this program with the OpenSSL library under certain
* conditions as described in each individual source file and distribute
* linked combinations including the program with the OpenSSL library. You
* must comply with the GNU Affero General Public License in all respects for
* all of the code used other than as permitted herein. If you modify file(s)
* with this exception, you may extend this exception to your version of the
* file(s), but you are not obligated to do so. If you do not wish to do so,
* delete this exception statement from your version. If you delete this
* exception statement from all source files in the program, then also delete
* it in the license file.
*/
#pragma once
#include "mongo/db/diskloc.h"
#include "mongo/db/storage/mmap_v1/dur.h"
#include "mongo/db/jsobj.h"
#include "mongo/db/storage/record.h"
#include "mongo/db/structure/btree/key.h"
namespace mongo {
const int OldBucketSize = 8192;
//
// On-disk index format
//
#pragma pack(1)
/**
* This is the fixed width data component for storage of a key within a bucket. It contains an
* offset pointer to the variable width bson data component. This may be 'unused', please see
* below.
*
* Why is this templated on Loc? Because V0 and V1 have different size DiskLoc(s) but otherwise
* the same layout.
*/
template
struct FixedWidthKey {
//
// Data
//
/**
* The 'left' child bucket of this key. If this is the i-th key, it points to the i index
* child bucket.
*/
LocType prevChildBucket;
/**
* The location of the record associated with this key.
*/
LocType recordLoc;
/**
* Offset within current bucket of the variable width bson key for this _KeyNode.
*/
unsigned short _kdo;
//
// Accessors / mutators
//
short keyDataOfs() const {
return static_cast(_kdo);
}
void setKeyDataOfs(short s) {
_kdo = s;
invariant(s>=0);
}
void setKeyDataOfsSavingUse(short s) {
// XXX kill this func
setKeyDataOfs(s);
}
/**
* Unused keys are not returned by read operations. Keys may be marked
* as unused in cases where it is difficult to delete them while
* maintaining the constraints required of a btree.
*
* Setting ofs to odd is the sentinel for unused, as real recordLoc's
* are always even numbers. Note we need to keep its value basically
* the same as we use the recordLoc as part of the key in the index
* (to handle duplicate keys efficiently).
*
* Flagging keys as unused is a feature that is being phased out in favor
* of deleting the keys outright. The current btree implementation is
* not expected to mark a key as unused in a non legacy btree.
*/
void setUnused() {
recordLoc.GETOFS() |= 1;
}
void setUsed() { recordLoc.GETOFS() &= ~1; }
int isUnused() const {
return recordLoc.getOfs() & 1;
}
int isUsed() const {
return !isUnused();
}
};
/**
* This structure represents header data for a btree bucket. An object of
* this type is typically allocated inside of a buffer of size BucketSize,
* resulting in a full bucket with an appropriate header.
*
* The body of a btree bucket contains an array of _KeyNode objects starting
* from its lowest indexed bytes and growing to higher indexed bytes. The
* body also contains variable width bson keys, which are allocated from the
* highest indexed bytes toward lower indexed bytes.
*
* |hhhh|kkkkkkk--------bbbbbbbbbbbuuubbbuubbb|
* h = header data
* k = KeyNode data
* - = empty space
* b = bson key data
* u = unused (old) bson key data, that may be garbage collected
*/
struct BtreeBucketV0 {
/**
* Parent bucket of this bucket, which isNull() for the root bucket.
*/
DiskLoc parent;
/**
* Given that there are n keys, this is the n index child.
*/
DiskLoc nextChild;
/**
* Can be reused, value is 8192 in current pdfile version Apr2010
*/
unsigned short _wasSize;
/**
* zero
*/
unsigned short _reserved1;
int flags;
/** basicInsert() assumes the next three members are consecutive and in this order: */
/** Size of the empty region. */
int emptySize;
/** Size used for bson storage, including storage of old keys. */
int topSize;
/* Number of keys in the bucket. */
int n;
int reserved;
/* Beginning of the bucket's body */
char data[4];
};
/**
* A variant of DiskLoc Used by the V1 bucket type.
*/
struct DiskLoc56Bit {
//
// Data
//
int ofs;
unsigned char _a[3];
//
// Accessors XXX rename these, this is terrible
//
int& GETOFS() { return ofs; }
int getOfs() const { return ofs; }
//
// Comparison
//
bool isNull() const { return ofs < 0; }
unsigned long long toLongLong() const {
// endian
unsigned long long result = ofs;
char* cursor = reinterpret_cast(&result);
*reinterpret_cast(cursor + 4) = *reinterpret_cast(&_a[0]);
*reinterpret_cast(cursor + 6) = *reinterpret_cast(&_a[2]);
*reinterpret_cast(cursor + 7) = uint8_t(0);
return result;
}
bool operator<(const DiskLoc56Bit& rhs) const {
// the orderering of dup keys in btrees isn't too critical, but we'd like to put items
// that are close together on disk close together in the tree, so we do want the file #
// to be the most significant bytes
return toLongLong() < rhs.toLongLong();
}
int compare(const DiskLoc56Bit& rhs) const {
unsigned long long a = toLongLong();
unsigned long long b = rhs.toLongLong();
if ( a < b ) {
return -1;
}
else {
return a == b ? 0 : 1;
}
}
bool operator==(const DiskLoc56Bit& rhs) const {
return toLongLong() == rhs.toLongLong();
}
bool operator!=(const DiskLoc56Bit& rhs) const {
return toLongLong() != rhs.toLongLong();
}
bool operator==(const DiskLoc& rhs) const {
return DiskLoc(*this) == rhs;
}
bool operator!=(const DiskLoc& rhs) const {
return !(*this==rhs);
}
//
// Mutation
//
enum {
// first bit of offsets used in _KeyNode we don't use -1 here.
OurNullOfs = -2
};
void Null() {
ofs = OurNullOfs;
_a[0] = _a[1] = _a[2] = 0;
}
void operator=(const DiskLoc& loc) {
ofs = loc.getOfs();
int la = loc.a();
invariant( la <= 0xffffff ); // must fit in 3 bytes
if( la < 0 ) {
if ( la != -1 ) {
log() << "btree diskloc isn't negative 1: " << la << std::endl;
invariant ( la == -1 );
}
la = 0;
ofs = OurNullOfs;
}
memcpy(_a, &la, 3); // endian
}
//
// Type Conversion
//
operator const DiskLoc() const {
// endian
if( isNull() ) return DiskLoc();
unsigned a = *((unsigned *) (_a-1));
return DiskLoc(a >> 8, ofs);
}
std::string toString() const { return DiskLoc(*this).toString(); }
};
struct BtreeBucketV1 {
/** Parent bucket of this bucket, which isNull() for the root bucket. */
DiskLoc56Bit parent;
/** Given that there are n keys, this is the n index child. */
DiskLoc56Bit nextChild;
unsigned short flags;
/** Size of the empty region. */
unsigned short emptySize;
/** Size used for bson storage, including storage of old keys. */
unsigned short topSize;
/* Number of keys in the bucket. */
unsigned short n;
/* Beginning of the bucket's body */
char data[4];
};
struct BtreeLayoutV0 {
typedef FixedWidthKey FixedWidthKeyType;
typedef DiskLoc LocType;
typedef KeyBson KeyType;
typedef KeyBson KeyOwnedType;
typedef BtreeBucketV0 BucketType;
enum { BucketSize = 8192 };
// largest key size we allow. note we very much need to support bigger keys (somehow) in
// the future.
static const int KeyMax = OldBucketSize / 10;
// A sentinel value sometimes used to identify a deallocated bucket.
static const int INVALID_N_SENTINEL = -1;
static void initBucket(BucketType* bucket) {
bucket->_reserved1 = 0;
bucket->_wasSize = BucketSize;
bucket->reserved = 0;
}
};
struct BtreeLayoutV1 {
typedef FixedWidthKey FixedWidthKeyType;
typedef KeyV1 KeyType;
typedef KeyV1Owned KeyOwnedType;
typedef DiskLoc56Bit LocType;
typedef BtreeBucketV1 BucketType;
// The -16 is to leave room for the Record header.
enum { BucketSize = 8192 - 16 };
static const int KeyMax = 1024;
// A sentinel value sometimes used to identify a deallocated bucket.
static const unsigned short INVALID_N_SENTINEL = 0xffff;
static void initBucket(BucketType* bucket) { }
};
#pragma pack()
} // namespace mongo