summaryrefslogtreecommitdiff
path: root/db/extsort.h
blob: 42702a5a47ac74c066e11f8331441991fe581023 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
// extsort.h

/**
*    Copyright (C) 2008 10gen Inc.
*
*    This program is free software: you can redistribute it and/or  modify
*    it under the terms of the GNU Affero General Public License, version 3,
*    as published by the Free Software Foundation.
*
*    This program is distributed in the hope that it will be useful,
*    but WITHOUT ANY WARRANTY; without even the implied warranty of
*    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
*    GNU Affero General Public License for more details.
*
*    You should have received a copy of the GNU Affero General Public License
*    along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

#pragma once

#include "../stdafx.h"

#include "jsobj.h"
#include "namespace.h"

#include <map>

namespace mongo {

    /**
       for sorting by BSONObj and attaching a value
     */
    class BSONObjExternalSorter : boost::noncopyable {
    public:
        
        typedef pair<BSONObj,DiskLoc> Data;
        
    private:
        class FileIterator : boost::noncopyable {
        public:
            FileIterator( string file );
            ~FileIterator();
            bool more();
            Data next();            
        private:
            MemoryMappedFile _file;
            char * _buf;
            char * _end;
        };

        class MyCmp {
        public:
            MyCmp( const BSONObj & order = BSONObj() ) : _order( order ){}
            bool operator()( const Data &l, const Data &r ) const {
                _compares++;
                int x = l.first.woCompare( r.first , _order );
                if ( x )
                    return x < 0;
                return l.second.compare( r.second ) < 0;
            };
        private:
            BSONObj _order;
        };
        
    public:

        typedef list<Data> InMemory;

        class Iterator : boost::noncopyable {
        public:
            
            Iterator( BSONObjExternalSorter * sorter );
            ~Iterator();
            bool more();
            Data next();
            
        private:
            MyCmp _cmp;
            vector<FileIterator*> _files;
            vector< pair<Data,bool> > _stash;
            
            InMemory * _in;
            InMemory::iterator _it;
            
        };
        
        BSONObjExternalSorter( const BSONObj & order = BSONObj() , long maxFileSize = 1024 * 1024 * 100 );
        ~BSONObjExternalSorter();
        
        void add( const BSONObj& o , const DiskLoc & loc );
        void add( const BSONObj& o , int a , int b ){
            add( o , DiskLoc( a , b ) );
        }

        /* call after adding values, and before fetching the iterator */
        void sort();
        
        auto_ptr<Iterator> iterator(){
            uassert( "not sorted" , _sorted );
            return auto_ptr<Iterator>( new Iterator( this ) );
        }
        
        int numFiles(){
            return _files.size();
        }

    private:
        
        void sort( string file );
        void finishMap();
        
        BSONObj _order;
        long _maxFilesize;
        path _root;
        
        InMemory * _cur;
        long _curSizeSoFar;
        
        list<string> _files;
        bool _sorted;

        static unsigned long long _compares;
    };
}