path: root/db/compact.cpp
blob: 6bafd91be15fbdf13696f701feb9b66f196daee8
/* @file compact.cpp
   compaction of deleted space in pdfiles (datafiles)
*/

/* NOTE 6Oct2010 : this file PRELIMINARY, EXPERIMENTAL, NOT DONE, NOT USED YET (not in SConstruct) */

/**
*    Copyright (C) 2010 10gen Inc.
*
*    This program is free software: you can redistribute it and/or  modify
*    it under the terms of the GNU Affero General Public License, version 3,
*    as published by the Free Software Foundation.
*
*    This program is distributed in the hope that it will be useful,
*    but WITHOUT ANY WARRANTY; without even the implied warranty of
*    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
*    GNU Affero General Public License for more details.
*
*    You should have received a copy of the GNU Affero General Public License
*    along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

#include "pch.h"
#include "pdfile.h"
#include "concurrency.h"
#include "commands.h"
#include "curop-inl.h"
#include "../util/concurrency/task.h"

namespace mongo {

    class CompactJob : public task::Task {
    public:
        CompactJob(string ns) : _ns(ns) { }
    private:
        virtual string name() const { return "compact"; }
        virtual void doWork();
        NamespaceDetails * beginBlock();
        void doBatch();
        void prep();
        const string _ns;
        unsigned long long _nrecords;
        unsigned long long _ncompacted;
        DiskLoc _firstExtent;
    };

    // lock & set context first.  this checks that collection still exists, and that it hasn't
    // morphed into a capped collection between locks (which is possible)
    NamespaceDetails * CompactJob::beginBlock() {
        NamespaceDetails *nsd = nsdetails(_ns.c_str());
        if( nsd == 0 ) throw "ns no longer present";
        if( nsd->firstExtent.isNull() )
            throw "no first extent";
        if( nsd->capped )
            throw "capped collection";
        return nsd;
    }

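    /* Perform one compaction batch.  First, under a read lock, walk up to 100 records from the
       front of the first extent, touching each so its pages are faulted in outside the write lock
       (stopping early if more than ~50ms elapse).  Then, under a write lock, delete that many
       records from the front of the first extent and re-insert copies of their BSON objects, so
       the extent drains from the front.  Throws const char* messages for conditions that should
       end the job. */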
    void CompactJob::doBatch() {
        unsigned n = 0;
        {
            /* pre-touch records in a read lock so that paging happens under the read lock, not the
               write lock.  note we are only touching the records though; if indexes aren't in RAM,
               they will page in later, under the write lock.  so the benefit is only partial.
               */
            readlock lk;
            Timer t;
            Client::Context ctx(_ns);
            NamespaceDetails *nsd = beginBlock();
            if( nsd->firstExtent != _firstExtent )  {
                // TEMP DEV - stop after 1st extent
                throw "change of first extent";
            }
            DiskLoc loc = nsd->firstExtent.ext()->firstRecord;
            while( !loc.isNull() ) {
                Record *r = loc.rec();
                loc = r->getNext(loc);
                if( ++n >= 100 || (n % 8 == 0 && t.millis() > 50) )
                    break;
            }
        }
        {
            writelock lk;
            Client::Context ctx(_ns);
            NamespaceDetails *nsd = beginBlock();
            for( unsigned i = 0; i < n; i++ ) {
                if( nsd->firstExtent != _firstExtent )  {
                    // TEMP DEV - stop after 1st extent
                    throw "change of first extent (or it is now null)";
                }
                DiskLoc loc = nsd->firstExtent.ext()->firstRecord;
                Record *rec = loc.rec();
                BSONObj o = loc.obj().getOwned(); // todo: inefficient, double mem copy...
                try {
                    theDataFileMgr.deleteRecord(_ns.c_str(), rec, loc, false);
                }
                catch(DBException&) { throw "error deleting record"; }
                try {
                    theDataFileMgr.insertNoReturnVal(_ns.c_str(), o);
                }
                catch(DBException&) {
                    /* todo: save the record somehow??? try again with 'avoid' logic? */
                    log() << "compact: error re-inserting record ns:" << _ns << " n:" << _nrecords << " _id:" << o["_id"].toString() << endl;
                    throw "error re-inserting record";
                }
                ++_ncompacted;
                if( killCurrentOp.globalInterruptCheck() )
                    throw "interrupted";
            }
        }
    }

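    /* Snapshot the starting state under a read lock: remember the first extent and the record
       count, so doWork() can stop once that many records have been rewritten. */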
    void CompactJob::prep() {
        readlock lk;
        Client::Context ctx(_ns);
        NamespaceDetails *nsd = beginBlock();
        DiskLoc L = nsd->firstExtent;
        assert( !L.isNull() );
        _firstExtent = L;
        _nrecords = nsd->stats.nrecords;
        _ncompacted = 0;
    }

    static mutex m("compact");
    static volatile bool running;

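    /* Background thread entry point: register a Client and a CurOp for this thread, wait 60
       seconds, then run prep() followed by doBatch() in a loop until the number of records
       present at prep() time have all been rewritten.  The const char* exceptions thrown above
       are caught here, logged, and end the job; 'running' is cleared so compact can be invoked
       again. */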
    void CompactJob::doWork() {
        Client::initThread("compact");
        cc().curop()->reset();
        cc().curop()->setNS(_ns.c_str());
        cc().curop()->markCommand();
        sleepsecs(60);
        try {
            prep();
            while( _ncompacted < _nrecords )
                doBatch();
        }
        catch(const char *p) {
            log() << "info: exception compact " << p << endl;
        }
        catch(...) {
            log() << "info: exception compact" << endl;
        }
        mongo::running = false;
        cc().shutdown();
    }

    /* --- CompactCmd --- */

    class CompactCmd : public Command {
    public:
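        /* Validate the target namespace and make sure it exists; then, under the compact mutex,
           refuse to start a second job if one is already running, otherwise set 'running' and
           fork a background CompactJob.  The command therefore returns before any compaction
           work has been done. */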
        virtual bool run(const string& db, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
            string coll = cmdObj.firstElement().valuestr();
            if( coll.empty() || db.empty() ) {
                errmsg = "no collection name specified";
                return false;
            }
            string ns = db + '.' + coll;
            assert( isANormalNSName(ns.c_str()) );
            {
                readlock lk;
                Client::Context ctx(ns);
                if( nsdetails(ns.c_str()) == 0 ) {
                    errmsg = "namespace " + ns + " does not exist";
                    return false;
                }
            }
            {
                scoped_lock lk(m);
                if( running ) {
                    errmsg = "a compaction is already running";
                    return false;
                }
                running = true;
                task::fork( new CompactJob(ns) );
                return true;
            }
            errmsg = "not done";
            return false;
        }

        virtual LockType locktype() const { return NONE; }
        virtual bool adminOnly() const { return false; }
        virtual bool slaveOk() const { return true; }
        virtual bool logTheOp() { return false; }
        virtual void help( stringstream& help ) const {
            help << "compact / defragment a collection in the background, slowly, attempting to minimize disruptions to other operations\n"
                 "{ compact : <collection> }";
        }
        virtual bool requiresAuth() { return true; }

        /** the Command base class constructor also takes optional parameters, unused here:
            @param webUI expose the command in the web ui as localhost:28017/<name>
            @param oldName an optional old, deprecated name for the command
        */
        CompactCmd() : Command("compact") { }
    };
    static CompactCmd compactCmd;
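
    /* Example invocation (illustrative; the collection name "foo" is just a placeholder).  From
       the mongo shell:
           db.runCommand( { compact : "foo" } )
       The command returns as soon as the background CompactJob has been forked; any failure in
       the job is reported only in the server log. */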

}