summaryrefslogtreecommitdiff
path: root/src/scoop.c
blob: 386cea96ee94aa660edac71e374193970e40fb4a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
/*= -*- c-basic-offset: 4; indent-tabs-mode: nil; -*-
 *
 * librsync -- the library for network deltas
 *
 * Copyright (C) 2000, 2001 by Martin Pool <mbp@sourcefrog.net>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

                              /*=
                               | To walk on water you've gotta sink
                               | in the ice.
                               |   -- Shihad, `The General Electric'.
                               */

/** \file scoop.c
 * This file deals with readahead from caller-supplied buffers.
 *
 * Many functions require a certain minimum amount of input to do their
 * processing. For example, to calculate a strong checksum of a block we need
 * at least a block of input.
 *
 * Since we put the buffers completely under the control of the caller, we
 * can't count on ever getting this much data all in one go. We can't simply
 * wait, because the caller might have a smaller buffer than we require and so
 * we'll never get it. For the same reason we must always accept all the data
 * we're given.
 *
 * So, stream input data that's required for readahead is put into a special
 * buffer, from which the caller can then read. It's essentially like an
 * internal pipe, which on any given read request may or may not be able to
 * actually supply the data.
 *
 * As a future optimization, we might try to take data directly from the input
 * buffer if there's already enough there.
 *
 * \todo We probably know a maximum amount of data that can be scooped up, so
 * we could just avoid dynamic allocation. However that can't be fixed at
 * compile time, because when generating a delta it needs to be large enough to
 * hold one full block. Perhaps we can set it up when the job is allocated? It
 * would be kind of nice to not do any memory allocation after startup, as
 * bzlib does this. */

#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include "librsync.h"
#include "job.h"
#include "stream.h"
#include "trace.h"
#include "util.h"

/** Try to accept data from the input buffer to get \p len bytes in the scoop.
 *
 * The scoop buffer is first grown, or its contents moved to the front, so
 * that \p len bytes fit contiguously starting at job->scoop_next. Then as
 * much input as is available is copied in, up to the amount needed to bring
 * the scoop to \p len bytes, and that input is consumed from the stream.
 *
 * The scoop may still hold fewer than \p len bytes on return if the input did
 * not have enough data.
 *
 * \param *job The job whose scoop is to be filled from job->stream.
 *
 * \param len The total number of bytes wanted in the scoop. Must be greater
 * than the number of bytes the scoop already holds. */
void rs_scoop_input(rs_job_t *job, size_t len)
{
    rs_buffers_t *stream = job->stream;
    size_t tocopy;

    assert(len > job->scoop_avail);

    if (job->scoop_alloc < len) {
        /* Need to allocate a larger scoop: the smallest power of two, at
           least 64, that can hold LEN bytes. */
        rs_byte_t *newbuf;
        size_t newsize = 64;

        while (newsize < len) {
            if (newsize > (size_t)-1 / 2) {
                /* Doubling again would wrap to zero and the loop would never
                   terminate, so settle for exactly LEN bytes. */
                newsize = len;
                break;
            }
            newsize <<= 1;
        }
        newbuf = rs_alloc(newsize, "scoop buffer");
        if (job->scoop_avail)
            memcpy(newbuf, job->scoop_next, job->scoop_avail);
        /* free(NULL) is a no-op, so no guard is needed for the first
           allocation. */
        free(job->scoop_buf);
        job->scoop_buf = job->scoop_next = newbuf;
        rs_trace("resized scoop buffer to " FMT_SIZE " bytes from " FMT_SIZE "",
                 newsize, job->scoop_alloc);
        job->scoop_alloc = newsize;
    } else if (job->scoop_alloc - (size_t)(job->scoop_next - job->scoop_buf) <
               len) {
        /* The space from scoop_next to the end of the buffer is too small for
           LEN bytes. Compare offsets rather than pointers so that no pointer
           past the end of the buffer is ever formed. Move existing data to
           the front of the scoop. */
        rs_trace("moving scoop " FMT_SIZE " bytes to reuse " FMT_SIZE " bytes",
                 job->scoop_avail, (size_t)(job->scoop_next - job->scoop_buf));
        memmove(job->scoop_buf, job->scoop_next, job->scoop_avail);
        job->scoop_next = job->scoop_buf;
    }
    /* take as much input as is available, to give up to LEN bytes in the
       scoop. */
    tocopy = len - job->scoop_avail;
    if (tocopy > stream->avail_in)
        tocopy = stream->avail_in;
    assert(job->scoop_next + tocopy + job->scoop_avail <=
           job->scoop_buf + job->scoop_alloc);

    /* Append the new input after the data already in the scoop, then consume
       it from the caller's input buffer. */
    memcpy(job->scoop_next + job->scoop_avail, stream->next_in, tocopy);
    rs_trace("accepted " FMT_SIZE " bytes from input to scoop", tocopy);
    job->scoop_avail += tocopy;
    stream->next_in += tocopy;
    stream->avail_in -= tocopy;
}

/** Consume \p len bytes that were previously made visible by readahead.
 *
 * Call this once you have looked at readahead data and decided to keep some
 * of it. \p len may not exceed the amount of data in whichever source is
 * being consumed; this is checked with an assertion.
 *
 * For example, delta generation reads ahead one block. If the block matches
 * it advances over the whole block, otherwise over a single byte.
 *
 * \param *job The job whose scoop or input stream is advanced.
 *
 * \param len The number of bytes to skip over. */
void rs_scoop_advance(rs_job_t *job, size_t len)
{
    rs_buffers_t *stream = job->stream;

    /* Data is consumed from exactly one source per call: a caller could never
       have examined a run of bytes that straddles the scoop and the input
       buffer. */
    if (!job->scoop_avail) {
        /* Scoop is empty, so the bytes come from the input buffer. */
        rs_trace("advance over " FMT_SIZE " bytes from input buffer", len);
        assert(len <= stream->avail_in);
        stream->next_in += len;
        stream->avail_in -= len;
        return;
    }

    /* Otherwise the bytes come out of the scoop buffer. */
    rs_trace("advance over " FMT_SIZE " bytes from scoop", len);
    assert(len <= job->scoop_avail);
    job->scoop_next += len;
    job->scoop_avail -= len;
}

/** Look at the next \p len bytes of input without consuming them.
 *
 * If the scoop is empty and the input buffer already holds \p len bytes,
 * \p ptr is pointed straight at the input buffer. Otherwise, available input
 * is pulled into the scoop with rs_scoop_input(), and \p ptr is pointed at
 * the scoop once it holds at least \p len bytes.
 *
 * The returned data is not advanced over, so this function lets you do
 * readahead. Call rs_scoop_advance() to skip over whatever you decide to
 * keep.
 *
 * \param *job The job to read from.
 *
 * \param len The number of bytes wanted.
 *
 * \param **ptr Set to point at the data when RS_DONE is returned; left
 * untouched otherwise.
 *
 * \return RS_DONE if \p len bytes are available, RS_INPUT_ENDED if they are
 * not and the stream is flagged eof_in, or RS_BLOCKED if they are not and more
 * input may still arrive. */
rs_result rs_scoop_readahead(rs_job_t *job, size_t len, void **ptr)
{
    rs_buffers_t *stream = job->stream;
    rs_job_check(job);

    if (job->scoop_avail == 0 && len <= stream->avail_in) {
        /* Nothing is scooped and the input alone satisfies the request. */
        rs_trace("got " FMT_SIZE " bytes direct from input", len);
        *ptr = stream->next_in;
        return RS_DONE;
    }

    if (stream->avail_in != 0 && job->scoop_avail < len) {
        /* The scoop is short; top it up from whatever input there is. */
        rs_trace("scoop has less than " FMT_SIZE " bytes, scooping from "
                 FMT_SIZE " input bytes", len, stream->avail_in);
        rs_scoop_input(job, len);
    }

    if (job->scoop_avail < len) {
        /* Still short: report whether more input can be expected. */
        if (stream->eof_in) {
            rs_trace("reached end of input stream");
            return RS_INPUT_ENDED;
        }
        rs_trace("blocked with insufficient input data");
        return RS_BLOCKED;
    }

    /* The scoop now holds the requested amount. */
    rs_trace("scoop has at least " FMT_SIZE " bytes, this is enough",
             job->scoop_avail);
    *ptr = job->scoop_next;
    return RS_DONE;
}

/** Read \p len bytes if possible, consuming them from the input.
 *
 * This is rs_scoop_readahead() followed, on success only, by
 * rs_scoop_advance() over the same \p len bytes.
 *
 * \param *job The job to read from.
 *
 * \param len The number of bytes wanted.
 *
 * \param **ptr Set to point at a read-only buffer holding the data when
 * enough is available.
 *
 * \return RS_DONE if there was enough data, RS_BLOCKED if there was not enough
 * data yet, or RS_INPUT_ENDED if there was not enough data and at EOF. */
rs_result rs_scoop_read(rs_job_t *job, size_t len, void **ptr)
{
    const rs_result status = rs_scoop_readahead(job, len, ptr);

    /* Only consume the data when the full request was satisfied. */
    if (status != RS_DONE)
        return status;
    rs_scoop_advance(job, len);
    return RS_DONE;
}

/** Read whatever data remains available.
 *
 * The amount reported by rs_scoop_avail() is stored in \p len. If it is
 * non-zero, that many bytes are requested through rs_scoop_read().
 *
 * \param *job The job to read from.
 *
 * \param *len Set to the length of the available data.
 *
 * \param **ptr Set to point at the available data when the read succeeds.
 *
 * \return The result of rs_scoop_read() if there was data, RS_INPUT_ENDED if
 * there was no data and at EOF, RS_BLOCKED if there was no data and not at
 * EOF. */
rs_result rs_scoop_read_rest(rs_job_t *job, size_t *len, void **ptr)
{
    const size_t remaining = rs_scoop_avail(job);

    *len = remaining;
    if (remaining != 0)
        return rs_scoop_read(job, remaining, ptr);

    /* Nothing to hand over: say whether more input could still arrive. */
    return job->stream->eof_in ? RS_INPUT_ENDED : RS_BLOCKED;
}