/* -*- C++ -*- */
// $Id$
// ============================================================================
//
// = LIBRARY
// examples/Web_Crawler
//
// = FILENAME
// Iterators.h
//
// = AUTHOR
// Douglas C. Schmidt <schmidt@cs.wustl.edu>
//
// ============================================================================
#ifndef _ITERATORS_H
#define _ITERATORS_H
#include "URL.h"
// Ask compilers that understand it to read this header only once.
// ACE_LACKS_PRAGMA_ONCE is only tested here, never defined: defining it
// in this header would make the same test fail in every header processed
// afterwards, silently switching the pragma off for all of them.
// (The macro is presumably supplied by the platform configuration reached
// through the #include above when a compiler cannot handle the pragma --
// confirm against the ACE config headers.)
#if !defined (ACE_LACKS_PRAGMA_ONCE)
# pragma once
#endif /* ACE_LACKS_PRAGMA_ONCE */
class URL_Iterator
{
  // = TITLE
  //     Abstract interface for an iterator that hands back strings.
  //
  // = DESCRIPTION
  //     Each subclass decides which strings <next> passes back, so
  //     higher-level software stays independent of whichever kind of
  //     URL header or body is being traversed.
public:
  // = Initialization and termination methods.

  virtual int destroy ();
  // Plays the role of a "virtual" destructor: the real destructor is
  // protected, so code outside the class hierarchy cannot destroy an
  // iterator directly.  The meaning of the return value is set by the
  // implementation, which is not visible in this header.

  // = Iterator methods.

  virtual int next (ACE_CString &string) = 0;
  // Passes back through <string> the next item that has not been seen
  // yet.  Returns 0 once every item has been seen, else 1.

protected:
  virtual ~URL_Iterator ();
  // C++ destructor, reachable only from this class and its subclasses.
};
class HTML_Body_Iterator : public URL_Iterator
{
  // = TITLE
  //     Iterator that hands back the URLs embedded in an HTML file.
public:
  // = Initialization and termination methods.

  HTML_Body_Iterator (URL &url);
  // Constructor.  <url> is presumably the URL this iterator walks
  // over (the definition is not visible in this header).

  // = Iterator methods.

  virtual int next (ACE_CString &url);
  // Passes back through <url> the next URL in the memory-mapped file
  // that has not been seen yet.  Returns 0 once every item has been
  // seen, else 1.

private:
  URL &url_;
  // The HTTP URL being iterated over; held by reference, not copied.
};
class HTTP_Header_Iterator : public URL_Iterator
{
  // = TITLE
  //     Iterator that walks over the lines of the HTTP header.
public:
  // = Initialization and termination methods.

  HTTP_Header_Iterator (URL &url);
  // Constructor.  <url> is presumably the URL whose header this
  // iterator walks over (the definition is not visible in this header).

  // = Iterator methods.

  virtual int next (ACE_CString &line);
  // Passes back through <line> the next line of the memory-mapped
  // file header that has not been seen yet.  Returns 0 once the end
  // of the header has been reached, else 1.

private:
  URL &url_;
  // The HTTP URL being iterated over; held by reference, not copied.

  int end_of_header_;
  // Flag recording that the end of the header has been found, which
  // means this iterator is finished.
};
class URL_Download_Iterator : public URL_Iterator
{
  // = TITLE
  //     Iterator that walks over the complete contents of a URL,
  //     i.e., header and body alike, handing them back in <buffer>s
  //     of <BUFSIZ>.
public:
  // = Initialization and termination methods.

  URL_Download_Iterator (URL &url);
  // Constructor.  <url> is presumably the URL whose contents this
  // iterator downloads (the definition is not visible in this header).

  // = Iterator methods.

  virtual int next (ACE_CString &buffer);
  // Passes back through <buffer> the next chunk of data from the
  // stream, where <buffer.size> <= <BUFSIZ>.  Returns 0 when we've
  // reached the end of the header, else 1.
  // NOTE(review): "end of the header" is the original wording; since
  // this iterator covers header and body, "end of the stream" is
  // presumably what is meant -- confirm against the implementation.

private:
  URL &url_;
  // The HTTP URL being iterated over; held by reference, not copied.
};
#endif /* _ITERATORS_H */