summaryrefslogtreecommitdiff
path: root/examples/Web_Crawler/Iterators.h
blob: 19a7d6fc7d7f2adeaa07daa618afb4686e341cb9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
/* -*- C++ -*- */
// $Id$

// ============================================================================
//
// = LIBRARY
//    examples/Web_Crawler
//
// = FILENAME
//    Iterators.h
//
// = AUTHOR
//    Douglas C. Schmidt <schmidt@cs.wustl.edu>
//
// ============================================================================

#ifndef _ITERATORS_H
#define _ITERATORS_H

#include "URL.h"

#if !defined (ACE_LACKS_PRAGMA_ONCE)
#define ACE_LACKS_PRAGMA_ONCE 
#endif /* ACE_LACKS_PRAGMA_ONCE */

class URL_Iterator
{
  // = TITLE
  //    An abstract base class that defines an iterator.
  // 
  // = DESCRIPTION
  //    Subclasses of this base class can define what strings
  //    to return from <next>.  This class decouples higher-level
  //    software from the details of whatever type of URL header or
  //    body we're iterating over.
public:
  // = Initialization and termination methods.
  virtual int destroy (void);
  // "virtual" destructor.

  // = Iterator methods.
  virtual int next (ACE_CString &string) = 0;
  // Pass back the next <string> that hasn't been seen yet.  Returns 0
  // when all items have been seen, else 1.

protected:
  virtual ~URL_Iterator (void);
  // C++ destructor.
};

class HTML_Body_Iterator : public URL_Iterator
{
  // = TITLE
  //    An iterator that returns URLs embedded in HTML files.
public:
  // = Initialization and termination methods.
  HTML_Body_Iterator (URL &url);
  // Constructor.

  // = Iterator methods.
  virtual int next (ACE_CString &url);
  // Pass back the next <url> that hasn't been seen in the
  // memory-mapped file.  Returns 0 when all items have been seen,
  // else 1.

private:
  URL &url_;
  // HTTP URL that we're iterating over.
};

class HTTP_Header_Iterator : public URL_Iterator
{
  // = TITLE
  //    An iterator that iterates over the HTTP header.
public:
  // = Initialization and termination methods.
  HTTP_Header_Iterator (URL &url);
  // Constructor.

  // = Iterator methods.
  virtual int next (ACE_CString &line);
  // Pass back the next <line> that hasn't been seen in the
  // memory-mapped file header.  Returns 0 when we've reached the end
  // of the header.  seen, else 1.

private:
  URL &url_;
  // HTTP URL that we're iterating over.

  int end_of_header_;
  // We've found the end of the header, which means this iterator is
  // finished.
};

class URL_Download_Iterator : public URL_Iterator
{
  // = TITLE
  //    An iterator that iterates over the contents of an entire URL,
  //    i.e., both header and body, and returns it in <BUFSIZ>
  //    <buffer>s. 
public:
  // = Initialization and termination methods.
  URL_Download_Iterator (URL &url);
  // Constructor.

  // = Iterator methods.
  virtual int next (ACE_CString &buffer);
  // Pass back the next <buffer> data from the stream, where
  // <buffer.size> <= <BUFSIZ> .  Returns 0 when we've reached the end
  // of the header, else 1.

private:
  URL &url_;
  // HTTP URL that we're iterating over.
};

#endif /* _ITERATORS_H */